{ "best_metric": null, "best_model_checkpoint": null, "epoch": 10.0, "eval_steps": 20000, "global_step": 1017070, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.0, "grad_norm": 28.847705841064453, "learning_rate": 1.1000000000000001e-08, "loss": 0.9586, "step": 25 }, { "epoch": 0.0, "grad_norm": 50.49189376831055, "learning_rate": 2.3e-08, "loss": 2.0987, "step": 50 }, { "epoch": 0.0, "grad_norm": 40.780059814453125, "learning_rate": 3.550000000000001e-08, "loss": 0.9002, "step": 75 }, { "epoch": 0.0, "grad_norm": 40.9713134765625, "learning_rate": 4.8e-08, "loss": 2.071, "step": 100 }, { "epoch": 0.0, "grad_norm": 26.430599212646484, "learning_rate": 6.05e-08, "loss": 0.894, "step": 125 }, { "epoch": 0.0, "grad_norm": 50.3025016784668, "learning_rate": 7.3e-08, "loss": 2.0645, "step": 150 }, { "epoch": 0.0, "grad_norm": 33.30754470825195, "learning_rate": 8.55e-08, "loss": 0.9453, "step": 175 }, { "epoch": 0.0, "grad_norm": 47.734947204589844, "learning_rate": 9.8e-08, "loss": 2.1368, "step": 200 }, { "epoch": 0.0, "grad_norm": 23.294601440429688, "learning_rate": 1.1050000000000002e-07, "loss": 0.8853, "step": 225 }, { "epoch": 0.0, "grad_norm": 42.24452590942383, "learning_rate": 1.23e-07, "loss": 2.0806, "step": 250 }, { "epoch": 0.0, "grad_norm": 25.92540740966797, "learning_rate": 1.3550000000000002e-07, "loss": 0.8117, "step": 275 }, { "epoch": 0.0, "grad_norm": 50.27727127075195, "learning_rate": 1.4800000000000003e-07, "loss": 1.8358, "step": 300 }, { "epoch": 0.0, "grad_norm": 19.508121490478516, "learning_rate": 1.605e-07, "loss": 0.865, "step": 325 }, { "epoch": 0.0, "grad_norm": 38.935081481933594, "learning_rate": 1.73e-07, "loss": 1.7469, "step": 350 }, { "epoch": 0.0, "grad_norm": 17.33289909362793, "learning_rate": 1.8550000000000001e-07, "loss": 0.7007, "step": 375 }, { "epoch": 0.0, "grad_norm": 41.368186950683594, "learning_rate": 1.9800000000000003e-07, "loss": 1.5838, "step": 400 }, { "epoch": 0.0, "grad_norm": 24.94549560546875, "learning_rate": 2.105e-07, "loss": 0.6345, "step": 425 }, { "epoch": 0.0, "grad_norm": 49.505794525146484, "learning_rate": 2.2300000000000002e-07, "loss": 1.1913, "step": 450 }, { "epoch": 0.0, "grad_norm": 23.152936935424805, "learning_rate": 2.3550000000000004e-07, "loss": 0.4957, "step": 475 }, { "epoch": 0.0, "grad_norm": 42.70956802368164, "learning_rate": 2.48e-07, "loss": 1.0002, "step": 500 }, { "epoch": 0.01, "grad_norm": 17.532855987548828, "learning_rate": 2.6050000000000004e-07, "loss": 0.4952, "step": 525 }, { "epoch": 0.01, "grad_norm": 32.25798416137695, "learning_rate": 2.73e-07, "loss": 1.0309, "step": 550 }, { "epoch": 0.01, "grad_norm": 14.901487350463867, "learning_rate": 2.855e-07, "loss": 0.4604, "step": 575 }, { "epoch": 0.01, "grad_norm": 37.89100646972656, "learning_rate": 2.9800000000000005e-07, "loss": 1.1685, "step": 600 }, { "epoch": 0.01, "grad_norm": 18.824260711669922, "learning_rate": 3.1050000000000003e-07, "loss": 0.4056, "step": 625 }, { "epoch": 0.01, "grad_norm": 31.887435913085938, "learning_rate": 3.2300000000000007e-07, "loss": 0.8953, "step": 650 }, { "epoch": 0.01, "grad_norm": 14.27244758605957, "learning_rate": 3.3550000000000006e-07, "loss": 0.37, "step": 675 }, { "epoch": 0.01, "grad_norm": 30.90846061706543, "learning_rate": 3.48e-07, "loss": 0.9204, "step": 700 }, { "epoch": 0.01, "grad_norm": 12.430575370788574, "learning_rate": 3.6050000000000003e-07, "loss": 0.3652, "step": 725 }, { "epoch": 0.01, "grad_norm": 36.639732360839844, "learning_rate": 3.73e-07, "loss": 0.888, "step": 750 }, { "epoch": 0.01, "grad_norm": 19.905160903930664, "learning_rate": 3.8550000000000006e-07, "loss": 0.4094, "step": 775 }, { "epoch": 0.01, "grad_norm": 28.099273681640625, "learning_rate": 3.9800000000000004e-07, "loss": 0.8054, "step": 800 }, { "epoch": 0.01, "grad_norm": 12.171104431152344, "learning_rate": 4.105000000000001e-07, "loss": 0.3467, "step": 825 }, { "epoch": 0.01, "grad_norm": 29.21428871154785, "learning_rate": 4.23e-07, "loss": 0.6835, "step": 850 }, { "epoch": 0.01, "grad_norm": 15.55582332611084, "learning_rate": 4.355e-07, "loss": 0.3751, "step": 875 }, { "epoch": 0.01, "grad_norm": 33.754512786865234, "learning_rate": 4.4800000000000004e-07, "loss": 0.7809, "step": 900 }, { "epoch": 0.01, "grad_norm": 12.544480323791504, "learning_rate": 4.6050000000000003e-07, "loss": 0.3862, "step": 925 }, { "epoch": 0.01, "grad_norm": 28.473928451538086, "learning_rate": 4.7300000000000007e-07, "loss": 0.7245, "step": 950 }, { "epoch": 0.01, "grad_norm": 20.05843162536621, "learning_rate": 4.855e-07, "loss": 0.3533, "step": 975 }, { "epoch": 0.01, "grad_norm": 24.7759952545166, "learning_rate": 4.98e-07, "loss": 0.7687, "step": 1000 }, { "epoch": 0.01, "grad_norm": 21.777341842651367, "learning_rate": 5.105e-07, "loss": 0.3893, "step": 1025 }, { "epoch": 0.01, "grad_norm": 32.66143798828125, "learning_rate": 5.23e-07, "loss": 0.7582, "step": 1050 }, { "epoch": 0.01, "grad_norm": 9.50550651550293, "learning_rate": 5.355e-07, "loss": 0.3653, "step": 1075 }, { "epoch": 0.01, "grad_norm": 28.202194213867188, "learning_rate": 5.480000000000001e-07, "loss": 0.6338, "step": 1100 }, { "epoch": 0.01, "grad_norm": 7.273616790771484, "learning_rate": 5.605000000000001e-07, "loss": 0.3761, "step": 1125 }, { "epoch": 0.01, "grad_norm": 20.13578987121582, "learning_rate": 5.730000000000001e-07, "loss": 0.7762, "step": 1150 }, { "epoch": 0.01, "grad_norm": 12.328003883361816, "learning_rate": 5.855e-07, "loss": 0.3238, "step": 1175 }, { "epoch": 0.01, "grad_norm": 23.894184112548828, "learning_rate": 5.98e-07, "loss": 0.8009, "step": 1200 }, { "epoch": 0.01, "grad_norm": 21.52553939819336, "learning_rate": 6.105e-07, "loss": 0.3005, "step": 1225 }, { "epoch": 0.01, "grad_norm": 34.04698944091797, "learning_rate": 6.230000000000001e-07, "loss": 0.8044, "step": 1250 }, { "epoch": 0.01, "grad_norm": 12.525055885314941, "learning_rate": 6.355e-07, "loss": 0.3132, "step": 1275 }, { "epoch": 0.01, "grad_norm": 29.420198440551758, "learning_rate": 6.48e-07, "loss": 0.6925, "step": 1300 }, { "epoch": 0.01, "grad_norm": 17.890785217285156, "learning_rate": 6.605000000000001e-07, "loss": 0.387, "step": 1325 }, { "epoch": 0.01, "grad_norm": 30.08964729309082, "learning_rate": 6.730000000000001e-07, "loss": 0.7241, "step": 1350 }, { "epoch": 0.01, "grad_norm": 15.878478050231934, "learning_rate": 6.855e-07, "loss": 0.4171, "step": 1375 }, { "epoch": 0.01, "grad_norm": 32.391666412353516, "learning_rate": 6.98e-07, "loss": 0.7524, "step": 1400 }, { "epoch": 0.01, "grad_norm": 10.580907821655273, "learning_rate": 7.105000000000001e-07, "loss": 0.3578, "step": 1425 }, { "epoch": 0.01, "grad_norm": 41.9957160949707, "learning_rate": 7.230000000000001e-07, "loss": 0.7245, "step": 1450 }, { "epoch": 0.01, "grad_norm": 13.333178520202637, "learning_rate": 7.355000000000001e-07, "loss": 0.344, "step": 1475 }, { "epoch": 0.01, "grad_norm": 24.920408248901367, "learning_rate": 7.480000000000001e-07, "loss": 0.611, "step": 1500 }, { "epoch": 0.01, "grad_norm": 16.14500617980957, "learning_rate": 7.605000000000002e-07, "loss": 0.3475, "step": 1525 }, { "epoch": 0.02, "grad_norm": 27.01786231994629, "learning_rate": 7.73e-07, "loss": 0.5934, "step": 1550 }, { "epoch": 0.02, "grad_norm": 12.386067390441895, "learning_rate": 7.855e-07, "loss": 0.404, "step": 1575 }, { "epoch": 0.02, "grad_norm": 26.861234664916992, "learning_rate": 7.98e-07, "loss": 0.7371, "step": 1600 }, { "epoch": 0.02, "grad_norm": 15.57017707824707, "learning_rate": 8.105e-07, "loss": 0.3294, "step": 1625 }, { "epoch": 0.02, "grad_norm": 29.059101104736328, "learning_rate": 8.23e-07, "loss": 0.701, "step": 1650 }, { "epoch": 0.02, "grad_norm": 13.635632514953613, "learning_rate": 8.355000000000001e-07, "loss": 0.3064, "step": 1675 }, { "epoch": 0.02, "grad_norm": 23.52585220336914, "learning_rate": 8.480000000000001e-07, "loss": 0.6689, "step": 1700 }, { "epoch": 0.02, "grad_norm": 13.127826690673828, "learning_rate": 8.605000000000001e-07, "loss": 0.3625, "step": 1725 }, { "epoch": 0.02, "grad_norm": 38.72964096069336, "learning_rate": 8.73e-07, "loss": 0.6777, "step": 1750 }, { "epoch": 0.02, "grad_norm": 12.668331146240234, "learning_rate": 8.855000000000001e-07, "loss": 0.3264, "step": 1775 }, { "epoch": 0.02, "grad_norm": 23.919504165649414, "learning_rate": 8.980000000000001e-07, "loss": 0.6873, "step": 1800 }, { "epoch": 0.02, "grad_norm": 6.271666049957275, "learning_rate": 9.105000000000001e-07, "loss": 0.316, "step": 1825 }, { "epoch": 0.02, "grad_norm": 20.724960327148438, "learning_rate": 9.23e-07, "loss": 0.652, "step": 1850 }, { "epoch": 0.02, "grad_norm": 17.960193634033203, "learning_rate": 9.355e-07, "loss": 0.336, "step": 1875 }, { "epoch": 0.02, "grad_norm": 19.955202102661133, "learning_rate": 9.480000000000001e-07, "loss": 0.6399, "step": 1900 }, { "epoch": 0.02, "grad_norm": 14.765907287597656, "learning_rate": 9.605e-07, "loss": 0.3237, "step": 1925 }, { "epoch": 0.02, "grad_norm": 28.221097946166992, "learning_rate": 9.73e-07, "loss": 0.6293, "step": 1950 }, { "epoch": 0.02, "grad_norm": 12.195951461791992, "learning_rate": 9.855000000000001e-07, "loss": 0.3348, "step": 1975 }, { "epoch": 0.02, "grad_norm": 25.333417892456055, "learning_rate": 9.98e-07, "loss": 0.6063, "step": 2000 }, { "epoch": 0.02, "grad_norm": 21.09111976623535, "learning_rate": 1.0105000000000001e-06, "loss": 0.2789, "step": 2025 }, { "epoch": 0.02, "grad_norm": 32.072967529296875, "learning_rate": 1.0230000000000002e-06, "loss": 0.7533, "step": 2050 }, { "epoch": 0.02, "grad_norm": 9.684382438659668, "learning_rate": 1.0355e-06, "loss": 0.321, "step": 2075 }, { "epoch": 0.02, "grad_norm": 23.95610809326172, "learning_rate": 1.0480000000000002e-06, "loss": 0.643, "step": 2100 }, { "epoch": 0.02, "grad_norm": 9.330102920532227, "learning_rate": 1.0605e-06, "loss": 0.3215, "step": 2125 }, { "epoch": 0.02, "grad_norm": 24.018898010253906, "learning_rate": 1.0730000000000001e-06, "loss": 0.6074, "step": 2150 }, { "epoch": 0.02, "grad_norm": 16.121315002441406, "learning_rate": 1.0855e-06, "loss": 0.2855, "step": 2175 }, { "epoch": 0.02, "grad_norm": 29.43797492980957, "learning_rate": 1.0980000000000001e-06, "loss": 0.568, "step": 2200 }, { "epoch": 0.02, "grad_norm": 17.239381790161133, "learning_rate": 1.1105e-06, "loss": 0.2644, "step": 2225 }, { "epoch": 0.02, "grad_norm": 34.230430603027344, "learning_rate": 1.123e-06, "loss": 0.6016, "step": 2250 }, { "epoch": 0.02, "grad_norm": 9.016093254089355, "learning_rate": 1.1355e-06, "loss": 0.2832, "step": 2275 }, { "epoch": 0.02, "grad_norm": 24.23920440673828, "learning_rate": 1.148e-06, "loss": 0.6103, "step": 2300 }, { "epoch": 0.02, "grad_norm": 19.33639144897461, "learning_rate": 1.1605000000000002e-06, "loss": 0.3103, "step": 2325 }, { "epoch": 0.02, "grad_norm": 25.833850860595703, "learning_rate": 1.173e-06, "loss": 0.6822, "step": 2350 }, { "epoch": 0.02, "grad_norm": 17.170454025268555, "learning_rate": 1.1855000000000001e-06, "loss": 0.2827, "step": 2375 }, { "epoch": 0.02, "grad_norm": 24.243547439575195, "learning_rate": 1.1980000000000002e-06, "loss": 0.7076, "step": 2400 }, { "epoch": 0.02, "grad_norm": 15.752288818359375, "learning_rate": 1.2105e-06, "loss": 0.3273, "step": 2425 }, { "epoch": 0.02, "grad_norm": 23.900522232055664, "learning_rate": 1.2225000000000002e-06, "loss": 0.5979, "step": 2450 }, { "epoch": 0.02, "grad_norm": 10.737213134765625, "learning_rate": 1.235e-06, "loss": 0.3171, "step": 2475 }, { "epoch": 0.02, "grad_norm": 24.553348541259766, "learning_rate": 1.2475000000000001e-06, "loss": 0.5567, "step": 2500 }, { "epoch": 0.02, "grad_norm": 19.952777862548828, "learning_rate": 1.26e-06, "loss": 0.3518, "step": 2525 }, { "epoch": 0.03, "grad_norm": 29.79047966003418, "learning_rate": 1.2725e-06, "loss": 0.5855, "step": 2550 }, { "epoch": 0.03, "grad_norm": 15.847892761230469, "learning_rate": 1.2850000000000002e-06, "loss": 0.3369, "step": 2575 }, { "epoch": 0.03, "grad_norm": 28.51085090637207, "learning_rate": 1.2975e-06, "loss": 0.5575, "step": 2600 }, { "epoch": 0.03, "grad_norm": 8.789578437805176, "learning_rate": 1.3100000000000002e-06, "loss": 0.3387, "step": 2625 }, { "epoch": 0.03, "grad_norm": 24.334619522094727, "learning_rate": 1.3225000000000003e-06, "loss": 0.5738, "step": 2650 }, { "epoch": 0.03, "grad_norm": 17.373729705810547, "learning_rate": 1.3350000000000001e-06, "loss": 0.3014, "step": 2675 }, { "epoch": 0.03, "grad_norm": 21.119243621826172, "learning_rate": 1.3475000000000002e-06, "loss": 0.5935, "step": 2700 }, { "epoch": 0.03, "grad_norm": 13.30176830291748, "learning_rate": 1.3600000000000001e-06, "loss": 0.302, "step": 2725 }, { "epoch": 0.03, "grad_norm": 25.810537338256836, "learning_rate": 1.3725000000000002e-06, "loss": 0.5574, "step": 2750 }, { "epoch": 0.03, "grad_norm": 14.738842964172363, "learning_rate": 1.3850000000000003e-06, "loss": 0.286, "step": 2775 }, { "epoch": 0.03, "grad_norm": 21.148601531982422, "learning_rate": 1.3975000000000002e-06, "loss": 0.5944, "step": 2800 }, { "epoch": 0.03, "grad_norm": 17.612159729003906, "learning_rate": 1.41e-06, "loss": 0.2496, "step": 2825 }, { "epoch": 0.03, "grad_norm": 18.20772933959961, "learning_rate": 1.4225e-06, "loss": 0.5769, "step": 2850 }, { "epoch": 0.03, "grad_norm": 11.214025497436523, "learning_rate": 1.435e-06, "loss": 0.2753, "step": 2875 }, { "epoch": 0.03, "grad_norm": 33.65066909790039, "learning_rate": 1.4475000000000001e-06, "loss": 0.5492, "step": 2900 }, { "epoch": 0.03, "grad_norm": 12.693580627441406, "learning_rate": 1.46e-06, "loss": 0.2625, "step": 2925 }, { "epoch": 0.03, "grad_norm": 20.4018497467041, "learning_rate": 1.4725e-06, "loss": 0.5921, "step": 2950 }, { "epoch": 0.03, "grad_norm": 15.006670951843262, "learning_rate": 1.485e-06, "loss": 0.3038, "step": 2975 }, { "epoch": 0.03, "grad_norm": 21.46860122680664, "learning_rate": 1.4975e-06, "loss": 0.5656, "step": 3000 }, { "epoch": 0.03, "grad_norm": 21.65653419494629, "learning_rate": 1.5100000000000002e-06, "loss": 0.2764, "step": 3025 }, { "epoch": 0.03, "grad_norm": 22.678808212280273, "learning_rate": 1.5225e-06, "loss": 0.538, "step": 3050 }, { "epoch": 0.03, "grad_norm": 14.860958099365234, "learning_rate": 1.5350000000000001e-06, "loss": 0.3016, "step": 3075 }, { "epoch": 0.03, "grad_norm": 22.96617889404297, "learning_rate": 1.5475000000000002e-06, "loss": 0.6664, "step": 3100 }, { "epoch": 0.03, "grad_norm": 10.578760147094727, "learning_rate": 1.56e-06, "loss": 0.3357, "step": 3125 }, { "epoch": 0.03, "grad_norm": 24.811819076538086, "learning_rate": 1.5725000000000002e-06, "loss": 0.5681, "step": 3150 }, { "epoch": 0.03, "grad_norm": 40.5649299621582, "learning_rate": 1.585e-06, "loss": 0.3605, "step": 3175 }, { "epoch": 0.03, "grad_norm": 25.10923194885254, "learning_rate": 1.5975000000000002e-06, "loss": 0.6301, "step": 3200 }, { "epoch": 0.03, "grad_norm": 20.92035484313965, "learning_rate": 1.6100000000000003e-06, "loss": 0.3181, "step": 3225 }, { "epoch": 0.03, "grad_norm": 25.772808074951172, "learning_rate": 1.6225000000000001e-06, "loss": 0.6406, "step": 3250 }, { "epoch": 0.03, "grad_norm": 15.179930686950684, "learning_rate": 1.6350000000000002e-06, "loss": 0.2942, "step": 3275 }, { "epoch": 0.03, "grad_norm": 21.113195419311523, "learning_rate": 1.6475000000000001e-06, "loss": 0.6633, "step": 3300 }, { "epoch": 0.03, "grad_norm": 23.897584915161133, "learning_rate": 1.6600000000000002e-06, "loss": 0.2925, "step": 3325 }, { "epoch": 0.03, "grad_norm": 26.384321212768555, "learning_rate": 1.6725000000000003e-06, "loss": 0.526, "step": 3350 }, { "epoch": 0.03, "grad_norm": 14.188013076782227, "learning_rate": 1.6850000000000002e-06, "loss": 0.3083, "step": 3375 }, { "epoch": 0.03, "grad_norm": 24.757654190063477, "learning_rate": 1.6975000000000003e-06, "loss": 0.6363, "step": 3400 }, { "epoch": 0.03, "grad_norm": 13.438957214355469, "learning_rate": 1.7100000000000004e-06, "loss": 0.2451, "step": 3425 }, { "epoch": 0.03, "grad_norm": 23.39787483215332, "learning_rate": 1.7225e-06, "loss": 0.5614, "step": 3450 }, { "epoch": 0.03, "grad_norm": 16.605846405029297, "learning_rate": 1.7350000000000001e-06, "loss": 0.264, "step": 3475 }, { "epoch": 0.03, "grad_norm": 20.900297164916992, "learning_rate": 1.7475e-06, "loss": 0.5538, "step": 3500 }, { "epoch": 0.03, "grad_norm": 10.272775650024414, "learning_rate": 1.76e-06, "loss": 0.2996, "step": 3525 }, { "epoch": 0.03, "grad_norm": 24.102636337280273, "learning_rate": 1.7725e-06, "loss": 0.5484, "step": 3550 }, { "epoch": 0.04, "grad_norm": 7.758277893066406, "learning_rate": 1.785e-06, "loss": 0.3089, "step": 3575 }, { "epoch": 0.04, "grad_norm": 28.68324089050293, "learning_rate": 1.7975000000000002e-06, "loss": 0.6229, "step": 3600 }, { "epoch": 0.04, "grad_norm": 11.675895690917969, "learning_rate": 1.81e-06, "loss": 0.2881, "step": 3625 }, { "epoch": 0.04, "grad_norm": 25.599733352661133, "learning_rate": 1.8225000000000001e-06, "loss": 0.5349, "step": 3650 }, { "epoch": 0.04, "grad_norm": 12.584117889404297, "learning_rate": 1.8350000000000002e-06, "loss": 0.3089, "step": 3675 }, { "epoch": 0.04, "grad_norm": 22.68401527404785, "learning_rate": 1.8475e-06, "loss": 0.6212, "step": 3700 }, { "epoch": 0.04, "grad_norm": 15.373207092285156, "learning_rate": 1.8600000000000002e-06, "loss": 0.2584, "step": 3725 }, { "epoch": 0.04, "grad_norm": 31.86733055114746, "learning_rate": 1.8725e-06, "loss": 0.5686, "step": 3750 }, { "epoch": 0.04, "grad_norm": 17.177127838134766, "learning_rate": 1.8850000000000002e-06, "loss": 0.3296, "step": 3775 }, { "epoch": 0.04, "grad_norm": 21.377216339111328, "learning_rate": 1.8975000000000003e-06, "loss": 0.556, "step": 3800 }, { "epoch": 0.04, "grad_norm": 10.840351104736328, "learning_rate": 1.9100000000000003e-06, "loss": 0.3162, "step": 3825 }, { "epoch": 0.04, "grad_norm": 22.359825134277344, "learning_rate": 1.9225000000000002e-06, "loss": 0.5339, "step": 3850 }, { "epoch": 0.04, "grad_norm": 20.939926147460938, "learning_rate": 1.935e-06, "loss": 0.3504, "step": 3875 }, { "epoch": 0.04, "grad_norm": 30.1402645111084, "learning_rate": 1.9475000000000004e-06, "loss": 0.5382, "step": 3900 }, { "epoch": 0.04, "grad_norm": 18.81707000732422, "learning_rate": 1.9600000000000003e-06, "loss": 0.2407, "step": 3925 }, { "epoch": 0.04, "grad_norm": 22.982892990112305, "learning_rate": 1.9725e-06, "loss": 0.5393, "step": 3950 }, { "epoch": 0.04, "grad_norm": 15.778573989868164, "learning_rate": 1.985e-06, "loss": 0.3063, "step": 3975 }, { "epoch": 0.04, "grad_norm": 24.99148941040039, "learning_rate": 1.9975000000000004e-06, "loss": 0.5649, "step": 4000 }, { "epoch": 0.04, "grad_norm": 14.094460487365723, "learning_rate": 2.0100000000000002e-06, "loss": 0.2746, "step": 4025 }, { "epoch": 0.04, "grad_norm": 22.46392250061035, "learning_rate": 2.0225e-06, "loss": 0.5135, "step": 4050 }, { "epoch": 0.04, "grad_norm": 13.21150016784668, "learning_rate": 2.035e-06, "loss": 0.2695, "step": 4075 }, { "epoch": 0.04, "grad_norm": 25.412303924560547, "learning_rate": 2.0475e-06, "loss": 0.5399, "step": 4100 }, { "epoch": 0.04, "grad_norm": 12.634136199951172, "learning_rate": 2.06e-06, "loss": 0.2266, "step": 4125 }, { "epoch": 0.04, "grad_norm": 27.319255828857422, "learning_rate": 2.0725e-06, "loss": 0.5929, "step": 4150 }, { "epoch": 0.04, "grad_norm": 11.115804672241211, "learning_rate": 2.085e-06, "loss": 0.3304, "step": 4175 }, { "epoch": 0.04, "grad_norm": 29.844072341918945, "learning_rate": 2.0975000000000002e-06, "loss": 0.5319, "step": 4200 }, { "epoch": 0.04, "grad_norm": 16.064058303833008, "learning_rate": 2.11e-06, "loss": 0.3364, "step": 4225 }, { "epoch": 0.04, "grad_norm": 17.23797607421875, "learning_rate": 2.1225e-06, "loss": 0.5819, "step": 4250 }, { "epoch": 0.04, "grad_norm": 18.461576461791992, "learning_rate": 2.1350000000000003e-06, "loss": 0.2958, "step": 4275 }, { "epoch": 0.04, "grad_norm": 21.01024055480957, "learning_rate": 2.1475e-06, "loss": 0.6186, "step": 4300 }, { "epoch": 0.04, "grad_norm": 11.027046203613281, "learning_rate": 2.16e-06, "loss": 0.2872, "step": 4325 }, { "epoch": 0.04, "grad_norm": 26.586774826049805, "learning_rate": 2.1725000000000004e-06, "loss": 0.593, "step": 4350 }, { "epoch": 0.04, "grad_norm": 9.804169654846191, "learning_rate": 2.1850000000000003e-06, "loss": 0.303, "step": 4375 }, { "epoch": 0.04, "grad_norm": 22.380687713623047, "learning_rate": 2.1975e-06, "loss": 0.5256, "step": 4400 }, { "epoch": 0.04, "grad_norm": 8.40493392944336, "learning_rate": 2.21e-06, "loss": 0.3053, "step": 4425 }, { "epoch": 0.04, "grad_norm": 14.92182731628418, "learning_rate": 2.2225000000000003e-06, "loss": 0.6042, "step": 4450 }, { "epoch": 0.04, "grad_norm": 14.116979598999023, "learning_rate": 2.235e-06, "loss": 0.3178, "step": 4475 }, { "epoch": 0.04, "grad_norm": 21.750770568847656, "learning_rate": 2.2475e-06, "loss": 0.6285, "step": 4500 }, { "epoch": 0.04, "grad_norm": 10.67201042175293, "learning_rate": 2.2600000000000004e-06, "loss": 0.2749, "step": 4525 }, { "epoch": 0.04, "grad_norm": 29.427690505981445, "learning_rate": 2.2725000000000003e-06, "loss": 0.7229, "step": 4550 }, { "epoch": 0.04, "grad_norm": 13.640838623046875, "learning_rate": 2.285e-06, "loss": 0.2977, "step": 4575 }, { "epoch": 0.05, "grad_norm": 19.302093505859375, "learning_rate": 2.2975000000000004e-06, "loss": 0.5262, "step": 4600 }, { "epoch": 0.05, "grad_norm": 12.907156944274902, "learning_rate": 2.3100000000000003e-06, "loss": 0.2943, "step": 4625 }, { "epoch": 0.05, "grad_norm": 25.480953216552734, "learning_rate": 2.3225e-06, "loss": 0.5958, "step": 4650 }, { "epoch": 0.05, "grad_norm": 18.16008758544922, "learning_rate": 2.3350000000000005e-06, "loss": 0.2767, "step": 4675 }, { "epoch": 0.05, "grad_norm": 26.57048988342285, "learning_rate": 2.3475e-06, "loss": 0.5169, "step": 4700 }, { "epoch": 0.05, "grad_norm": 21.496782302856445, "learning_rate": 2.3600000000000003e-06, "loss": 0.28, "step": 4725 }, { "epoch": 0.05, "grad_norm": 22.303558349609375, "learning_rate": 2.3725e-06, "loss": 0.5656, "step": 4750 }, { "epoch": 0.05, "grad_norm": 12.915544509887695, "learning_rate": 2.385e-06, "loss": 0.2956, "step": 4775 }, { "epoch": 0.05, "grad_norm": 26.404857635498047, "learning_rate": 2.3975e-06, "loss": 0.5849, "step": 4800 }, { "epoch": 0.05, "grad_norm": 15.948336601257324, "learning_rate": 2.4100000000000002e-06, "loss": 0.336, "step": 4825 }, { "epoch": 0.05, "grad_norm": 25.08620834350586, "learning_rate": 2.4225e-06, "loss": 0.5631, "step": 4850 }, { "epoch": 0.05, "grad_norm": 19.255910873413086, "learning_rate": 2.435e-06, "loss": 0.3171, "step": 4875 }, { "epoch": 0.05, "grad_norm": 24.618165969848633, "learning_rate": 2.4475000000000003e-06, "loss": 0.4801, "step": 4900 }, { "epoch": 0.05, "grad_norm": 7.580767631530762, "learning_rate": 2.46e-06, "loss": 0.2461, "step": 4925 }, { "epoch": 0.05, "grad_norm": 19.49013328552246, "learning_rate": 2.4725e-06, "loss": 0.5092, "step": 4950 }, { "epoch": 0.05, "grad_norm": 13.87459659576416, "learning_rate": 2.4850000000000003e-06, "loss": 0.2866, "step": 4975 }, { "epoch": 0.05, "grad_norm": 19.162206649780273, "learning_rate": 2.4975000000000002e-06, "loss": 0.4787, "step": 5000 }, { "epoch": 0.05, "grad_norm": 15.875246047973633, "learning_rate": 2.51e-06, "loss": 0.3436, "step": 5025 }, { "epoch": 0.05, "grad_norm": 20.522253036499023, "learning_rate": 2.5225e-06, "loss": 0.5762, "step": 5050 }, { "epoch": 0.05, "grad_norm": 19.34189796447754, "learning_rate": 2.5350000000000003e-06, "loss": 0.2639, "step": 5075 }, { "epoch": 0.05, "grad_norm": 27.203989028930664, "learning_rate": 2.5475e-06, "loss": 0.6082, "step": 5100 }, { "epoch": 0.05, "grad_norm": 10.197469711303711, "learning_rate": 2.56e-06, "loss": 0.2578, "step": 5125 }, { "epoch": 0.05, "grad_norm": 27.610708236694336, "learning_rate": 2.5725e-06, "loss": 0.5651, "step": 5150 }, { "epoch": 0.05, "grad_norm": 19.15465545654297, "learning_rate": 2.5850000000000002e-06, "loss": 0.2387, "step": 5175 }, { "epoch": 0.05, "grad_norm": 30.258508682250977, "learning_rate": 2.5975e-06, "loss": 0.5827, "step": 5200 }, { "epoch": 0.05, "grad_norm": 11.530698776245117, "learning_rate": 2.6100000000000004e-06, "loss": 0.2643, "step": 5225 }, { "epoch": 0.05, "grad_norm": 21.208711624145508, "learning_rate": 2.6225e-06, "loss": 0.5645, "step": 5250 }, { "epoch": 0.05, "grad_norm": 7.096436023712158, "learning_rate": 2.635e-06, "loss": 0.2632, "step": 5275 }, { "epoch": 0.05, "grad_norm": 28.85466957092285, "learning_rate": 2.6475e-06, "loss": 0.5407, "step": 5300 }, { "epoch": 0.05, "grad_norm": 10.47000789642334, "learning_rate": 2.6600000000000004e-06, "loss": 0.257, "step": 5325 }, { "epoch": 0.05, "grad_norm": 23.184587478637695, "learning_rate": 2.6725000000000002e-06, "loss": 0.5223, "step": 5350 }, { "epoch": 0.05, "grad_norm": 16.883581161499023, "learning_rate": 2.6850000000000006e-06, "loss": 0.2721, "step": 5375 }, { "epoch": 0.05, "grad_norm": 17.821680068969727, "learning_rate": 2.6975e-06, "loss": 0.5876, "step": 5400 }, { "epoch": 0.05, "grad_norm": 11.550686836242676, "learning_rate": 2.7100000000000003e-06, "loss": 0.361, "step": 5425 }, { "epoch": 0.05, "grad_norm": 20.48277473449707, "learning_rate": 2.7225e-06, "loss": 0.5176, "step": 5450 }, { "epoch": 0.05, "grad_norm": 9.938935279846191, "learning_rate": 2.7350000000000005e-06, "loss": 0.2719, "step": 5475 }, { "epoch": 0.05, "grad_norm": 27.67144012451172, "learning_rate": 2.7475000000000004e-06, "loss": 0.5893, "step": 5500 }, { "epoch": 0.05, "grad_norm": 15.107447624206543, "learning_rate": 2.7600000000000003e-06, "loss": 0.259, "step": 5525 }, { "epoch": 0.05, "grad_norm": Infinity, "learning_rate": 2.7720000000000003e-06, "loss": 0.5958, "step": 5550 }, { "epoch": 0.05, "grad_norm": 17.594661712646484, "learning_rate": 2.7845e-06, "loss": 0.2659, "step": 5575 }, { "epoch": 0.06, "grad_norm": 17.903818130493164, "learning_rate": 2.797e-06, "loss": 0.5293, "step": 5600 }, { "epoch": 0.06, "grad_norm": 7.8892107009887695, "learning_rate": 2.8095e-06, "loss": 0.3264, "step": 5625 }, { "epoch": 0.06, "grad_norm": 28.820323944091797, "learning_rate": 2.8220000000000003e-06, "loss": 0.607, "step": 5650 }, { "epoch": 0.06, "grad_norm": 10.912083625793457, "learning_rate": 2.8345e-06, "loss": 0.2815, "step": 5675 }, { "epoch": 0.06, "grad_norm": 26.025020599365234, "learning_rate": 2.8470000000000004e-06, "loss": 0.5962, "step": 5700 }, { "epoch": 0.06, "grad_norm": 6.506562232971191, "learning_rate": 2.8595e-06, "loss": 0.2829, "step": 5725 }, { "epoch": 0.06, "grad_norm": 24.320459365844727, "learning_rate": 2.872e-06, "loss": 0.564, "step": 5750 }, { "epoch": 0.06, "grad_norm": 10.92339038848877, "learning_rate": 2.8845e-06, "loss": 0.2869, "step": 5775 }, { "epoch": 0.06, "grad_norm": 26.875972747802734, "learning_rate": 2.8970000000000004e-06, "loss": 0.5385, "step": 5800 }, { "epoch": 0.06, "grad_norm": 25.37314224243164, "learning_rate": 2.9095000000000003e-06, "loss": 0.2565, "step": 5825 }, { "epoch": 0.06, "grad_norm": 28.848682403564453, "learning_rate": 2.9220000000000006e-06, "loss": 0.543, "step": 5850 }, { "epoch": 0.06, "grad_norm": 10.071850776672363, "learning_rate": 2.9345e-06, "loss": 0.2583, "step": 5875 }, { "epoch": 0.06, "grad_norm": 34.29800796508789, "learning_rate": 2.9470000000000003e-06, "loss": 0.5571, "step": 5900 }, { "epoch": 0.06, "grad_norm": 12.92024040222168, "learning_rate": 2.9595e-06, "loss": 0.2731, "step": 5925 }, { "epoch": 0.06, "grad_norm": 24.217716217041016, "learning_rate": 2.9720000000000005e-06, "loss": 0.666, "step": 5950 }, { "epoch": 0.06, "grad_norm": 6.2856364250183105, "learning_rate": 2.9845e-06, "loss": 0.3121, "step": 5975 }, { "epoch": 0.06, "grad_norm": 24.842273712158203, "learning_rate": 2.9970000000000003e-06, "loss": 0.6474, "step": 6000 }, { "epoch": 0.06, "grad_norm": 7.107849597930908, "learning_rate": 3.0095e-06, "loss": 0.2738, "step": 6025 }, { "epoch": 0.06, "grad_norm": 24.826995849609375, "learning_rate": 3.0220000000000005e-06, "loss": 0.592, "step": 6050 }, { "epoch": 0.06, "grad_norm": 19.609437942504883, "learning_rate": 3.0345000000000003e-06, "loss": 0.3326, "step": 6075 }, { "epoch": 0.06, "grad_norm": 24.35671043395996, "learning_rate": 3.0470000000000006e-06, "loss": 0.5994, "step": 6100 }, { "epoch": 0.06, "grad_norm": 13.18606948852539, "learning_rate": 3.0595e-06, "loss": 0.2667, "step": 6125 }, { "epoch": 0.06, "grad_norm": 21.976835250854492, "learning_rate": 3.072e-06, "loss": 0.5335, "step": 6150 }, { "epoch": 0.06, "grad_norm": 18.63410758972168, "learning_rate": 3.0845000000000003e-06, "loss": 0.2451, "step": 6175 }, { "epoch": 0.06, "grad_norm": 28.247285842895508, "learning_rate": 3.097e-06, "loss": 0.5417, "step": 6200 }, { "epoch": 0.06, "grad_norm": 15.960722923278809, "learning_rate": 3.1095000000000005e-06, "loss": 0.2734, "step": 6225 }, { "epoch": 0.06, "grad_norm": 17.812959671020508, "learning_rate": 3.122e-06, "loss": 0.5054, "step": 6250 }, { "epoch": 0.06, "grad_norm": 14.012590408325195, "learning_rate": 3.1345000000000002e-06, "loss": 0.2391, "step": 6275 }, { "epoch": 0.06, "grad_norm": 21.24317741394043, "learning_rate": 3.147e-06, "loss": 0.4861, "step": 6300 }, { "epoch": 0.06, "grad_norm": 8.553666114807129, "learning_rate": 3.1595000000000004e-06, "loss": 0.2838, "step": 6325 }, { "epoch": 0.06, "grad_norm": 21.861032485961914, "learning_rate": 3.172e-06, "loss": 0.5158, "step": 6350 }, { "epoch": 0.06, "grad_norm": 9.552922248840332, "learning_rate": 3.1845e-06, "loss": 0.305, "step": 6375 }, { "epoch": 0.06, "grad_norm": 23.805503845214844, "learning_rate": 3.197e-06, "loss": 0.5132, "step": 6400 }, { "epoch": 0.06, "grad_norm": 14.1007719039917, "learning_rate": 3.2095000000000004e-06, "loss": 0.2613, "step": 6425 }, { "epoch": 0.06, "grad_norm": 20.142436981201172, "learning_rate": 3.2220000000000002e-06, "loss": 0.4463, "step": 6450 }, { "epoch": 0.06, "grad_norm": 18.8670597076416, "learning_rate": 3.2345000000000005e-06, "loss": 0.3191, "step": 6475 }, { "epoch": 0.06, "grad_norm": 17.095565795898438, "learning_rate": 3.247e-06, "loss": 0.5189, "step": 6500 }, { "epoch": 0.06, "grad_norm": 12.56757640838623, "learning_rate": 3.2595000000000003e-06, "loss": 0.2782, "step": 6525 }, { "epoch": 0.06, "grad_norm": 22.846195220947266, "learning_rate": 3.272e-06, "loss": 0.486, "step": 6550 }, { "epoch": 0.06, "grad_norm": 15.21054744720459, "learning_rate": 3.2845000000000005e-06, "loss": 0.3235, "step": 6575 }, { "epoch": 0.06, "grad_norm": 28.165435791015625, "learning_rate": 3.2970000000000004e-06, "loss": 0.5058, "step": 6600 }, { "epoch": 0.07, "grad_norm": 6.453917026519775, "learning_rate": 3.3095000000000007e-06, "loss": 0.2429, "step": 6625 }, { "epoch": 0.07, "grad_norm": 14.98429012298584, "learning_rate": 3.322e-06, "loss": 0.5072, "step": 6650 }, { "epoch": 0.07, "grad_norm": 15.812800407409668, "learning_rate": 3.3345000000000004e-06, "loss": 0.2419, "step": 6675 }, { "epoch": 0.07, "grad_norm": 15.731911659240723, "learning_rate": 3.3470000000000003e-06, "loss": 0.4944, "step": 6700 }, { "epoch": 0.07, "grad_norm": 13.339204788208008, "learning_rate": 3.3595000000000006e-06, "loss": 0.3134, "step": 6725 }, { "epoch": 0.07, "grad_norm": 24.882734298706055, "learning_rate": 3.372e-06, "loss": 0.4652, "step": 6750 }, { "epoch": 0.07, "grad_norm": 8.80457592010498, "learning_rate": 3.3845e-06, "loss": 0.2961, "step": 6775 }, { "epoch": 0.07, "grad_norm": 22.24454116821289, "learning_rate": 3.3970000000000003e-06, "loss": 0.5306, "step": 6800 }, { "epoch": 0.07, "grad_norm": 10.893237113952637, "learning_rate": 3.4095e-06, "loss": 0.2856, "step": 6825 }, { "epoch": 0.07, "grad_norm": 21.848621368408203, "learning_rate": 3.4220000000000004e-06, "loss": 0.5274, "step": 6850 }, { "epoch": 0.07, "grad_norm": 14.024698257446289, "learning_rate": 3.4345e-06, "loss": 0.2558, "step": 6875 }, { "epoch": 0.07, "grad_norm": 26.29399871826172, "learning_rate": 3.447e-06, "loss": 0.5125, "step": 6900 }, { "epoch": 0.07, "grad_norm": 9.820560455322266, "learning_rate": 3.4595e-06, "loss": 0.2572, "step": 6925 }, { "epoch": 0.07, "grad_norm": 28.196474075317383, "learning_rate": 3.4720000000000004e-06, "loss": 0.5627, "step": 6950 }, { "epoch": 0.07, "grad_norm": 14.124052047729492, "learning_rate": 3.4845000000000003e-06, "loss": 0.2832, "step": 6975 }, { "epoch": 0.07, "grad_norm": 22.85649871826172, "learning_rate": 3.4970000000000006e-06, "loss": 0.5186, "step": 7000 }, { "epoch": 0.07, "grad_norm": 14.750447273254395, "learning_rate": 3.5095e-06, "loss": 0.2403, "step": 7025 }, { "epoch": 0.07, "grad_norm": 19.621957778930664, "learning_rate": 3.5220000000000003e-06, "loss": 0.4861, "step": 7050 }, { "epoch": 0.07, "grad_norm": 20.507747650146484, "learning_rate": 3.5345e-06, "loss": 0.2931, "step": 7075 }, { "epoch": 0.07, "grad_norm": 25.39666748046875, "learning_rate": 3.5470000000000005e-06, "loss": 0.5592, "step": 7100 }, { "epoch": 0.07, "grad_norm": 11.910776138305664, "learning_rate": 3.5595e-06, "loss": 0.2552, "step": 7125 }, { "epoch": 0.07, "grad_norm": 25.64723777770996, "learning_rate": 3.5720000000000003e-06, "loss": 0.5246, "step": 7150 }, { "epoch": 0.07, "grad_norm": 15.35714054107666, "learning_rate": 3.5845e-06, "loss": 0.2292, "step": 7175 }, { "epoch": 0.07, "grad_norm": 23.31612205505371, "learning_rate": 3.5970000000000005e-06, "loss": 0.5988, "step": 7200 }, { "epoch": 0.07, "grad_norm": 12.978313446044922, "learning_rate": 3.6095000000000003e-06, "loss": 0.2889, "step": 7225 }, { "epoch": 0.07, "grad_norm": 26.85930633544922, "learning_rate": 3.6220000000000006e-06, "loss": 0.5019, "step": 7250 }, { "epoch": 0.07, "grad_norm": 17.600379943847656, "learning_rate": 3.6345e-06, "loss": 0.2556, "step": 7275 }, { "epoch": 0.07, "grad_norm": 23.689908981323242, "learning_rate": 3.6470000000000004e-06, "loss": 0.5746, "step": 7300 }, { "epoch": 0.07, "grad_norm": 13.22391128540039, "learning_rate": 3.6595000000000003e-06, "loss": 0.3464, "step": 7325 }, { "epoch": 0.07, "grad_norm": 24.8956298828125, "learning_rate": 3.6720000000000006e-06, "loss": 0.5345, "step": 7350 }, { "epoch": 0.07, "grad_norm": 14.578091621398926, "learning_rate": 3.6845000000000005e-06, "loss": 0.2815, "step": 7375 }, { "epoch": 0.07, "grad_norm": 26.383058547973633, "learning_rate": 3.697e-06, "loss": 0.5649, "step": 7400 }, { "epoch": 0.07, "grad_norm": 22.41503143310547, "learning_rate": 3.7095000000000002e-06, "loss": 0.3015, "step": 7425 }, { "epoch": 0.07, "grad_norm": 24.604387283325195, "learning_rate": 3.722e-06, "loss": 0.4839, "step": 7450 }, { "epoch": 0.07, "grad_norm": 14.580819129943848, "learning_rate": 3.7345000000000004e-06, "loss": 0.2978, "step": 7475 }, { "epoch": 0.07, "grad_norm": 21.283164978027344, "learning_rate": 3.7470000000000003e-06, "loss": 0.5139, "step": 7500 }, { "epoch": 0.07, "grad_norm": 10.010904312133789, "learning_rate": 3.7595e-06, "loss": 0.3079, "step": 7525 }, { "epoch": 0.07, "grad_norm": 24.23546600341797, "learning_rate": 3.772e-06, "loss": 0.4967, "step": 7550 }, { "epoch": 0.07, "grad_norm": 13.373985290527344, "learning_rate": 3.7845000000000004e-06, "loss": 0.2267, "step": 7575 }, { "epoch": 0.07, "grad_norm": 12.98265552520752, "learning_rate": 3.7970000000000002e-06, "loss": 0.4955, "step": 7600 }, { "epoch": 0.07, "grad_norm": 6.874820232391357, "learning_rate": 3.8095000000000005e-06, "loss": 0.2578, "step": 7625 }, { "epoch": 0.08, "grad_norm": 21.82326889038086, "learning_rate": 3.822e-06, "loss": 0.5047, "step": 7650 }, { "epoch": 0.08, "grad_norm": 12.44593334197998, "learning_rate": 3.8345e-06, "loss": 0.3044, "step": 7675 }, { "epoch": 0.08, "grad_norm": 26.83782386779785, "learning_rate": 3.847e-06, "loss": 0.5079, "step": 7700 }, { "epoch": 0.08, "grad_norm": 11.77517318725586, "learning_rate": 3.8595e-06, "loss": 0.282, "step": 7725 }, { "epoch": 0.08, "grad_norm": 25.238235473632812, "learning_rate": 3.872e-06, "loss": 0.5907, "step": 7750 }, { "epoch": 0.08, "grad_norm": 8.638229370117188, "learning_rate": 3.884500000000001e-06, "loss": 0.2488, "step": 7775 }, { "epoch": 0.08, "grad_norm": 23.63137435913086, "learning_rate": 3.897e-06, "loss": 0.478, "step": 7800 }, { "epoch": 0.08, "grad_norm": 9.084392547607422, "learning_rate": 3.9095000000000004e-06, "loss": 0.2474, "step": 7825 }, { "epoch": 0.08, "grad_norm": 21.994464874267578, "learning_rate": 3.922e-06, "loss": 0.6472, "step": 7850 }, { "epoch": 0.08, "grad_norm": 13.188467979431152, "learning_rate": 3.9345e-06, "loss": 0.2843, "step": 7875 }, { "epoch": 0.08, "grad_norm": 18.0020809173584, "learning_rate": 3.9470000000000005e-06, "loss": 0.5661, "step": 7900 }, { "epoch": 0.08, "grad_norm": 10.019874572753906, "learning_rate": 3.959500000000001e-06, "loss": 0.2968, "step": 7925 }, { "epoch": 0.08, "grad_norm": 28.442752838134766, "learning_rate": 3.972e-06, "loss": 0.4411, "step": 7950 }, { "epoch": 0.08, "grad_norm": 5.823444366455078, "learning_rate": 3.9845000000000006e-06, "loss": 0.3022, "step": 7975 }, { "epoch": 0.08, "grad_norm": 15.736676216125488, "learning_rate": 3.997e-06, "loss": 0.4735, "step": 8000 }, { "epoch": 0.08, "grad_norm": 17.315603256225586, "learning_rate": 4.0095e-06, "loss": 0.28, "step": 8025 }, { "epoch": 0.08, "grad_norm": 21.190200805664062, "learning_rate": 4.022000000000001e-06, "loss": 0.4471, "step": 8050 }, { "epoch": 0.08, "grad_norm": 10.620325088500977, "learning_rate": 4.0345e-06, "loss": 0.2958, "step": 8075 }, { "epoch": 0.08, "grad_norm": 17.59687614440918, "learning_rate": 4.047e-06, "loss": 0.4883, "step": 8100 }, { "epoch": 0.08, "grad_norm": 9.6557035446167, "learning_rate": 4.0595e-06, "loss": 0.2552, "step": 8125 }, { "epoch": 0.08, "grad_norm": 17.7050724029541, "learning_rate": 4.072e-06, "loss": 0.5205, "step": 8150 }, { "epoch": 0.08, "grad_norm": 16.824153900146484, "learning_rate": 4.0845000000000004e-06, "loss": 0.2733, "step": 8175 }, { "epoch": 0.08, "grad_norm": 23.354005813598633, "learning_rate": 4.097000000000001e-06, "loss": 0.523, "step": 8200 }, { "epoch": 0.08, "grad_norm": 11.490384101867676, "learning_rate": 4.1095e-06, "loss": 0.3069, "step": 8225 }, { "epoch": 0.08, "grad_norm": 21.421655654907227, "learning_rate": 4.1220000000000005e-06, "loss": 0.5702, "step": 8250 }, { "epoch": 0.08, "grad_norm": 13.663742065429688, "learning_rate": 4.1345e-06, "loss": 0.3051, "step": 8275 }, { "epoch": 0.08, "grad_norm": 15.061243057250977, "learning_rate": 4.147e-06, "loss": 0.49, "step": 8300 }, { "epoch": 0.08, "grad_norm": 13.875020027160645, "learning_rate": 4.159500000000001e-06, "loss": 0.2778, "step": 8325 }, { "epoch": 0.08, "grad_norm": 26.201854705810547, "learning_rate": 4.172000000000001e-06, "loss": 0.5266, "step": 8350 }, { "epoch": 0.08, "grad_norm": 13.63831901550293, "learning_rate": 4.1845e-06, "loss": 0.2854, "step": 8375 }, { "epoch": 0.08, "grad_norm": 23.785737991333008, "learning_rate": 4.197000000000001e-06, "loss": 0.5381, "step": 8400 }, { "epoch": 0.08, "grad_norm": 6.955095291137695, "learning_rate": 4.2095e-06, "loss": 0.2415, "step": 8425 }, { "epoch": 0.08, "grad_norm": 27.070676803588867, "learning_rate": 4.222e-06, "loss": 0.5918, "step": 8450 }, { "epoch": 0.08, "grad_norm": 7.101044178009033, "learning_rate": 4.2345e-06, "loss": 0.225, "step": 8475 }, { "epoch": 0.08, "grad_norm": 24.540523529052734, "learning_rate": 4.247e-06, "loss": 0.4973, "step": 8500 }, { "epoch": 0.08, "grad_norm": 8.22083568572998, "learning_rate": 4.2595000000000005e-06, "loss": 0.3192, "step": 8525 }, { "epoch": 0.08, "grad_norm": 18.599220275878906, "learning_rate": 4.272000000000001e-06, "loss": 0.4887, "step": 8550 }, { "epoch": 0.08, "grad_norm": 18.100698471069336, "learning_rate": 4.2845e-06, "loss": 0.259, "step": 8575 }, { "epoch": 0.08, "grad_norm": 28.053407669067383, "learning_rate": 4.2970000000000005e-06, "loss": 0.4608, "step": 8600 }, { "epoch": 0.08, "grad_norm": 11.991870880126953, "learning_rate": 4.3095e-06, "loss": 0.2614, "step": 8625 }, { "epoch": 0.09, "grad_norm": 21.04551124572754, "learning_rate": 4.322e-06, "loss": 0.5034, "step": 8650 }, { "epoch": 0.09, "grad_norm": 17.5179386138916, "learning_rate": 4.334500000000001e-06, "loss": 0.2551, "step": 8675 }, { "epoch": 0.09, "grad_norm": 23.22585105895996, "learning_rate": 4.347e-06, "loss": 0.5848, "step": 8700 }, { "epoch": 0.09, "grad_norm": 13.649556159973145, "learning_rate": 4.3595e-06, "loss": 0.2734, "step": 8725 }, { "epoch": 0.09, "grad_norm": 21.719152450561523, "learning_rate": 4.372e-06, "loss": 0.4945, "step": 8750 }, { "epoch": 0.09, "grad_norm": 13.563493728637695, "learning_rate": 4.3845e-06, "loss": 0.3496, "step": 8775 }, { "epoch": 0.09, "grad_norm": 23.399742126464844, "learning_rate": 4.397e-06, "loss": 0.5557, "step": 8800 }, { "epoch": 0.09, "grad_norm": 13.780128479003906, "learning_rate": 4.409500000000001e-06, "loss": 0.2798, "step": 8825 }, { "epoch": 0.09, "grad_norm": 25.886436462402344, "learning_rate": 4.422e-06, "loss": 0.5276, "step": 8850 }, { "epoch": 0.09, "grad_norm": 6.28591775894165, "learning_rate": 4.4345000000000005e-06, "loss": 0.2381, "step": 8875 }, { "epoch": 0.09, "grad_norm": 21.99445915222168, "learning_rate": 4.447e-06, "loss": 0.5332, "step": 8900 }, { "epoch": 0.09, "grad_norm": 16.101835250854492, "learning_rate": 4.4595e-06, "loss": 0.3008, "step": 8925 }, { "epoch": 0.09, "grad_norm": 22.013986587524414, "learning_rate": 4.4720000000000006e-06, "loss": 0.599, "step": 8950 }, { "epoch": 0.09, "grad_norm": 7.519379615783691, "learning_rate": 4.484500000000001e-06, "loss": 0.2823, "step": 8975 }, { "epoch": 0.09, "grad_norm": 26.04881477355957, "learning_rate": 4.497e-06, "loss": 0.4747, "step": 9000 }, { "epoch": 0.09, "grad_norm": 13.644451141357422, "learning_rate": 4.509500000000001e-06, "loss": 0.276, "step": 9025 }, { "epoch": 0.09, "grad_norm": 25.16413688659668, "learning_rate": 4.522e-06, "loss": 0.5637, "step": 9050 }, { "epoch": 0.09, "grad_norm": 8.572585105895996, "learning_rate": 4.5345e-06, "loss": 0.2662, "step": 9075 }, { "epoch": 0.09, "grad_norm": 17.3311767578125, "learning_rate": 4.547000000000001e-06, "loss": 0.6119, "step": 9100 }, { "epoch": 0.09, "grad_norm": 7.924992561340332, "learning_rate": 4.559500000000001e-06, "loss": 0.2257, "step": 9125 }, { "epoch": 0.09, "grad_norm": 17.264307022094727, "learning_rate": 4.5720000000000004e-06, "loss": 0.5441, "step": 9150 }, { "epoch": 0.09, "grad_norm": 10.749700546264648, "learning_rate": 4.584500000000001e-06, "loss": 0.2594, "step": 9175 }, { "epoch": 0.09, "grad_norm": 26.84062957763672, "learning_rate": 4.597e-06, "loss": 0.5022, "step": 9200 }, { "epoch": 0.09, "grad_norm": 24.41667938232422, "learning_rate": 4.6095000000000005e-06, "loss": 0.2491, "step": 9225 }, { "epoch": 0.09, "grad_norm": 21.916732788085938, "learning_rate": 4.622e-06, "loss": 0.5336, "step": 9250 }, { "epoch": 0.09, "grad_norm": 14.051859855651855, "learning_rate": 4.6345e-06, "loss": 0.249, "step": 9275 }, { "epoch": 0.09, "grad_norm": 28.83497428894043, "learning_rate": 4.6470000000000006e-06, "loss": 0.518, "step": 9300 }, { "epoch": 0.09, "grad_norm": 11.478419303894043, "learning_rate": 4.6595e-06, "loss": 0.2871, "step": 9325 }, { "epoch": 0.09, "grad_norm": 24.56195831298828, "learning_rate": 4.672e-06, "loss": 0.5698, "step": 9350 }, { "epoch": 0.09, "grad_norm": 11.39371109008789, "learning_rate": 4.6845e-06, "loss": 0.2798, "step": 9375 }, { "epoch": 0.09, "grad_norm": 30.029062271118164, "learning_rate": 4.697e-06, "loss": 0.479, "step": 9400 }, { "epoch": 0.09, "grad_norm": 18.96549415588379, "learning_rate": 4.7095e-06, "loss": 0.2298, "step": 9425 }, { "epoch": 0.09, "grad_norm": 20.138629913330078, "learning_rate": 4.722000000000001e-06, "loss": 0.4671, "step": 9450 }, { "epoch": 0.09, "grad_norm": 16.870698928833008, "learning_rate": 4.7345e-06, "loss": 0.2701, "step": 9475 }, { "epoch": 0.09, "grad_norm": 13.664759635925293, "learning_rate": 4.7465e-06, "loss": 0.4636, "step": 9500 }, { "epoch": 0.09, "grad_norm": 8.165226936340332, "learning_rate": 4.7590000000000005e-06, "loss": 0.2813, "step": 9525 }, { "epoch": 0.09, "grad_norm": 27.71885108947754, "learning_rate": 4.771500000000001e-06, "loss": 0.525, "step": 9550 }, { "epoch": 0.09, "grad_norm": 7.707765102386475, "learning_rate": 4.784e-06, "loss": 0.2858, "step": 9575 }, { "epoch": 0.09, "grad_norm": 19.818275451660156, "learning_rate": 4.796500000000001e-06, "loss": 0.5007, "step": 9600 }, { "epoch": 0.09, "grad_norm": 15.299739837646484, "learning_rate": 4.809e-06, "loss": 0.2143, "step": 9625 }, { "epoch": 0.09, "grad_norm": 21.056360244750977, "learning_rate": 4.8215e-06, "loss": 0.5008, "step": 9650 }, { "epoch": 0.1, "grad_norm": 11.27453327178955, "learning_rate": 4.834000000000001e-06, "loss": 0.2642, "step": 9675 }, { "epoch": 0.1, "grad_norm": 18.01426124572754, "learning_rate": 4.846500000000001e-06, "loss": 0.4637, "step": 9700 }, { "epoch": 0.1, "grad_norm": 17.159143447875977, "learning_rate": 4.859e-06, "loss": 0.3309, "step": 9725 }, { "epoch": 0.1, "grad_norm": 23.776634216308594, "learning_rate": 4.871500000000001e-06, "loss": 0.5211, "step": 9750 }, { "epoch": 0.1, "grad_norm": 12.18593692779541, "learning_rate": 4.884e-06, "loss": 0.2798, "step": 9775 }, { "epoch": 0.1, "grad_norm": 25.937992095947266, "learning_rate": 4.8965000000000005e-06, "loss": 0.6071, "step": 9800 }, { "epoch": 0.1, "grad_norm": 11.095364570617676, "learning_rate": 4.909000000000001e-06, "loss": 0.2424, "step": 9825 }, { "epoch": 0.1, "grad_norm": 17.490135192871094, "learning_rate": 4.9215e-06, "loss": 0.5596, "step": 9850 }, { "epoch": 0.1, "grad_norm": 11.591625213623047, "learning_rate": 4.9340000000000005e-06, "loss": 0.2748, "step": 9875 }, { "epoch": 0.1, "grad_norm": 22.617801666259766, "learning_rate": 4.9465e-06, "loss": 0.5004, "step": 9900 }, { "epoch": 0.1, "grad_norm": 19.421396255493164, "learning_rate": 4.959e-06, "loss": 0.2638, "step": 9925 }, { "epoch": 0.1, "grad_norm": 26.397296905517578, "learning_rate": 4.971500000000001e-06, "loss": 0.5179, "step": 9950 }, { "epoch": 0.1, "grad_norm": 17.36543083190918, "learning_rate": 4.984000000000001e-06, "loss": 0.2262, "step": 9975 }, { "epoch": 0.1, "grad_norm": 23.090946197509766, "learning_rate": 4.9965e-06, "loss": 0.5335, "step": 10000 }, { "epoch": 0.1, "grad_norm": 13.282963752746582, "learning_rate": 4.999910631832941e-06, "loss": 0.2487, "step": 10025 }, { "epoch": 0.1, "grad_norm": 25.679306030273438, "learning_rate": 4.999786509378693e-06, "loss": 0.6211, "step": 10050 }, { "epoch": 0.1, "grad_norm": 9.152881622314453, "learning_rate": 4.9996623869244444e-06, "loss": 0.2976, "step": 10075 }, { "epoch": 0.1, "grad_norm": 16.787349700927734, "learning_rate": 4.9995382644701965e-06, "loss": 0.5056, "step": 10100 }, { "epoch": 0.1, "grad_norm": 10.850042343139648, "learning_rate": 4.999414142015948e-06, "loss": 0.2436, "step": 10125 }, { "epoch": 0.1, "grad_norm": 23.591821670532227, "learning_rate": 4.9992900195617e-06, "loss": 0.5115, "step": 10150 }, { "epoch": 0.1, "grad_norm": 19.35371971130371, "learning_rate": 4.999165897107451e-06, "loss": 0.2776, "step": 10175 }, { "epoch": 0.1, "grad_norm": 19.542165756225586, "learning_rate": 4.999041774653202e-06, "loss": 0.5427, "step": 10200 }, { "epoch": 0.1, "grad_norm": 8.266374588012695, "learning_rate": 4.998917652198954e-06, "loss": 0.2444, "step": 10225 }, { "epoch": 0.1, "grad_norm": 24.8053035736084, "learning_rate": 4.9987935297447055e-06, "loss": 0.5708, "step": 10250 }, { "epoch": 0.1, "grad_norm": 18.700214385986328, "learning_rate": 4.998669407290457e-06, "loss": 0.2436, "step": 10275 }, { "epoch": 0.1, "grad_norm": 13.351673126220703, "learning_rate": 4.998545284836208e-06, "loss": 0.5795, "step": 10300 }, { "epoch": 0.1, "grad_norm": 12.220335960388184, "learning_rate": 4.99842116238196e-06, "loss": 0.2643, "step": 10325 }, { "epoch": 0.1, "grad_norm": 26.57877540588379, "learning_rate": 4.998297039927711e-06, "loss": 0.5469, "step": 10350 }, { "epoch": 0.1, "grad_norm": 11.269149780273438, "learning_rate": 4.998172917473462e-06, "loss": 0.2697, "step": 10375 }, { "epoch": 0.1, "grad_norm": 18.482614517211914, "learning_rate": 4.9980487950192144e-06, "loss": 0.507, "step": 10400 }, { "epoch": 0.1, "grad_norm": 13.973858833312988, "learning_rate": 4.997924672564966e-06, "loss": 0.2572, "step": 10425 }, { "epoch": 0.1, "grad_norm": 20.106151580810547, "learning_rate": 4.997800550110718e-06, "loss": 0.5853, "step": 10450 }, { "epoch": 0.1, "grad_norm": 14.178559303283691, "learning_rate": 4.997676427656469e-06, "loss": 0.29, "step": 10475 }, { "epoch": 0.1, "grad_norm": 19.054811477661133, "learning_rate": 4.997552305202221e-06, "loss": 0.5189, "step": 10500 }, { "epoch": 0.1, "grad_norm": 11.503232955932617, "learning_rate": 4.997428182747972e-06, "loss": 0.2779, "step": 10525 }, { "epoch": 0.1, "grad_norm": 20.076221466064453, "learning_rate": 4.997304060293724e-06, "loss": 0.5398, "step": 10550 }, { "epoch": 0.1, "grad_norm": 12.404020309448242, "learning_rate": 4.9971799378394755e-06, "loss": 0.2911, "step": 10575 }, { "epoch": 0.1, "grad_norm": 26.11992645263672, "learning_rate": 4.997055815385227e-06, "loss": 0.576, "step": 10600 }, { "epoch": 0.1, "grad_norm": 16.749366760253906, "learning_rate": 4.996931692930979e-06, "loss": 0.3276, "step": 10625 }, { "epoch": 0.1, "grad_norm": 23.507652282714844, "learning_rate": 4.99680757047673e-06, "loss": 0.5383, "step": 10650 }, { "epoch": 0.1, "grad_norm": 12.26186752319336, "learning_rate": 4.996683448022481e-06, "loss": 0.2997, "step": 10675 }, { "epoch": 0.11, "grad_norm": 19.75731086730957, "learning_rate": 4.996559325568233e-06, "loss": 0.4982, "step": 10700 }, { "epoch": 0.11, "grad_norm": 15.8826904296875, "learning_rate": 4.996435203113984e-06, "loss": 0.2744, "step": 10725 }, { "epoch": 0.11, "grad_norm": 18.90651512145996, "learning_rate": 4.996311080659736e-06, "loss": 0.5486, "step": 10750 }, { "epoch": 0.11, "grad_norm": 11.345558166503906, "learning_rate": 4.996186958205488e-06, "loss": 0.2397, "step": 10775 }, { "epoch": 0.11, "grad_norm": 22.565942764282227, "learning_rate": 4.996062835751239e-06, "loss": 0.5751, "step": 10800 }, { "epoch": 0.11, "grad_norm": 7.750554084777832, "learning_rate": 4.995938713296991e-06, "loss": 0.2544, "step": 10825 }, { "epoch": 0.11, "grad_norm": 22.427248001098633, "learning_rate": 4.995814590842742e-06, "loss": 0.6291, "step": 10850 }, { "epoch": 0.11, "grad_norm": 18.952747344970703, "learning_rate": 4.995690468388494e-06, "loss": 0.2513, "step": 10875 }, { "epoch": 0.11, "grad_norm": 25.117530822753906, "learning_rate": 4.9955663459342454e-06, "loss": 0.5179, "step": 10900 }, { "epoch": 0.11, "grad_norm": 11.321707725524902, "learning_rate": 4.9954422234799975e-06, "loss": 0.33, "step": 10925 }, { "epoch": 0.11, "grad_norm": 22.165754318237305, "learning_rate": 4.995318101025749e-06, "loss": 0.47, "step": 10950 }, { "epoch": 0.11, "grad_norm": 12.325865745544434, "learning_rate": 4.9951939785715e-06, "loss": 0.3183, "step": 10975 }, { "epoch": 0.11, "grad_norm": 24.12384033203125, "learning_rate": 4.995069856117252e-06, "loss": 0.5357, "step": 11000 }, { "epoch": 0.11, "grad_norm": 7.307888507843018, "learning_rate": 4.994945733663003e-06, "loss": 0.253, "step": 11025 }, { "epoch": 0.11, "grad_norm": 13.414085388183594, "learning_rate": 4.994821611208754e-06, "loss": 0.4648, "step": 11050 }, { "epoch": 0.11, "grad_norm": 15.519962310791016, "learning_rate": 4.9946974887545065e-06, "loss": 0.2464, "step": 11075 }, { "epoch": 0.11, "grad_norm": 19.317256927490234, "learning_rate": 4.994573366300258e-06, "loss": 0.5807, "step": 11100 }, { "epoch": 0.11, "grad_norm": 9.14033317565918, "learning_rate": 4.994449243846009e-06, "loss": 0.2594, "step": 11125 }, { "epoch": 0.11, "grad_norm": 28.810344696044922, "learning_rate": 4.99432512139176e-06, "loss": 0.5246, "step": 11150 }, { "epoch": 0.11, "grad_norm": 5.725365161895752, "learning_rate": 4.994200998937512e-06, "loss": 0.2355, "step": 11175 }, { "epoch": 0.11, "grad_norm": 20.414751052856445, "learning_rate": 4.994076876483263e-06, "loss": 0.5011, "step": 11200 }, { "epoch": 0.11, "grad_norm": 13.467123985290527, "learning_rate": 4.993952754029015e-06, "loss": 0.264, "step": 11225 }, { "epoch": 0.11, "grad_norm": 19.81863784790039, "learning_rate": 4.993828631574767e-06, "loss": 0.6013, "step": 11250 }, { "epoch": 0.11, "grad_norm": 9.675612449645996, "learning_rate": 4.993704509120519e-06, "loss": 0.2357, "step": 11275 }, { "epoch": 0.11, "grad_norm": 13.906631469726562, "learning_rate": 4.99358038666627e-06, "loss": 0.5337, "step": 11300 }, { "epoch": 0.11, "grad_norm": 14.109436988830566, "learning_rate": 4.993456264212022e-06, "loss": 0.2444, "step": 11325 }, { "epoch": 0.11, "grad_norm": 21.926607131958008, "learning_rate": 4.993332141757773e-06, "loss": 0.4864, "step": 11350 }, { "epoch": 0.11, "grad_norm": 15.570734024047852, "learning_rate": 4.993208019303524e-06, "loss": 0.3237, "step": 11375 }, { "epoch": 0.11, "grad_norm": 17.91618537902832, "learning_rate": 4.9930838968492764e-06, "loss": 0.4621, "step": 11400 }, { "epoch": 0.11, "grad_norm": 10.728463172912598, "learning_rate": 4.992959774395028e-06, "loss": 0.2791, "step": 11425 }, { "epoch": 0.11, "grad_norm": 19.393306732177734, "learning_rate": 4.992835651940779e-06, "loss": 0.5369, "step": 11450 }, { "epoch": 0.11, "grad_norm": 11.921093940734863, "learning_rate": 4.992711529486531e-06, "loss": 0.2709, "step": 11475 }, { "epoch": 0.11, "grad_norm": 24.586091995239258, "learning_rate": 4.992587407032282e-06, "loss": 0.5406, "step": 11500 }, { "epoch": 0.11, "grad_norm": 14.760448455810547, "learning_rate": 4.992463284578033e-06, "loss": 0.238, "step": 11525 }, { "epoch": 0.11, "grad_norm": 21.653860092163086, "learning_rate": 4.992339162123785e-06, "loss": 0.5387, "step": 11550 }, { "epoch": 0.11, "grad_norm": 12.344537734985352, "learning_rate": 4.992215039669537e-06, "loss": 0.3145, "step": 11575 }, { "epoch": 0.11, "grad_norm": 16.253210067749023, "learning_rate": 4.992090917215288e-06, "loss": 0.4605, "step": 11600 }, { "epoch": 0.11, "grad_norm": 7.623489856719971, "learning_rate": 4.99196679476104e-06, "loss": 0.233, "step": 11625 }, { "epoch": 0.11, "grad_norm": 30.13361930847168, "learning_rate": 4.991842672306791e-06, "loss": 0.5879, "step": 11650 }, { "epoch": 0.11, "grad_norm": 8.123913764953613, "learning_rate": 4.991718549852543e-06, "loss": 0.2808, "step": 11675 }, { "epoch": 0.12, "grad_norm": 19.540828704833984, "learning_rate": 4.991594427398294e-06, "loss": 0.5065, "step": 11700 }, { "epoch": 0.12, "grad_norm": 14.377106666564941, "learning_rate": 4.9914703049440464e-06, "loss": 0.297, "step": 11725 }, { "epoch": 0.12, "grad_norm": 16.65022087097168, "learning_rate": 4.991346182489798e-06, "loss": 0.5256, "step": 11750 }, { "epoch": 0.12, "grad_norm": 11.502799034118652, "learning_rate": 4.99122206003555e-06, "loss": 0.2809, "step": 11775 }, { "epoch": 0.12, "grad_norm": 26.51661491394043, "learning_rate": 4.991097937581301e-06, "loss": 0.5021, "step": 11800 }, { "epoch": 0.12, "grad_norm": 6.03272819519043, "learning_rate": 4.990973815127052e-06, "loss": 0.2623, "step": 11825 }, { "epoch": 0.12, "grad_norm": 30.85100555419922, "learning_rate": 4.990849692672804e-06, "loss": 0.4916, "step": 11850 }, { "epoch": 0.12, "grad_norm": 6.211580753326416, "learning_rate": 4.990725570218555e-06, "loss": 0.2791, "step": 11875 }, { "epoch": 0.12, "grad_norm": 23.7497615814209, "learning_rate": 4.990601447764307e-06, "loss": 0.4964, "step": 11900 }, { "epoch": 0.12, "grad_norm": 6.711952209472656, "learning_rate": 4.990477325310059e-06, "loss": 0.2713, "step": 11925 }, { "epoch": 0.12, "grad_norm": 21.505863189697266, "learning_rate": 4.99035320285581e-06, "loss": 0.5217, "step": 11950 }, { "epoch": 0.12, "grad_norm": 11.935708045959473, "learning_rate": 4.990229080401561e-06, "loss": 0.2872, "step": 11975 }, { "epoch": 0.12, "grad_norm": 23.39388656616211, "learning_rate": 4.990104957947312e-06, "loss": 0.5317, "step": 12000 }, { "epoch": 0.12, "grad_norm": 30.143388748168945, "learning_rate": 4.989980835493064e-06, "loss": 0.2678, "step": 12025 }, { "epoch": 0.12, "grad_norm": 21.727155685424805, "learning_rate": 4.9898567130388156e-06, "loss": 0.5064, "step": 12050 }, { "epoch": 0.12, "grad_norm": 10.196945190429688, "learning_rate": 4.989732590584568e-06, "loss": 0.2703, "step": 12075 }, { "epoch": 0.12, "grad_norm": 16.543643951416016, "learning_rate": 4.989608468130319e-06, "loss": 0.5277, "step": 12100 }, { "epoch": 0.12, "grad_norm": 13.497827529907227, "learning_rate": 4.989484345676071e-06, "loss": 0.2726, "step": 12125 }, { "epoch": 0.12, "grad_norm": 23.109840393066406, "learning_rate": 4.989360223221822e-06, "loss": 0.5661, "step": 12150 }, { "epoch": 0.12, "grad_norm": 13.47994327545166, "learning_rate": 4.989236100767574e-06, "loss": 0.3115, "step": 12175 }, { "epoch": 0.12, "grad_norm": 16.0810489654541, "learning_rate": 4.989111978313325e-06, "loss": 0.4769, "step": 12200 }, { "epoch": 0.12, "grad_norm": 6.685299873352051, "learning_rate": 4.988987855859077e-06, "loss": 0.2466, "step": 12225 }, { "epoch": 0.12, "grad_norm": 23.770832061767578, "learning_rate": 4.988863733404829e-06, "loss": 0.5365, "step": 12250 }, { "epoch": 0.12, "grad_norm": 10.945273399353027, "learning_rate": 4.98873961095058e-06, "loss": 0.3016, "step": 12275 }, { "epoch": 0.12, "grad_norm": 18.38798713684082, "learning_rate": 4.988615488496331e-06, "loss": 0.5854, "step": 12300 }, { "epoch": 0.12, "grad_norm": 8.091839790344238, "learning_rate": 4.988491366042083e-06, "loss": 0.2847, "step": 12325 }, { "epoch": 0.12, "grad_norm": 22.905858993530273, "learning_rate": 4.988367243587834e-06, "loss": 0.5267, "step": 12350 }, { "epoch": 0.12, "grad_norm": 14.37793254852295, "learning_rate": 4.9882431211335855e-06, "loss": 0.2781, "step": 12375 }, { "epoch": 0.12, "grad_norm": 19.86760139465332, "learning_rate": 4.988118998679338e-06, "loss": 0.5226, "step": 12400 }, { "epoch": 0.12, "grad_norm": 17.15184211730957, "learning_rate": 4.987994876225089e-06, "loss": 0.2861, "step": 12425 }, { "epoch": 0.12, "grad_norm": 32.382713317871094, "learning_rate": 4.987875718669011e-06, "loss": 0.5792, "step": 12450 }, { "epoch": 0.12, "grad_norm": 5.926074981689453, "learning_rate": 4.987751596214762e-06, "loss": 0.252, "step": 12475 }, { "epoch": 0.12, "grad_norm": 25.782726287841797, "learning_rate": 4.987627473760513e-06, "loss": 0.5469, "step": 12500 }, { "epoch": 0.12, "grad_norm": 10.610969543457031, "learning_rate": 4.987503351306265e-06, "loss": 0.3028, "step": 12525 }, { "epoch": 0.12, "grad_norm": 20.43320655822754, "learning_rate": 4.987379228852016e-06, "loss": 0.5453, "step": 12550 }, { "epoch": 0.12, "grad_norm": 11.803654670715332, "learning_rate": 4.9872551063977684e-06, "loss": 0.3022, "step": 12575 }, { "epoch": 0.12, "grad_norm": 26.188396453857422, "learning_rate": 4.98713098394352e-06, "loss": 0.4797, "step": 12600 }, { "epoch": 0.12, "grad_norm": 20.657249450683594, "learning_rate": 4.987006861489272e-06, "loss": 0.314, "step": 12625 }, { "epoch": 0.12, "grad_norm": 15.013749122619629, "learning_rate": 4.986882739035023e-06, "loss": 0.4943, "step": 12650 }, { "epoch": 0.12, "grad_norm": 16.18950843811035, "learning_rate": 4.986758616580774e-06, "loss": 0.2509, "step": 12675 }, { "epoch": 0.12, "grad_norm": 19.276540756225586, "learning_rate": 4.986634494126526e-06, "loss": 0.5471, "step": 12700 }, { "epoch": 0.13, "grad_norm": 8.666194915771484, "learning_rate": 4.986510371672277e-06, "loss": 0.2846, "step": 12725 }, { "epoch": 0.13, "grad_norm": 17.62954330444336, "learning_rate": 4.986386249218029e-06, "loss": 0.5482, "step": 12750 }, { "epoch": 0.13, "grad_norm": 9.162704467773438, "learning_rate": 4.986262126763781e-06, "loss": 0.2451, "step": 12775 }, { "epoch": 0.13, "grad_norm": 16.740333557128906, "learning_rate": 4.986138004309532e-06, "loss": 0.4916, "step": 12800 }, { "epoch": 0.13, "grad_norm": 12.710491180419922, "learning_rate": 4.986013881855283e-06, "loss": 0.27, "step": 12825 }, { "epoch": 0.13, "grad_norm": 15.47750186920166, "learning_rate": 4.985889759401035e-06, "loss": 0.5211, "step": 12850 }, { "epoch": 0.13, "grad_norm": 12.91158676147461, "learning_rate": 4.985765636946786e-06, "loss": 0.2447, "step": 12875 }, { "epoch": 0.13, "grad_norm": 21.692819595336914, "learning_rate": 4.985641514492538e-06, "loss": 0.5374, "step": 12900 }, { "epoch": 0.13, "grad_norm": 20.991777420043945, "learning_rate": 4.98551739203829e-06, "loss": 0.2709, "step": 12925 }, { "epoch": 0.13, "grad_norm": 33.01903533935547, "learning_rate": 4.985393269584041e-06, "loss": 0.5092, "step": 12950 }, { "epoch": 0.13, "grad_norm": 10.786160469055176, "learning_rate": 4.985269147129793e-06, "loss": 0.3071, "step": 12975 }, { "epoch": 0.13, "grad_norm": 19.437297821044922, "learning_rate": 4.985145024675544e-06, "loss": 0.5101, "step": 13000 }, { "epoch": 0.13, "grad_norm": 11.589970588684082, "learning_rate": 4.985020902221296e-06, "loss": 0.2588, "step": 13025 }, { "epoch": 0.13, "grad_norm": 18.21245574951172, "learning_rate": 4.984896779767047e-06, "loss": 0.5163, "step": 13050 }, { "epoch": 0.13, "grad_norm": 9.781768798828125, "learning_rate": 4.9847726573127995e-06, "loss": 0.2164, "step": 13075 }, { "epoch": 0.13, "grad_norm": 30.204601287841797, "learning_rate": 4.984648534858551e-06, "loss": 0.5606, "step": 13100 }, { "epoch": 0.13, "grad_norm": 10.192940711975098, "learning_rate": 4.984524412404302e-06, "loss": 0.2842, "step": 13125 }, { "epoch": 0.13, "grad_norm": 21.646982192993164, "learning_rate": 4.984400289950054e-06, "loss": 0.4577, "step": 13150 }, { "epoch": 0.13, "grad_norm": 22.002431869506836, "learning_rate": 4.984276167495805e-06, "loss": 0.2717, "step": 13175 }, { "epoch": 0.13, "grad_norm": 24.286048889160156, "learning_rate": 4.984152045041556e-06, "loss": 0.4823, "step": 13200 }, { "epoch": 0.13, "grad_norm": 12.708107948303223, "learning_rate": 4.984027922587308e-06, "loss": 0.28, "step": 13225 }, { "epoch": 0.13, "grad_norm": 24.947509765625, "learning_rate": 4.98390380013306e-06, "loss": 0.448, "step": 13250 }, { "epoch": 0.13, "grad_norm": 12.28394603729248, "learning_rate": 4.983779677678811e-06, "loss": 0.2611, "step": 13275 }, { "epoch": 0.13, "grad_norm": 21.61871910095215, "learning_rate": 4.983655555224563e-06, "loss": 0.5637, "step": 13300 }, { "epoch": 0.13, "grad_norm": 12.270915985107422, "learning_rate": 4.983531432770314e-06, "loss": 0.2957, "step": 13325 }, { "epoch": 0.13, "grad_norm": 11.274682998657227, "learning_rate": 4.983407310316065e-06, "loss": 0.4706, "step": 13350 }, { "epoch": 0.13, "grad_norm": 17.540630340576172, "learning_rate": 4.983283187861817e-06, "loss": 0.2904, "step": 13375 }, { "epoch": 0.13, "grad_norm": 17.78231430053711, "learning_rate": 4.983159065407569e-06, "loss": 0.5424, "step": 13400 }, { "epoch": 0.13, "grad_norm": 13.334033966064453, "learning_rate": 4.983034942953321e-06, "loss": 0.2913, "step": 13425 }, { "epoch": 0.13, "grad_norm": 20.807716369628906, "learning_rate": 4.982910820499072e-06, "loss": 0.481, "step": 13450 }, { "epoch": 0.13, "grad_norm": 15.895357131958008, "learning_rate": 4.982786698044824e-06, "loss": 0.2417, "step": 13475 }, { "epoch": 0.13, "grad_norm": 31.618642807006836, "learning_rate": 4.982662575590575e-06, "loss": 0.5579, "step": 13500 }, { "epoch": 0.13, "grad_norm": 8.626304626464844, "learning_rate": 4.982538453136326e-06, "loss": 0.2723, "step": 13525 }, { "epoch": 0.13, "grad_norm": 17.925159454345703, "learning_rate": 4.982414330682078e-06, "loss": 0.4956, "step": 13550 }, { "epoch": 0.13, "grad_norm": 11.024227142333984, "learning_rate": 4.98229020822783e-06, "loss": 0.2446, "step": 13575 }, { "epoch": 0.13, "grad_norm": 19.85013198852539, "learning_rate": 4.982166085773581e-06, "loss": 0.5389, "step": 13600 }, { "epoch": 0.13, "grad_norm": 7.441731929779053, "learning_rate": 4.982041963319333e-06, "loss": 0.2648, "step": 13625 }, { "epoch": 0.13, "grad_norm": 26.371042251586914, "learning_rate": 4.981917840865084e-06, "loss": 0.6291, "step": 13650 }, { "epoch": 0.13, "grad_norm": 19.1645450592041, "learning_rate": 4.981793718410835e-06, "loss": 0.246, "step": 13675 }, { "epoch": 0.13, "grad_norm": 23.33361053466797, "learning_rate": 4.981669595956587e-06, "loss": 0.5515, "step": 13700 }, { "epoch": 0.13, "grad_norm": 10.347960472106934, "learning_rate": 4.9815454735023386e-06, "loss": 0.2693, "step": 13725 }, { "epoch": 0.14, "grad_norm": 17.796226501464844, "learning_rate": 4.981421351048091e-06, "loss": 0.4806, "step": 13750 }, { "epoch": 0.14, "grad_norm": 13.04281997680664, "learning_rate": 4.981297228593842e-06, "loss": 0.2705, "step": 13775 }, { "epoch": 0.14, "grad_norm": 27.559202194213867, "learning_rate": 4.981173106139594e-06, "loss": 0.5813, "step": 13800 }, { "epoch": 0.14, "grad_norm": 7.85401725769043, "learning_rate": 4.981048983685345e-06, "loss": 0.2983, "step": 13825 }, { "epoch": 0.14, "grad_norm": 20.356754302978516, "learning_rate": 4.980924861231097e-06, "loss": 0.5561, "step": 13850 }, { "epoch": 0.14, "grad_norm": 11.374999046325684, "learning_rate": 4.980800738776848e-06, "loss": 0.2473, "step": 13875 }, { "epoch": 0.14, "grad_norm": 18.950660705566406, "learning_rate": 4.9806766163226e-06, "loss": 0.453, "step": 13900 }, { "epoch": 0.14, "grad_norm": 7.7770233154296875, "learning_rate": 4.980552493868352e-06, "loss": 0.2485, "step": 13925 }, { "epoch": 0.14, "grad_norm": 19.08078956604004, "learning_rate": 4.980428371414103e-06, "loss": 0.4818, "step": 13950 }, { "epoch": 0.14, "grad_norm": 14.961115837097168, "learning_rate": 4.980304248959854e-06, "loss": 0.2536, "step": 13975 }, { "epoch": 0.14, "grad_norm": 23.517932891845703, "learning_rate": 4.980180126505606e-06, "loss": 0.5066, "step": 14000 }, { "epoch": 0.14, "grad_norm": 11.202055931091309, "learning_rate": 4.980056004051357e-06, "loss": 0.3154, "step": 14025 }, { "epoch": 0.14, "grad_norm": 21.779247283935547, "learning_rate": 4.9799318815971086e-06, "loss": 0.5258, "step": 14050 }, { "epoch": 0.14, "grad_norm": 13.34636116027832, "learning_rate": 4.979807759142861e-06, "loss": 0.2362, "step": 14075 }, { "epoch": 0.14, "grad_norm": 27.772789001464844, "learning_rate": 4.979683636688612e-06, "loss": 0.446, "step": 14100 }, { "epoch": 0.14, "grad_norm": 9.692258834838867, "learning_rate": 4.979559514234363e-06, "loss": 0.2747, "step": 14125 }, { "epoch": 0.14, "grad_norm": 25.284006118774414, "learning_rate": 4.979435391780115e-06, "loss": 0.4436, "step": 14150 }, { "epoch": 0.14, "grad_norm": 6.150668144226074, "learning_rate": 4.979311269325866e-06, "loss": 0.284, "step": 14175 }, { "epoch": 0.14, "grad_norm": 27.137489318847656, "learning_rate": 4.979187146871618e-06, "loss": 0.5524, "step": 14200 }, { "epoch": 0.14, "grad_norm": 7.087206840515137, "learning_rate": 4.97906302441737e-06, "loss": 0.26, "step": 14225 }, { "epoch": 0.14, "grad_norm": 23.612459182739258, "learning_rate": 4.978938901963122e-06, "loss": 0.5385, "step": 14250 }, { "epoch": 0.14, "grad_norm": 7.186532974243164, "learning_rate": 4.978814779508873e-06, "loss": 0.2848, "step": 14275 }, { "epoch": 0.14, "grad_norm": 17.90462303161621, "learning_rate": 4.978690657054624e-06, "loss": 0.4194, "step": 14300 }, { "epoch": 0.14, "grad_norm": 21.560543060302734, "learning_rate": 4.978566534600376e-06, "loss": 0.2672, "step": 14325 }, { "epoch": 0.14, "grad_norm": 16.415712356567383, "learning_rate": 4.978442412146127e-06, "loss": 0.5434, "step": 14350 }, { "epoch": 0.14, "grad_norm": 14.378253936767578, "learning_rate": 4.9783182896918785e-06, "loss": 0.2393, "step": 14375 }, { "epoch": 0.14, "grad_norm": 21.49327278137207, "learning_rate": 4.978194167237631e-06, "loss": 0.4547, "step": 14400 }, { "epoch": 0.14, "grad_norm": 15.48121166229248, "learning_rate": 4.978070044783382e-06, "loss": 0.2927, "step": 14425 }, { "epoch": 0.14, "grad_norm": 19.058778762817383, "learning_rate": 4.977945922329133e-06, "loss": 0.5367, "step": 14450 }, { "epoch": 0.14, "grad_norm": 10.133377075195312, "learning_rate": 4.977821799874885e-06, "loss": 0.2394, "step": 14475 }, { "epoch": 0.14, "grad_norm": 17.430953979492188, "learning_rate": 4.977697677420636e-06, "loss": 0.4202, "step": 14500 }, { "epoch": 0.14, "grad_norm": 5.638731002807617, "learning_rate": 4.9775735549663875e-06, "loss": 0.2344, "step": 14525 }, { "epoch": 0.14, "grad_norm": 30.51548957824707, "learning_rate": 4.9774494325121396e-06, "loss": 0.5765, "step": 14550 }, { "epoch": 0.14, "grad_norm": 3.6119155883789062, "learning_rate": 4.977325310057891e-06, "loss": 0.2325, "step": 14575 }, { "epoch": 0.14, "grad_norm": 26.820199966430664, "learning_rate": 4.977201187603643e-06, "loss": 0.4875, "step": 14600 }, { "epoch": 0.14, "grad_norm": 5.338459491729736, "learning_rate": 4.977077065149394e-06, "loss": 0.281, "step": 14625 }, { "epoch": 0.14, "grad_norm": 17.131752014160156, "learning_rate": 4.976952942695146e-06, "loss": 0.5171, "step": 14650 }, { "epoch": 0.14, "grad_norm": 11.638932228088379, "learning_rate": 4.976828820240897e-06, "loss": 0.2903, "step": 14675 }, { "epoch": 0.14, "grad_norm": 14.166421890258789, "learning_rate": 4.976704697786649e-06, "loss": 0.4519, "step": 14700 }, { "epoch": 0.14, "grad_norm": 17.425994873046875, "learning_rate": 4.976580575332401e-06, "loss": 0.2429, "step": 14725 }, { "epoch": 0.15, "grad_norm": 18.350223541259766, "learning_rate": 4.976456452878152e-06, "loss": 0.4962, "step": 14750 }, { "epoch": 0.15, "grad_norm": 11.77961254119873, "learning_rate": 4.976332330423904e-06, "loss": 0.2675, "step": 14775 }, { "epoch": 0.15, "grad_norm": 16.29339027404785, "learning_rate": 4.976208207969655e-06, "loss": 0.4747, "step": 14800 }, { "epoch": 0.15, "grad_norm": 17.989728927612305, "learning_rate": 4.976084085515406e-06, "loss": 0.269, "step": 14825 }, { "epoch": 0.15, "grad_norm": 19.6663818359375, "learning_rate": 4.975959963061158e-06, "loss": 0.502, "step": 14850 }, { "epoch": 0.15, "grad_norm": 8.90152645111084, "learning_rate": 4.9758358406069095e-06, "loss": 0.2107, "step": 14875 }, { "epoch": 0.15, "grad_norm": 16.244401931762695, "learning_rate": 4.975711718152661e-06, "loss": 0.5199, "step": 14900 }, { "epoch": 0.15, "grad_norm": 9.38062572479248, "learning_rate": 4.975587595698413e-06, "loss": 0.2409, "step": 14925 }, { "epoch": 0.15, "grad_norm": 14.46139144897461, "learning_rate": 4.975463473244164e-06, "loss": 0.5549, "step": 14950 }, { "epoch": 0.15, "grad_norm": 20.936086654663086, "learning_rate": 4.975339350789915e-06, "loss": 0.3264, "step": 14975 }, { "epoch": 0.15, "grad_norm": 22.464397430419922, "learning_rate": 4.975215228335667e-06, "loss": 0.5057, "step": 15000 }, { "epoch": 0.15, "grad_norm": 12.063855171203613, "learning_rate": 4.9750911058814185e-06, "loss": 0.3005, "step": 15025 }, { "epoch": 0.15, "grad_norm": 21.589576721191406, "learning_rate": 4.9749669834271706e-06, "loss": 0.5677, "step": 15050 }, { "epoch": 0.15, "grad_norm": 18.305194854736328, "learning_rate": 4.974842860972922e-06, "loss": 0.2825, "step": 15075 }, { "epoch": 0.15, "grad_norm": 14.700572967529297, "learning_rate": 4.974718738518674e-06, "loss": 0.4603, "step": 15100 }, { "epoch": 0.15, "grad_norm": 10.48423957824707, "learning_rate": 4.974594616064425e-06, "loss": 0.3119, "step": 15125 }, { "epoch": 0.15, "grad_norm": 20.655241012573242, "learning_rate": 4.974470493610176e-06, "loss": 0.4902, "step": 15150 }, { "epoch": 0.15, "grad_norm": 11.36275863647461, "learning_rate": 4.974346371155928e-06, "loss": 0.2514, "step": 15175 }, { "epoch": 0.15, "grad_norm": 27.180463790893555, "learning_rate": 4.9742222487016795e-06, "loss": 0.4635, "step": 15200 }, { "epoch": 0.15, "grad_norm": 12.618508338928223, "learning_rate": 4.974098126247431e-06, "loss": 0.24, "step": 15225 }, { "epoch": 0.15, "grad_norm": 19.663719177246094, "learning_rate": 4.973974003793183e-06, "loss": 0.5363, "step": 15250 }, { "epoch": 0.15, "grad_norm": 11.07215404510498, "learning_rate": 4.973849881338934e-06, "loss": 0.3128, "step": 15275 }, { "epoch": 0.15, "grad_norm": 13.9849853515625, "learning_rate": 4.973725758884685e-06, "loss": 0.4762, "step": 15300 }, { "epoch": 0.15, "grad_norm": 8.55556869506836, "learning_rate": 4.973601636430437e-06, "loss": 0.2887, "step": 15325 }, { "epoch": 0.15, "grad_norm": 22.939956665039062, "learning_rate": 4.9734775139761885e-06, "loss": 0.5367, "step": 15350 }, { "epoch": 0.15, "grad_norm": 7.116028308868408, "learning_rate": 4.97335339152194e-06, "loss": 0.243, "step": 15375 }, { "epoch": 0.15, "grad_norm": 23.05391502380371, "learning_rate": 4.973229269067692e-06, "loss": 0.4455, "step": 15400 }, { "epoch": 0.15, "grad_norm": 9.976334571838379, "learning_rate": 4.973105146613443e-06, "loss": 0.2302, "step": 15425 }, { "epoch": 0.15, "grad_norm": 35.89129638671875, "learning_rate": 4.972981024159195e-06, "loss": 0.4328, "step": 15450 }, { "epoch": 0.15, "grad_norm": 11.260632514953613, "learning_rate": 4.972856901704946e-06, "loss": 0.2341, "step": 15475 }, { "epoch": 0.15, "grad_norm": 17.072036743164062, "learning_rate": 4.972732779250698e-06, "loss": 0.5717, "step": 15500 }, { "epoch": 0.15, "grad_norm": 10.643904685974121, "learning_rate": 4.9726086567964495e-06, "loss": 0.2232, "step": 15525 }, { "epoch": 0.15, "grad_norm": 28.469635009765625, "learning_rate": 4.9724845343422016e-06, "loss": 0.5401, "step": 15550 }, { "epoch": 0.15, "grad_norm": 14.579791069030762, "learning_rate": 4.972360411887953e-06, "loss": 0.2597, "step": 15575 }, { "epoch": 0.15, "grad_norm": 23.388647079467773, "learning_rate": 4.972236289433704e-06, "loss": 0.4084, "step": 15600 }, { "epoch": 0.15, "grad_norm": 12.857820510864258, "learning_rate": 4.972112166979456e-06, "loss": 0.2395, "step": 15625 }, { "epoch": 0.15, "grad_norm": 20.087690353393555, "learning_rate": 4.971988044525207e-06, "loss": 0.4569, "step": 15650 }, { "epoch": 0.15, "grad_norm": 13.204368591308594, "learning_rate": 4.9718639220709585e-06, "loss": 0.2473, "step": 15675 }, { "epoch": 0.15, "grad_norm": 23.355751037597656, "learning_rate": 4.9717397996167105e-06, "loss": 0.4627, "step": 15700 }, { "epoch": 0.15, "grad_norm": 9.791829109191895, "learning_rate": 4.971615677162462e-06, "loss": 0.2826, "step": 15725 }, { "epoch": 0.15, "grad_norm": 20.32525062561035, "learning_rate": 4.971491554708213e-06, "loss": 0.4669, "step": 15750 }, { "epoch": 0.16, "grad_norm": 8.821320533752441, "learning_rate": 4.971367432253965e-06, "loss": 0.2439, "step": 15775 }, { "epoch": 0.16, "grad_norm": 19.94088363647461, "learning_rate": 4.971243309799716e-06, "loss": 0.4316, "step": 15800 }, { "epoch": 0.16, "grad_norm": 13.622116088867188, "learning_rate": 4.9711191873454674e-06, "loss": 0.2502, "step": 15825 }, { "epoch": 0.16, "grad_norm": 32.333702087402344, "learning_rate": 4.9709950648912195e-06, "loss": 0.512, "step": 15850 }, { "epoch": 0.16, "grad_norm": 11.651398658752441, "learning_rate": 4.970870942436971e-06, "loss": 0.2535, "step": 15875 }, { "epoch": 0.16, "grad_norm": 20.219032287597656, "learning_rate": 4.970746819982723e-06, "loss": 0.4621, "step": 15900 }, { "epoch": 0.16, "grad_norm": 7.731549263000488, "learning_rate": 4.970622697528474e-06, "loss": 0.302, "step": 15925 }, { "epoch": 0.16, "grad_norm": 19.88355827331543, "learning_rate": 4.970498575074226e-06, "loss": 0.5391, "step": 15950 }, { "epoch": 0.16, "grad_norm": 11.04710865020752, "learning_rate": 4.970374452619977e-06, "loss": 0.2542, "step": 15975 }, { "epoch": 0.16, "grad_norm": 26.96494483947754, "learning_rate": 4.9702503301657285e-06, "loss": 0.4738, "step": 16000 }, { "epoch": 0.16, "grad_norm": 6.251286029815674, "learning_rate": 4.9701262077114805e-06, "loss": 0.2691, "step": 16025 }, { "epoch": 0.16, "grad_norm": 22.478897094726562, "learning_rate": 4.970002085257232e-06, "loss": 0.4848, "step": 16050 }, { "epoch": 0.16, "grad_norm": 14.696756362915039, "learning_rate": 4.969877962802983e-06, "loss": 0.2471, "step": 16075 }, { "epoch": 0.16, "grad_norm": 23.886762619018555, "learning_rate": 4.969753840348735e-06, "loss": 0.4978, "step": 16100 }, { "epoch": 0.16, "grad_norm": 11.509363174438477, "learning_rate": 4.969629717894486e-06, "loss": 0.2891, "step": 16125 }, { "epoch": 0.16, "grad_norm": 19.187963485717773, "learning_rate": 4.9695055954402374e-06, "loss": 0.5049, "step": 16150 }, { "epoch": 0.16, "grad_norm": 17.53785514831543, "learning_rate": 4.9693814729859895e-06, "loss": 0.2538, "step": 16175 }, { "epoch": 0.16, "grad_norm": 23.84693717956543, "learning_rate": 4.969257350531741e-06, "loss": 0.4836, "step": 16200 }, { "epoch": 0.16, "grad_norm": 9.805728912353516, "learning_rate": 4.969133228077492e-06, "loss": 0.2971, "step": 16225 }, { "epoch": 0.16, "grad_norm": 22.895946502685547, "learning_rate": 4.969009105623244e-06, "loss": 0.4756, "step": 16250 }, { "epoch": 0.16, "grad_norm": 9.801761627197266, "learning_rate": 4.968884983168995e-06, "loss": 0.2208, "step": 16275 }, { "epoch": 0.16, "grad_norm": 19.623172760009766, "learning_rate": 4.968760860714747e-06, "loss": 0.5081, "step": 16300 }, { "epoch": 0.16, "grad_norm": 12.879268646240234, "learning_rate": 4.9686367382604984e-06, "loss": 0.3037, "step": 16325 }, { "epoch": 0.16, "grad_norm": 14.988588333129883, "learning_rate": 4.9685126158062505e-06, "loss": 0.5019, "step": 16350 }, { "epoch": 0.16, "grad_norm": 7.808897495269775, "learning_rate": 4.968388493352002e-06, "loss": 0.2437, "step": 16375 }, { "epoch": 0.16, "grad_norm": 21.12799072265625, "learning_rate": 4.968264370897754e-06, "loss": 0.4582, "step": 16400 }, { "epoch": 0.16, "grad_norm": 12.282492637634277, "learning_rate": 4.968140248443505e-06, "loss": 0.2936, "step": 16425 }, { "epoch": 0.16, "grad_norm": Infinity, "learning_rate": 4.968021090887427e-06, "loss": 0.5027, "step": 16450 }, { "epoch": 0.16, "grad_norm": 10.037689208984375, "learning_rate": 4.967896968433178e-06, "loss": 0.2785, "step": 16475 }, { "epoch": 0.16, "grad_norm": 20.163658142089844, "learning_rate": 4.967772845978929e-06, "loss": 0.4768, "step": 16500 }, { "epoch": 0.16, "grad_norm": 11.835463523864746, "learning_rate": 4.9676487235246805e-06, "loss": 0.2344, "step": 16525 }, { "epoch": 0.16, "grad_norm": 23.98141098022461, "learning_rate": 4.9675246010704326e-06, "loss": 0.4923, "step": 16550 }, { "epoch": 0.16, "grad_norm": 15.236113548278809, "learning_rate": 4.967400478616184e-06, "loss": 0.3177, "step": 16575 }, { "epoch": 0.16, "grad_norm": Infinity, "learning_rate": 4.967281321060106e-06, "loss": 0.5228, "step": 16600 }, { "epoch": 0.16, "grad_norm": 12.203200340270996, "learning_rate": 4.967157198605857e-06, "loss": 0.2442, "step": 16625 }, { "epoch": 0.16, "grad_norm": 26.26402473449707, "learning_rate": 4.967033076151608e-06, "loss": 0.4832, "step": 16650 }, { "epoch": 0.16, "grad_norm": 5.828324317932129, "learning_rate": 4.96690895369736e-06, "loss": 0.2561, "step": 16675 }, { "epoch": 0.16, "grad_norm": 18.208147048950195, "learning_rate": 4.966784831243111e-06, "loss": 0.4855, "step": 16700 }, { "epoch": 0.16, "grad_norm": 20.633235931396484, "learning_rate": 4.966660708788863e-06, "loss": 0.2663, "step": 16725 }, { "epoch": 0.16, "grad_norm": 31.885820388793945, "learning_rate": 4.966536586334615e-06, "loss": 0.481, "step": 16750 }, { "epoch": 0.16, "grad_norm": 10.812783241271973, "learning_rate": 4.966412463880367e-06, "loss": 0.2324, "step": 16775 }, { "epoch": 0.17, "grad_norm": 23.603435516357422, "learning_rate": 4.966288341426118e-06, "loss": 0.4985, "step": 16800 }, { "epoch": 0.17, "grad_norm": 10.6779203414917, "learning_rate": 4.96616421897187e-06, "loss": 0.2843, "step": 16825 }, { "epoch": 0.17, "grad_norm": 15.835171699523926, "learning_rate": 4.966040096517621e-06, "loss": 0.5514, "step": 16850 }, { "epoch": 0.17, "grad_norm": 9.793071746826172, "learning_rate": 4.965915974063372e-06, "loss": 0.2696, "step": 16875 }, { "epoch": 0.17, "grad_norm": 18.969919204711914, "learning_rate": 4.965791851609124e-06, "loss": 0.5197, "step": 16900 }, { "epoch": 0.17, "grad_norm": 19.53415298461914, "learning_rate": 4.965667729154876e-06, "loss": 0.2713, "step": 16925 }, { "epoch": 0.17, "grad_norm": 18.550703048706055, "learning_rate": 4.965543606700627e-06, "loss": 0.4739, "step": 16950 }, { "epoch": 0.17, "grad_norm": 14.718280792236328, "learning_rate": 4.965419484246379e-06, "loss": 0.272, "step": 16975 }, { "epoch": 0.17, "grad_norm": 21.451736450195312, "learning_rate": 4.96529536179213e-06, "loss": 0.4434, "step": 17000 }, { "epoch": 0.17, "grad_norm": 8.39813232421875, "learning_rate": 4.965171239337881e-06, "loss": 0.2707, "step": 17025 }, { "epoch": 0.17, "grad_norm": 22.227256774902344, "learning_rate": 4.965047116883633e-06, "loss": 0.5078, "step": 17050 }, { "epoch": 0.17, "grad_norm": 12.235150337219238, "learning_rate": 4.964922994429385e-06, "loss": 0.2564, "step": 17075 }, { "epoch": 0.17, "grad_norm": 20.009714126586914, "learning_rate": 4.964798871975136e-06, "loss": 0.4985, "step": 17100 }, { "epoch": 0.17, "grad_norm": 12.395207405090332, "learning_rate": 4.964674749520888e-06, "loss": 0.286, "step": 17125 }, { "epoch": 0.17, "grad_norm": 20.421070098876953, "learning_rate": 4.964550627066639e-06, "loss": 0.5653, "step": 17150 }, { "epoch": 0.17, "grad_norm": 10.982576370239258, "learning_rate": 4.964426504612391e-06, "loss": 0.2649, "step": 17175 }, { "epoch": 0.17, "grad_norm": 13.663883209228516, "learning_rate": 4.964302382158142e-06, "loss": 0.4879, "step": 17200 }, { "epoch": 0.17, "grad_norm": 15.14064884185791, "learning_rate": 4.964178259703894e-06, "loss": 0.2866, "step": 17225 }, { "epoch": 0.17, "grad_norm": 17.585609436035156, "learning_rate": 4.964054137249646e-06, "loss": 0.5298, "step": 17250 }, { "epoch": 0.17, "grad_norm": 17.74048614501953, "learning_rate": 4.963930014795397e-06, "loss": 0.2906, "step": 17275 }, { "epoch": 0.17, "grad_norm": 23.71223258972168, "learning_rate": 4.963805892341149e-06, "loss": 0.6191, "step": 17300 }, { "epoch": 0.17, "grad_norm": 12.305322647094727, "learning_rate": 4.9636817698869e-06, "loss": 0.2723, "step": 17325 }, { "epoch": 0.17, "grad_norm": 22.370222091674805, "learning_rate": 4.963557647432651e-06, "loss": 0.5592, "step": 17350 }, { "epoch": 0.17, "grad_norm": 15.362010955810547, "learning_rate": 4.963433524978403e-06, "loss": 0.2518, "step": 17375 }, { "epoch": 0.17, "grad_norm": 23.26699447631836, "learning_rate": 4.9633094025241546e-06, "loss": 0.4946, "step": 17400 }, { "epoch": 0.17, "grad_norm": 14.207911491394043, "learning_rate": 4.963185280069906e-06, "loss": 0.2676, "step": 17425 }, { "epoch": 0.17, "grad_norm": 20.793851852416992, "learning_rate": 4.963061157615658e-06, "loss": 0.4157, "step": 17450 }, { "epoch": 0.17, "grad_norm": 12.330697059631348, "learning_rate": 4.962937035161409e-06, "loss": 0.2761, "step": 17475 }, { "epoch": 0.17, "grad_norm": 18.283008575439453, "learning_rate": 4.96281291270716e-06, "loss": 0.5131, "step": 17500 }, { "epoch": 0.17, "grad_norm": 12.976131439208984, "learning_rate": 4.962688790252912e-06, "loss": 0.2363, "step": 17525 }, { "epoch": 0.17, "grad_norm": 25.542652130126953, "learning_rate": 4.9625646677986635e-06, "loss": 0.5819, "step": 17550 }, { "epoch": 0.17, "grad_norm": 14.807012557983398, "learning_rate": 4.962440545344416e-06, "loss": 0.2424, "step": 17575 }, { "epoch": 0.17, "grad_norm": 21.885854721069336, "learning_rate": 4.962316422890167e-06, "loss": 0.4665, "step": 17600 }, { "epoch": 0.17, "grad_norm": 21.303295135498047, "learning_rate": 4.962192300435919e-06, "loss": 0.201, "step": 17625 }, { "epoch": 0.17, "grad_norm": 19.957319259643555, "learning_rate": 4.96206817798167e-06, "loss": 0.4607, "step": 17650 }, { "epoch": 0.17, "grad_norm": 6.708022594451904, "learning_rate": 4.961944055527422e-06, "loss": 0.2181, "step": 17675 }, { "epoch": 0.17, "grad_norm": 18.394012451171875, "learning_rate": 4.961819933073173e-06, "loss": 0.4658, "step": 17700 }, { "epoch": 0.17, "grad_norm": 12.625444412231445, "learning_rate": 4.9616958106189246e-06, "loss": 0.2402, "step": 17725 }, { "epoch": 0.17, "grad_norm": 26.109956741333008, "learning_rate": 4.961571688164677e-06, "loss": 0.486, "step": 17750 }, { "epoch": 0.17, "grad_norm": 20.29021453857422, "learning_rate": 4.961447565710428e-06, "loss": 0.2796, "step": 17775 }, { "epoch": 0.18, "grad_norm": 20.687271118164062, "learning_rate": 4.961323443256179e-06, "loss": 0.4551, "step": 17800 }, { "epoch": 0.18, "grad_norm": 16.492097854614258, "learning_rate": 4.961199320801931e-06, "loss": 0.2849, "step": 17825 }, { "epoch": 0.18, "grad_norm": 19.456451416015625, "learning_rate": 4.961075198347682e-06, "loss": 0.5737, "step": 17850 }, { "epoch": 0.18, "grad_norm": 11.405406951904297, "learning_rate": 4.9609510758934335e-06, "loss": 0.2815, "step": 17875 }, { "epoch": 0.18, "grad_norm": 20.56942367553711, "learning_rate": 4.9608269534391856e-06, "loss": 0.4829, "step": 17900 }, { "epoch": 0.18, "grad_norm": 8.106593132019043, "learning_rate": 4.960702830984937e-06, "loss": 0.2717, "step": 17925 }, { "epoch": 0.18, "grad_norm": 26.863611221313477, "learning_rate": 4.960578708530688e-06, "loss": 0.5287, "step": 17950 }, { "epoch": 0.18, "grad_norm": 11.14274787902832, "learning_rate": 4.96045458607644e-06, "loss": 0.2327, "step": 17975 }, { "epoch": 0.18, "grad_norm": 23.13273811340332, "learning_rate": 4.960330463622191e-06, "loss": 0.4658, "step": 18000 }, { "epoch": 0.18, "grad_norm": 4.759295463562012, "learning_rate": 4.960206341167943e-06, "loss": 0.2467, "step": 18025 }, { "epoch": 0.18, "grad_norm": 24.984586715698242, "learning_rate": 4.9600822187136945e-06, "loss": 0.4833, "step": 18050 }, { "epoch": 0.18, "grad_norm": 13.765995025634766, "learning_rate": 4.959958096259447e-06, "loss": 0.2419, "step": 18075 }, { "epoch": 0.18, "grad_norm": 19.945146560668945, "learning_rate": 4.959833973805198e-06, "loss": 0.5091, "step": 18100 }, { "epoch": 0.18, "grad_norm": 15.573372840881348, "learning_rate": 4.959709851350949e-06, "loss": 0.2853, "step": 18125 }, { "epoch": 0.18, "grad_norm": 20.575883865356445, "learning_rate": 4.959585728896701e-06, "loss": 0.5002, "step": 18150 }, { "epoch": 0.18, "grad_norm": 11.19843578338623, "learning_rate": 4.959461606442452e-06, "loss": 0.2308, "step": 18175 }, { "epoch": 0.18, "grad_norm": 18.31629180908203, "learning_rate": 4.9593374839882035e-06, "loss": 0.501, "step": 18200 }, { "epoch": 0.18, "grad_norm": 9.595577239990234, "learning_rate": 4.9592133615339556e-06, "loss": 0.2799, "step": 18225 }, { "epoch": 0.18, "grad_norm": 24.388702392578125, "learning_rate": 4.959089239079707e-06, "loss": 0.5132, "step": 18250 }, { "epoch": 0.18, "grad_norm": 10.995932579040527, "learning_rate": 4.958965116625458e-06, "loss": 0.2698, "step": 18275 }, { "epoch": 0.18, "grad_norm": 17.501413345336914, "learning_rate": 4.95884099417121e-06, "loss": 0.3914, "step": 18300 }, { "epoch": 0.18, "grad_norm": 6.233504772186279, "learning_rate": 4.958716871716961e-06, "loss": 0.2472, "step": 18325 }, { "epoch": 0.18, "grad_norm": 19.783485412597656, "learning_rate": 4.9585927492627125e-06, "loss": 0.4534, "step": 18350 }, { "epoch": 0.18, "grad_norm": 13.707152366638184, "learning_rate": 4.9584686268084645e-06, "loss": 0.2731, "step": 18375 }, { "epoch": 0.18, "grad_norm": 16.168397903442383, "learning_rate": 4.958344504354216e-06, "loss": 0.5106, "step": 18400 }, { "epoch": 0.18, "grad_norm": 6.2424116134643555, "learning_rate": 4.958220381899968e-06, "loss": 0.2471, "step": 18425 }, { "epoch": 0.18, "grad_norm": 19.727684020996094, "learning_rate": 4.958096259445719e-06, "loss": 0.5157, "step": 18450 }, { "epoch": 0.18, "grad_norm": 13.32768726348877, "learning_rate": 4.957972136991471e-06, "loss": 0.3, "step": 18475 }, { "epoch": 0.18, "grad_norm": 23.106477737426758, "learning_rate": 4.957848014537222e-06, "loss": 0.5912, "step": 18500 }, { "epoch": 0.18, "grad_norm": 10.746122360229492, "learning_rate": 4.957723892082974e-06, "loss": 0.2125, "step": 18525 }, { "epoch": 0.18, "grad_norm": 12.154765129089355, "learning_rate": 4.9575997696287255e-06, "loss": 0.4023, "step": 18550 }, { "epoch": 0.18, "grad_norm": 16.249340057373047, "learning_rate": 4.957475647174477e-06, "loss": 0.2636, "step": 18575 }, { "epoch": 0.18, "grad_norm": 13.786974906921387, "learning_rate": 4.957351524720229e-06, "loss": 0.4772, "step": 18600 }, { "epoch": 0.18, "grad_norm": 7.3253889083862305, "learning_rate": 4.95722740226598e-06, "loss": 0.2122, "step": 18625 }, { "epoch": 0.18, "grad_norm": 15.017351150512695, "learning_rate": 4.957103279811731e-06, "loss": 0.4823, "step": 18650 }, { "epoch": 0.18, "grad_norm": 12.86546802520752, "learning_rate": 4.956979157357483e-06, "loss": 0.2511, "step": 18675 }, { "epoch": 0.18, "grad_norm": 18.178733825683594, "learning_rate": 4.9568550349032345e-06, "loss": 0.4571, "step": 18700 }, { "epoch": 0.18, "grad_norm": 10.969866752624512, "learning_rate": 4.956730912448986e-06, "loss": 0.2587, "step": 18725 }, { "epoch": 0.18, "grad_norm": 17.490053176879883, "learning_rate": 4.956606789994738e-06, "loss": 0.5471, "step": 18750 }, { "epoch": 0.18, "grad_norm": 14.22276496887207, "learning_rate": 4.956482667540489e-06, "loss": 0.2922, "step": 18775 }, { "epoch": 0.18, "grad_norm": 18.67643165588379, "learning_rate": 4.95635854508624e-06, "loss": 0.4658, "step": 18800 }, { "epoch": 0.19, "grad_norm": 8.630776405334473, "learning_rate": 4.956234422631992e-06, "loss": 0.3331, "step": 18825 }, { "epoch": 0.19, "grad_norm": Infinity, "learning_rate": 4.956115265075913e-06, "loss": 0.4303, "step": 18850 }, { "epoch": 0.19, "grad_norm": 9.818132400512695, "learning_rate": 4.955991142621665e-06, "loss": 0.2051, "step": 18875 }, { "epoch": 0.19, "grad_norm": 24.189741134643555, "learning_rate": 4.9558670201674166e-06, "loss": 0.4819, "step": 18900 }, { "epoch": 0.19, "grad_norm": 14.922125816345215, "learning_rate": 4.955742897713169e-06, "loss": 0.2703, "step": 18925 }, { "epoch": 0.19, "grad_norm": 26.798564910888672, "learning_rate": 4.95561877525892e-06, "loss": 0.4619, "step": 18950 }, { "epoch": 0.19, "grad_norm": 11.300326347351074, "learning_rate": 4.955494652804672e-06, "loss": 0.2764, "step": 18975 }, { "epoch": 0.19, "grad_norm": 24.095901489257812, "learning_rate": 4.955370530350423e-06, "loss": 0.491, "step": 19000 }, { "epoch": 0.19, "grad_norm": 11.655546188354492, "learning_rate": 4.955246407896174e-06, "loss": 0.2738, "step": 19025 }, { "epoch": 0.19, "grad_norm": 21.32692527770996, "learning_rate": 4.955122285441926e-06, "loss": 0.4861, "step": 19050 }, { "epoch": 0.19, "grad_norm": 14.087748527526855, "learning_rate": 4.9549981629876776e-06, "loss": 0.2221, "step": 19075 }, { "epoch": 0.19, "grad_norm": 20.225738525390625, "learning_rate": 4.954874040533429e-06, "loss": 0.5014, "step": 19100 }, { "epoch": 0.19, "grad_norm": 11.343185424804688, "learning_rate": 4.954749918079181e-06, "loss": 0.2507, "step": 19125 }, { "epoch": 0.19, "grad_norm": 17.965002059936523, "learning_rate": 4.954625795624932e-06, "loss": 0.5182, "step": 19150 }, { "epoch": 0.19, "grad_norm": 8.052295684814453, "learning_rate": 4.954501673170683e-06, "loss": 0.2818, "step": 19175 }, { "epoch": 0.19, "grad_norm": 17.4932918548584, "learning_rate": 4.954377550716435e-06, "loss": 0.5546, "step": 19200 }, { "epoch": 0.19, "grad_norm": 11.61941909790039, "learning_rate": 4.9542534282621865e-06, "loss": 0.3303, "step": 19225 }, { "epoch": 0.19, "grad_norm": 9.683981895446777, "learning_rate": 4.954129305807938e-06, "loss": 0.4951, "step": 19250 }, { "epoch": 0.19, "grad_norm": 13.167567253112793, "learning_rate": 4.95400518335369e-06, "loss": 0.2724, "step": 19275 }, { "epoch": 0.19, "grad_norm": 17.204322814941406, "learning_rate": 4.953881060899441e-06, "loss": 0.4504, "step": 19300 }, { "epoch": 0.19, "grad_norm": 18.2950382232666, "learning_rate": 4.953756938445193e-06, "loss": 0.2749, "step": 19325 }, { "epoch": 0.19, "grad_norm": 13.4788236618042, "learning_rate": 4.953632815990944e-06, "loss": 0.5198, "step": 19350 }, { "epoch": 0.19, "grad_norm": 15.595027923583984, "learning_rate": 4.953508693536696e-06, "loss": 0.2648, "step": 19375 }, { "epoch": 0.19, "grad_norm": 16.332239151000977, "learning_rate": 4.9533845710824476e-06, "loss": 0.5255, "step": 19400 }, { "epoch": 0.19, "grad_norm": 13.27827262878418, "learning_rate": 4.953260448628199e-06, "loss": 0.2707, "step": 19425 }, { "epoch": 0.19, "grad_norm": 18.075292587280273, "learning_rate": 4.953136326173951e-06, "loss": 0.5005, "step": 19450 }, { "epoch": 0.19, "grad_norm": 6.38661527633667, "learning_rate": 4.953012203719702e-06, "loss": 0.2367, "step": 19475 }, { "epoch": 0.19, "grad_norm": 25.551753997802734, "learning_rate": 4.952888081265453e-06, "loss": 0.4647, "step": 19500 }, { "epoch": 0.19, "grad_norm": 17.350330352783203, "learning_rate": 4.952763958811205e-06, "loss": 0.2888, "step": 19525 }, { "epoch": 0.19, "grad_norm": 22.866981506347656, "learning_rate": 4.9526398363569565e-06, "loss": 0.4415, "step": 19550 }, { "epoch": 0.19, "grad_norm": 16.289382934570312, "learning_rate": 4.952515713902708e-06, "loss": 0.2952, "step": 19575 }, { "epoch": 0.19, "grad_norm": 18.690582275390625, "learning_rate": 4.95239159144846e-06, "loss": 0.5078, "step": 19600 }, { "epoch": 0.19, "grad_norm": 11.802841186523438, "learning_rate": 4.952267468994211e-06, "loss": 0.2694, "step": 19625 }, { "epoch": 0.19, "grad_norm": 18.225393295288086, "learning_rate": 4.952143346539963e-06, "loss": 0.4187, "step": 19650 }, { "epoch": 0.19, "grad_norm": 11.816027641296387, "learning_rate": 4.952019224085714e-06, "loss": 0.2343, "step": 19675 }, { "epoch": 0.19, "grad_norm": 22.69363784790039, "learning_rate": 4.951895101631466e-06, "loss": 0.5394, "step": 19700 }, { "epoch": 0.19, "grad_norm": 5.6050639152526855, "learning_rate": 4.9517709791772175e-06, "loss": 0.2602, "step": 19725 }, { "epoch": 0.19, "grad_norm": 25.60657501220703, "learning_rate": 4.95164685672297e-06, "loss": 0.4456, "step": 19750 }, { "epoch": 0.19, "grad_norm": 11.37600326538086, "learning_rate": 4.951522734268721e-06, "loss": 0.295, "step": 19775 }, { "epoch": 0.19, "grad_norm": 21.67757225036621, "learning_rate": 4.951398611814472e-06, "loss": 0.4308, "step": 19800 }, { "epoch": 0.19, "grad_norm": 4.504390239715576, "learning_rate": 4.951274489360224e-06, "loss": 0.2605, "step": 19825 }, { "epoch": 0.2, "grad_norm": 16.03318214416504, "learning_rate": 4.951150366905975e-06, "loss": 0.4482, "step": 19850 }, { "epoch": 0.2, "grad_norm": 18.365142822265625, "learning_rate": 4.9510262444517265e-06, "loss": 0.2574, "step": 19875 }, { "epoch": 0.2, "grad_norm": 23.231056213378906, "learning_rate": 4.9509021219974786e-06, "loss": 0.4926, "step": 19900 }, { "epoch": 0.2, "grad_norm": 14.362161636352539, "learning_rate": 4.95077799954323e-06, "loss": 0.2687, "step": 19925 }, { "epoch": 0.2, "grad_norm": 20.381616592407227, "learning_rate": 4.950653877088981e-06, "loss": 0.4464, "step": 19950 }, { "epoch": 0.2, "grad_norm": 6.215094566345215, "learning_rate": 4.950529754634733e-06, "loss": 0.2607, "step": 19975 }, { "epoch": 0.2, "grad_norm": 24.06468963623047, "learning_rate": 4.950405632180484e-06, "loss": 0.4423, "step": 20000 }, { "epoch": 0.2, "eval_loss": 0.47234195470809937, "eval_runtime": 5964.9077, "eval_samples_per_second": 1.587, "eval_steps_per_second": 0.198, "eval_wer": 0.16331359042039528, "step": 20000 }, { "epoch": 0.2, "grad_norm": 15.731663703918457, "learning_rate": 4.9502815097262355e-06, "loss": 0.2601, "step": 20025 }, { "epoch": 0.2, "grad_norm": 19.48676300048828, "learning_rate": 4.9501573872719875e-06, "loss": 0.481, "step": 20050 }, { "epoch": 0.2, "grad_norm": 8.39816951751709, "learning_rate": 4.950033264817739e-06, "loss": 0.2634, "step": 20075 }, { "epoch": 0.2, "grad_norm": 19.63481330871582, "learning_rate": 4.949909142363491e-06, "loss": 0.555, "step": 20100 }, { "epoch": 0.2, "grad_norm": 8.394368171691895, "learning_rate": 4.949785019909242e-06, "loss": 0.2438, "step": 20125 }, { "epoch": 0.2, "grad_norm": 25.349153518676758, "learning_rate": 4.949660897454994e-06, "loss": 0.5033, "step": 20150 }, { "epoch": 0.2, "grad_norm": 11.277151107788086, "learning_rate": 4.949536775000745e-06, "loss": 0.3143, "step": 20175 }, { "epoch": 0.2, "grad_norm": 26.42441177368164, "learning_rate": 4.949412652546497e-06, "loss": 0.4552, "step": 20200 }, { "epoch": 0.2, "grad_norm": 16.53975486755371, "learning_rate": 4.9492885300922485e-06, "loss": 0.3048, "step": 20225 }, { "epoch": 0.2, "grad_norm": 20.427490234375, "learning_rate": 4.949164407638e-06, "loss": 0.5108, "step": 20250 }, { "epoch": 0.2, "grad_norm": 21.96413230895996, "learning_rate": 4.949040285183751e-06, "loss": 0.2628, "step": 20275 }, { "epoch": 0.2, "grad_norm": 28.27332878112793, "learning_rate": 4.948916162729503e-06, "loss": 0.462, "step": 20300 }, { "epoch": 0.2, "grad_norm": 8.79551887512207, "learning_rate": 4.948792040275254e-06, "loss": 0.2244, "step": 20325 }, { "epoch": 0.2, "grad_norm": 21.324583053588867, "learning_rate": 4.9486679178210055e-06, "loss": 0.4136, "step": 20350 }, { "epoch": 0.2, "grad_norm": 8.386858940124512, "learning_rate": 4.9485437953667575e-06, "loss": 0.2961, "step": 20375 }, { "epoch": 0.2, "grad_norm": 17.37444305419922, "learning_rate": 4.948419672912509e-06, "loss": 0.4603, "step": 20400 }, { "epoch": 0.2, "grad_norm": 15.041015625, "learning_rate": 4.94829555045826e-06, "loss": 0.2954, "step": 20425 }, { "epoch": 0.2, "grad_norm": 17.363924026489258, "learning_rate": 4.948171428004012e-06, "loss": 0.4135, "step": 20450 }, { "epoch": 0.2, "grad_norm": 7.9248046875, "learning_rate": 4.948047305549763e-06, "loss": 0.2118, "step": 20475 }, { "epoch": 0.2, "grad_norm": 19.030044555664062, "learning_rate": 4.947923183095515e-06, "loss": 0.4606, "step": 20500 }, { "epoch": 0.2, "grad_norm": 7.098836898803711, "learning_rate": 4.9477990606412665e-06, "loss": 0.2484, "step": 20525 }, { "epoch": 0.2, "grad_norm": 19.469932556152344, "learning_rate": 4.9476749381870185e-06, "loss": 0.5482, "step": 20550 }, { "epoch": 0.2, "grad_norm": 13.216876029968262, "learning_rate": 4.94755081573277e-06, "loss": 0.261, "step": 20575 }, { "epoch": 0.2, "grad_norm": 14.815937042236328, "learning_rate": 4.947426693278522e-06, "loss": 0.5078, "step": 20600 }, { "epoch": 0.2, "grad_norm": 9.48562240600586, "learning_rate": 4.947302570824273e-06, "loss": 0.3136, "step": 20625 }, { "epoch": 0.2, "grad_norm": 30.12717056274414, "learning_rate": 4.947178448370024e-06, "loss": 0.4828, "step": 20650 }, { "epoch": 0.2, "grad_norm": 9.607439994812012, "learning_rate": 4.947054325915776e-06, "loss": 0.2551, "step": 20675 }, { "epoch": 0.2, "grad_norm": 19.303606033325195, "learning_rate": 4.9469302034615275e-06, "loss": 0.4677, "step": 20700 }, { "epoch": 0.2, "grad_norm": 11.301396369934082, "learning_rate": 4.946806081007279e-06, "loss": 0.2181, "step": 20725 }, { "epoch": 0.2, "grad_norm": 13.529568672180176, "learning_rate": 4.946681958553031e-06, "loss": 0.4188, "step": 20750 }, { "epoch": 0.2, "grad_norm": 15.784148216247559, "learning_rate": 4.946557836098782e-06, "loss": 0.27, "step": 20775 }, { "epoch": 0.2, "grad_norm": 18.60282325744629, "learning_rate": 4.946433713644533e-06, "loss": 0.5889, "step": 20800 }, { "epoch": 0.2, "grad_norm": 8.187053680419922, "learning_rate": 4.946309591190285e-06, "loss": 0.1911, "step": 20825 }, { "epoch": 0.21, "grad_norm": 18.444313049316406, "learning_rate": 4.9461854687360365e-06, "loss": 0.4572, "step": 20850 }, { "epoch": 0.21, "grad_norm": 7.1215739250183105, "learning_rate": 4.946061346281788e-06, "loss": 0.2054, "step": 20875 }, { "epoch": 0.21, "grad_norm": 22.464834213256836, "learning_rate": 4.94593722382754e-06, "loss": 0.4615, "step": 20900 }, { "epoch": 0.21, "grad_norm": 8.665238380432129, "learning_rate": 4.945813101373291e-06, "loss": 0.2642, "step": 20925 }, { "epoch": 0.21, "grad_norm": 16.190874099731445, "learning_rate": 4.945688978919043e-06, "loss": 0.3976, "step": 20950 }, { "epoch": 0.21, "grad_norm": 9.849424362182617, "learning_rate": 4.945564856464794e-06, "loss": 0.2418, "step": 20975 }, { "epoch": 0.21, "grad_norm": 10.674076080322266, "learning_rate": 4.945440734010546e-06, "loss": 0.501, "step": 21000 }, { "epoch": 0.21, "grad_norm": 10.964690208435059, "learning_rate": 4.9453166115562975e-06, "loss": 0.2834, "step": 21025 }, { "epoch": 0.21, "grad_norm": 24.71860122680664, "learning_rate": 4.9451924891020495e-06, "loss": 0.5117, "step": 21050 }, { "epoch": 0.21, "grad_norm": 8.784431457519531, "learning_rate": 4.945068366647801e-06, "loss": 0.2008, "step": 21075 }, { "epoch": 0.21, "grad_norm": 20.898019790649414, "learning_rate": 4.944944244193552e-06, "loss": 0.4228, "step": 21100 }, { "epoch": 0.21, "grad_norm": 10.511014938354492, "learning_rate": 4.944820121739303e-06, "loss": 0.2511, "step": 21125 }, { "epoch": 0.21, "grad_norm": 16.674985885620117, "learning_rate": 4.944695999285055e-06, "loss": 0.5097, "step": 21150 }, { "epoch": 0.21, "grad_norm": 9.881532669067383, "learning_rate": 4.9445718768308064e-06, "loss": 0.1984, "step": 21175 }, { "epoch": 0.21, "grad_norm": 15.878728866577148, "learning_rate": 4.944447754376558e-06, "loss": 0.4962, "step": 21200 }, { "epoch": 0.21, "grad_norm": 8.039122581481934, "learning_rate": 4.94432363192231e-06, "loss": 0.2537, "step": 21225 }, { "epoch": 0.21, "grad_norm": 19.75711441040039, "learning_rate": 4.944199509468061e-06, "loss": 0.4504, "step": 21250 }, { "epoch": 0.21, "grad_norm": 13.148359298706055, "learning_rate": 4.944075387013812e-06, "loss": 0.2515, "step": 21275 }, { "epoch": 0.21, "grad_norm": 22.05157470703125, "learning_rate": 4.943951264559564e-06, "loss": 0.5013, "step": 21300 }, { "epoch": 0.21, "grad_norm": 13.6741361618042, "learning_rate": 4.943827142105315e-06, "loss": 0.2404, "step": 21325 }, { "epoch": 0.21, "grad_norm": 21.100099563598633, "learning_rate": 4.9437030196510675e-06, "loss": 0.5149, "step": 21350 }, { "epoch": 0.21, "grad_norm": 10.604293823242188, "learning_rate": 4.943578897196819e-06, "loss": 0.2661, "step": 21375 }, { "epoch": 0.21, "grad_norm": 21.32961654663086, "learning_rate": 4.943454774742571e-06, "loss": 0.5574, "step": 21400 }, { "epoch": 0.21, "grad_norm": 10.900554656982422, "learning_rate": 4.943330652288322e-06, "loss": 0.2696, "step": 21425 }, { "epoch": 0.21, "grad_norm": 16.381832122802734, "learning_rate": 4.943206529834074e-06, "loss": 0.4522, "step": 21450 }, { "epoch": 0.21, "grad_norm": 15.462983131408691, "learning_rate": 4.943082407379825e-06, "loss": 0.2488, "step": 21475 }, { "epoch": 0.21, "grad_norm": 20.562969207763672, "learning_rate": 4.9429582849255764e-06, "loss": 0.4679, "step": 21500 }, { "epoch": 0.21, "grad_norm": 15.823814392089844, "learning_rate": 4.9428341624713285e-06, "loss": 0.2609, "step": 21525 }, { "epoch": 0.21, "grad_norm": 15.772719383239746, "learning_rate": 4.94271004001708e-06, "loss": 0.477, "step": 21550 }, { "epoch": 0.21, "grad_norm": 13.759319305419922, "learning_rate": 4.942585917562831e-06, "loss": 0.2706, "step": 21575 }, { "epoch": 0.21, "grad_norm": 30.117406845092773, "learning_rate": 4.942461795108583e-06, "loss": 0.5536, "step": 21600 }, { "epoch": 0.21, "grad_norm": 13.33657455444336, "learning_rate": 4.942337672654334e-06, "loss": 0.2243, "step": 21625 }, { "epoch": 0.21, "grad_norm": 19.980302810668945, "learning_rate": 4.942213550200085e-06, "loss": 0.3728, "step": 21650 }, { "epoch": 0.21, "grad_norm": 2.8122260570526123, "learning_rate": 4.9420894277458374e-06, "loss": 0.2537, "step": 21675 }, { "epoch": 0.21, "grad_norm": 21.375629425048828, "learning_rate": 4.941965305291589e-06, "loss": 0.4578, "step": 21700 }, { "epoch": 0.21, "grad_norm": 11.897346496582031, "learning_rate": 4.94184118283734e-06, "loss": 0.2716, "step": 21725 }, { "epoch": 0.21, "grad_norm": 21.95948028564453, "learning_rate": 4.941717060383092e-06, "loss": 0.4334, "step": 21750 }, { "epoch": 0.21, "grad_norm": 5.083585739135742, "learning_rate": 4.941592937928843e-06, "loss": 0.2664, "step": 21775 }, { "epoch": 0.21, "grad_norm": 17.475215911865234, "learning_rate": 4.941468815474595e-06, "loss": 0.4412, "step": 21800 }, { "epoch": 0.21, "grad_norm": 16.703554153442383, "learning_rate": 4.941344693020346e-06, "loss": 0.2081, "step": 21825 }, { "epoch": 0.21, "grad_norm": 12.983189582824707, "learning_rate": 4.9412205705660985e-06, "loss": 0.4328, "step": 21850 }, { "epoch": 0.22, "grad_norm": 7.641730308532715, "learning_rate": 4.94109644811185e-06, "loss": 0.2375, "step": 21875 }, { "epoch": 0.22, "grad_norm": 15.126535415649414, "learning_rate": 4.940972325657602e-06, "loss": 0.3941, "step": 21900 }, { "epoch": 0.22, "grad_norm": 6.117859840393066, "learning_rate": 4.940848203203353e-06, "loss": 0.2204, "step": 21925 }, { "epoch": 0.22, "grad_norm": 21.396831512451172, "learning_rate": 4.940724080749104e-06, "loss": 0.4964, "step": 21950 }, { "epoch": 0.22, "grad_norm": 10.764791488647461, "learning_rate": 4.940599958294855e-06, "loss": 0.2888, "step": 21975 }, { "epoch": 0.22, "grad_norm": 24.20563316345215, "learning_rate": 4.9404758358406074e-06, "loss": 0.5371, "step": 22000 }, { "epoch": 0.22, "grad_norm": 12.586981773376465, "learning_rate": 4.940351713386359e-06, "loss": 0.2729, "step": 22025 }, { "epoch": 0.22, "grad_norm": 12.413281440734863, "learning_rate": 4.94022759093211e-06, "loss": 0.5159, "step": 22050 }, { "epoch": 0.22, "grad_norm": 18.75006866455078, "learning_rate": 4.940103468477862e-06, "loss": 0.2724, "step": 22075 }, { "epoch": 0.22, "grad_norm": 15.819109916687012, "learning_rate": 4.939979346023613e-06, "loss": 0.4859, "step": 22100 }, { "epoch": 0.22, "grad_norm": 8.564759254455566, "learning_rate": 4.939855223569364e-06, "loss": 0.2831, "step": 22125 }, { "epoch": 0.22, "grad_norm": 9.857722282409668, "learning_rate": 4.939731101115116e-06, "loss": 0.4609, "step": 22150 }, { "epoch": 0.22, "grad_norm": 14.614459991455078, "learning_rate": 4.939606978660868e-06, "loss": 0.2593, "step": 22175 }, { "epoch": 0.22, "grad_norm": 20.569414138793945, "learning_rate": 4.93948285620662e-06, "loss": 0.4556, "step": 22200 }, { "epoch": 0.22, "grad_norm": 11.630912780761719, "learning_rate": 4.939358733752371e-06, "loss": 0.2833, "step": 22225 }, { "epoch": 0.22, "grad_norm": 22.41660499572754, "learning_rate": 4.939234611298123e-06, "loss": 0.4621, "step": 22250 }, { "epoch": 0.22, "grad_norm": 6.36210298538208, "learning_rate": 4.939110488843874e-06, "loss": 0.2428, "step": 22275 }, { "epoch": 0.22, "grad_norm": 18.587646484375, "learning_rate": 4.938986366389626e-06, "loss": 0.4499, "step": 22300 }, { "epoch": 0.22, "grad_norm": 8.639568328857422, "learning_rate": 4.938862243935377e-06, "loss": 0.2184, "step": 22325 }, { "epoch": 0.22, "grad_norm": 12.01052188873291, "learning_rate": 4.938738121481129e-06, "loss": 0.4801, "step": 22350 }, { "epoch": 0.22, "grad_norm": 12.65496826171875, "learning_rate": 4.938613999026881e-06, "loss": 0.2109, "step": 22375 }, { "epoch": 0.22, "grad_norm": 14.16475772857666, "learning_rate": 4.938489876572632e-06, "loss": 0.472, "step": 22400 }, { "epoch": 0.22, "grad_norm": 10.130462646484375, "learning_rate": 4.938365754118383e-06, "loss": 0.2253, "step": 22425 }, { "epoch": 0.22, "grad_norm": 12.30122184753418, "learning_rate": 4.938241631664135e-06, "loss": 0.4577, "step": 22450 }, { "epoch": 0.22, "grad_norm": 13.341716766357422, "learning_rate": 4.938117509209886e-06, "loss": 0.2084, "step": 22475 }, { "epoch": 0.22, "grad_norm": 20.634302139282227, "learning_rate": 4.937993386755638e-06, "loss": 0.4598, "step": 22500 }, { "epoch": 0.22, "grad_norm": 12.493928909301758, "learning_rate": 4.93786926430139e-06, "loss": 0.2507, "step": 22525 }, { "epoch": 0.22, "grad_norm": 22.07119369506836, "learning_rate": 4.937745141847141e-06, "loss": 0.483, "step": 22550 }, { "epoch": 0.22, "grad_norm": 9.04368782043457, "learning_rate": 4.937621019392893e-06, "loss": 0.2619, "step": 22575 }, { "epoch": 0.22, "grad_norm": 24.2895450592041, "learning_rate": 4.937496896938644e-06, "loss": 0.4967, "step": 22600 }, { "epoch": 0.22, "grad_norm": 10.883474349975586, "learning_rate": 4.937372774484396e-06, "loss": 0.2492, "step": 22625 }, { "epoch": 0.22, "grad_norm": 21.098007202148438, "learning_rate": 4.937248652030147e-06, "loss": 0.4857, "step": 22650 }, { "epoch": 0.22, "grad_norm": 9.271581649780273, "learning_rate": 4.9371245295758995e-06, "loss": 0.2226, "step": 22675 }, { "epoch": 0.22, "grad_norm": 23.118925094604492, "learning_rate": 4.937000407121651e-06, "loss": 0.4734, "step": 22700 }, { "epoch": 0.22, "grad_norm": 7.77742338180542, "learning_rate": 4.936876284667402e-06, "loss": 0.2191, "step": 22725 }, { "epoch": 0.22, "grad_norm": 19.434635162353516, "learning_rate": 4.936752162213154e-06, "loss": 0.4936, "step": 22750 }, { "epoch": 0.22, "grad_norm": 11.337224006652832, "learning_rate": 4.936628039758905e-06, "loss": 0.2422, "step": 22775 }, { "epoch": 0.22, "grad_norm": 21.413400650024414, "learning_rate": 4.936503917304656e-06, "loss": 0.4831, "step": 22800 }, { "epoch": 0.22, "grad_norm": 7.92233419418335, "learning_rate": 4.9363797948504076e-06, "loss": 0.2421, "step": 22825 }, { "epoch": 0.22, "grad_norm": 20.097232818603516, "learning_rate": 4.93625567239616e-06, "loss": 0.41, "step": 22850 }, { "epoch": 0.22, "grad_norm": 9.601716995239258, "learning_rate": 4.936131549941911e-06, "loss": 0.2318, "step": 22875 }, { "epoch": 0.23, "grad_norm": 19.19198226928711, "learning_rate": 4.936012392385833e-06, "loss": 0.4579, "step": 22900 }, { "epoch": 0.23, "grad_norm": 7.5989179611206055, "learning_rate": 4.935888269931584e-06, "loss": 0.2322, "step": 22925 }, { "epoch": 0.23, "grad_norm": 17.827478408813477, "learning_rate": 4.935764147477335e-06, "loss": 0.429, "step": 22950 }, { "epoch": 0.23, "grad_norm": 23.135822296142578, "learning_rate": 4.935640025023087e-06, "loss": 0.2172, "step": 22975 }, { "epoch": 0.23, "grad_norm": 19.98003387451172, "learning_rate": 4.935515902568838e-06, "loss": 0.5284, "step": 23000 }, { "epoch": 0.23, "grad_norm": 11.709184646606445, "learning_rate": 4.9353917801145905e-06, "loss": 0.2429, "step": 23025 }, { "epoch": 0.23, "grad_norm": 21.980653762817383, "learning_rate": 4.935267657660342e-06, "loss": 0.4289, "step": 23050 }, { "epoch": 0.23, "grad_norm": 11.714218139648438, "learning_rate": 4.935143535206094e-06, "loss": 0.2345, "step": 23075 }, { "epoch": 0.23, "grad_norm": 13.11427116394043, "learning_rate": 4.935019412751845e-06, "loss": 0.4114, "step": 23100 }, { "epoch": 0.23, "grad_norm": 6.2316107749938965, "learning_rate": 4.934895290297597e-06, "loss": 0.2362, "step": 23125 }, { "epoch": 0.23, "grad_norm": 24.76997184753418, "learning_rate": 4.934771167843348e-06, "loss": 0.4506, "step": 23150 }, { "epoch": 0.23, "grad_norm": 13.199085235595703, "learning_rate": 4.9346470453890994e-06, "loss": 0.2632, "step": 23175 }, { "epoch": 0.23, "grad_norm": 24.408475875854492, "learning_rate": 4.9345229229348515e-06, "loss": 0.5366, "step": 23200 }, { "epoch": 0.23, "grad_norm": 13.732388496398926, "learning_rate": 4.934398800480603e-06, "loss": 0.2765, "step": 23225 }, { "epoch": 0.23, "grad_norm": 16.152482986450195, "learning_rate": 4.934274678026354e-06, "loss": 0.4562, "step": 23250 }, { "epoch": 0.23, "grad_norm": 16.615066528320312, "learning_rate": 4.934150555572106e-06, "loss": 0.2492, "step": 23275 }, { "epoch": 0.23, "grad_norm": 18.467952728271484, "learning_rate": 4.934026433117857e-06, "loss": 0.4962, "step": 23300 }, { "epoch": 0.23, "grad_norm": 16.515247344970703, "learning_rate": 4.933902310663608e-06, "loss": 0.2908, "step": 23325 }, { "epoch": 0.23, "grad_norm": 19.212844848632812, "learning_rate": 4.9337781882093605e-06, "loss": 0.4766, "step": 23350 }, { "epoch": 0.23, "grad_norm": 8.28486442565918, "learning_rate": 4.933654065755112e-06, "loss": 0.2785, "step": 23375 }, { "epoch": 0.23, "grad_norm": 26.033130645751953, "learning_rate": 4.933529943300863e-06, "loss": 0.4524, "step": 23400 }, { "epoch": 0.23, "grad_norm": 8.55504322052002, "learning_rate": 4.933405820846615e-06, "loss": 0.2612, "step": 23425 }, { "epoch": 0.23, "grad_norm": 19.146902084350586, "learning_rate": 4.933281698392366e-06, "loss": 0.4536, "step": 23450 }, { "epoch": 0.23, "grad_norm": 12.5919771194458, "learning_rate": 4.933157575938118e-06, "loss": 0.2761, "step": 23475 }, { "epoch": 0.23, "grad_norm": 19.311796188354492, "learning_rate": 4.933033453483869e-06, "loss": 0.4653, "step": 23500 }, { "epoch": 0.23, "grad_norm": 9.830127716064453, "learning_rate": 4.9329093310296215e-06, "loss": 0.2756, "step": 23525 }, { "epoch": 0.23, "grad_norm": 17.00377655029297, "learning_rate": 4.932785208575373e-06, "loss": 0.4847, "step": 23550 }, { "epoch": 0.23, "grad_norm": 10.901337623596191, "learning_rate": 4.932661086121124e-06, "loss": 0.2231, "step": 23575 }, { "epoch": 0.23, "grad_norm": 19.566734313964844, "learning_rate": 4.932536963666876e-06, "loss": 0.4831, "step": 23600 }, { "epoch": 0.23, "grad_norm": 10.249028205871582, "learning_rate": 4.932412841212627e-06, "loss": 0.2518, "step": 23625 }, { "epoch": 0.23, "grad_norm": 18.801862716674805, "learning_rate": 4.932288718758378e-06, "loss": 0.5293, "step": 23650 }, { "epoch": 0.23, "grad_norm": 4.40006685256958, "learning_rate": 4.9321645963041304e-06, "loss": 0.3063, "step": 23675 }, { "epoch": 0.23, "grad_norm": 20.281389236450195, "learning_rate": 4.932040473849882e-06, "loss": 0.5028, "step": 23700 }, { "epoch": 0.23, "grad_norm": 9.342434883117676, "learning_rate": 4.931916351395633e-06, "loss": 0.2609, "step": 23725 }, { "epoch": 0.23, "grad_norm": 15.03763198852539, "learning_rate": 4.931792228941385e-06, "loss": 0.4422, "step": 23750 }, { "epoch": 0.23, "grad_norm": 11.492680549621582, "learning_rate": 4.931668106487136e-06, "loss": 0.2561, "step": 23775 }, { "epoch": 0.23, "grad_norm": 18.5802001953125, "learning_rate": 4.931543984032887e-06, "loss": 0.447, "step": 23800 }, { "epoch": 0.23, "grad_norm": 12.128190040588379, "learning_rate": 4.931419861578639e-06, "loss": 0.2474, "step": 23825 }, { "epoch": 0.23, "grad_norm": 19.015737533569336, "learning_rate": 4.931295739124391e-06, "loss": 0.4894, "step": 23850 }, { "epoch": 0.23, "grad_norm": 8.406634330749512, "learning_rate": 4.931171616670143e-06, "loss": 0.2252, "step": 23875 }, { "epoch": 0.23, "grad_norm": 19.010175704956055, "learning_rate": 4.931047494215894e-06, "loss": 0.4933, "step": 23900 }, { "epoch": 0.24, "grad_norm": 6.59144401550293, "learning_rate": 4.930923371761646e-06, "loss": 0.2738, "step": 23925 }, { "epoch": 0.24, "grad_norm": 21.247758865356445, "learning_rate": 4.930799249307397e-06, "loss": 0.4644, "step": 23950 }, { "epoch": 0.24, "grad_norm": 11.879525184631348, "learning_rate": 4.930675126853149e-06, "loss": 0.2254, "step": 23975 }, { "epoch": 0.24, "grad_norm": 16.102115631103516, "learning_rate": 4.9305510043989004e-06, "loss": 0.4765, "step": 24000 }, { "epoch": 0.24, "grad_norm": 8.503607749938965, "learning_rate": 4.930426881944652e-06, "loss": 0.2251, "step": 24025 }, { "epoch": 0.24, "grad_norm": 25.24961280822754, "learning_rate": 4.930302759490404e-06, "loss": 0.4741, "step": 24050 }, { "epoch": 0.24, "grad_norm": 8.182397842407227, "learning_rate": 4.930178637036155e-06, "loss": 0.2069, "step": 24075 }, { "epoch": 0.24, "grad_norm": 19.762121200561523, "learning_rate": 4.930054514581906e-06, "loss": 0.4206, "step": 24100 }, { "epoch": 0.24, "grad_norm": 16.89647102355957, "learning_rate": 4.929930392127658e-06, "loss": 0.2794, "step": 24125 }, { "epoch": 0.24, "grad_norm": 28.581439971923828, "learning_rate": 4.929806269673409e-06, "loss": 0.5052, "step": 24150 }, { "epoch": 0.24, "grad_norm": 8.429298400878906, "learning_rate": 4.929682147219161e-06, "loss": 0.2463, "step": 24175 }, { "epoch": 0.24, "grad_norm": 17.288232803344727, "learning_rate": 4.929558024764913e-06, "loss": 0.5067, "step": 24200 }, { "epoch": 0.24, "grad_norm": 14.586867332458496, "learning_rate": 4.929433902310664e-06, "loss": 0.2582, "step": 24225 }, { "epoch": 0.24, "grad_norm": 18.877239227294922, "learning_rate": 4.929309779856415e-06, "loss": 0.467, "step": 24250 }, { "epoch": 0.24, "grad_norm": 12.734740257263184, "learning_rate": 4.929185657402167e-06, "loss": 0.2582, "step": 24275 }, { "epoch": 0.24, "grad_norm": 22.222166061401367, "learning_rate": 4.929061534947918e-06, "loss": 0.4557, "step": 24300 }, { "epoch": 0.24, "grad_norm": 7.535680770874023, "learning_rate": 4.92893741249367e-06, "loss": 0.2394, "step": 24325 }, { "epoch": 0.24, "grad_norm": 16.97195816040039, "learning_rate": 4.928813290039422e-06, "loss": 0.4071, "step": 24350 }, { "epoch": 0.24, "grad_norm": 13.400140762329102, "learning_rate": 4.928689167585174e-06, "loss": 0.2402, "step": 24375 }, { "epoch": 0.24, "grad_norm": 24.10315704345703, "learning_rate": 4.928565045130925e-06, "loss": 0.5015, "step": 24400 }, { "epoch": 0.24, "grad_norm": 10.846888542175293, "learning_rate": 4.928440922676676e-06, "loss": 0.2352, "step": 24425 }, { "epoch": 0.24, "grad_norm": 24.093637466430664, "learning_rate": 4.928316800222428e-06, "loss": 0.477, "step": 24450 }, { "epoch": 0.24, "grad_norm": 16.99740219116211, "learning_rate": 4.928192677768179e-06, "loss": 0.2354, "step": 24475 }, { "epoch": 0.24, "grad_norm": 16.644433975219727, "learning_rate": 4.928068555313931e-06, "loss": 0.4471, "step": 24500 }, { "epoch": 0.24, "grad_norm": 9.461723327636719, "learning_rate": 4.927944432859683e-06, "loss": 0.2276, "step": 24525 }, { "epoch": 0.24, "grad_norm": 17.786169052124023, "learning_rate": 4.927820310405434e-06, "loss": 0.4998, "step": 24550 }, { "epoch": 0.24, "grad_norm": 4.569359302520752, "learning_rate": 4.927696187951185e-06, "loss": 0.2064, "step": 24575 }, { "epoch": 0.24, "grad_norm": 20.506813049316406, "learning_rate": 4.927572065496937e-06, "loss": 0.4597, "step": 24600 }, { "epoch": 0.24, "grad_norm": 12.914841651916504, "learning_rate": 4.927447943042688e-06, "loss": 0.2076, "step": 24625 }, { "epoch": 0.24, "grad_norm": 14.882383346557617, "learning_rate": 4.9273238205884395e-06, "loss": 0.4344, "step": 24650 }, { "epoch": 0.24, "grad_norm": 14.503281593322754, "learning_rate": 4.927199698134192e-06, "loss": 0.2739, "step": 24675 }, { "epoch": 0.24, "grad_norm": 17.289949417114258, "learning_rate": 4.927075575679943e-06, "loss": 0.5075, "step": 24700 }, { "epoch": 0.24, "grad_norm": 8.12880802154541, "learning_rate": 4.926951453225695e-06, "loss": 0.2402, "step": 24725 }, { "epoch": 0.24, "grad_norm": 18.232824325561523, "learning_rate": 4.926827330771446e-06, "loss": 0.4372, "step": 24750 }, { "epoch": 0.24, "grad_norm": 10.066722869873047, "learning_rate": 4.926703208317198e-06, "loss": 0.281, "step": 24775 }, { "epoch": 0.24, "grad_norm": 24.521072387695312, "learning_rate": 4.926579085862949e-06, "loss": 0.426, "step": 24800 }, { "epoch": 0.24, "grad_norm": 7.767642974853516, "learning_rate": 4.926454963408701e-06, "loss": 0.2499, "step": 24825 }, { "epoch": 0.24, "grad_norm": 20.071826934814453, "learning_rate": 4.926330840954453e-06, "loss": 0.4476, "step": 24850 }, { "epoch": 0.24, "grad_norm": 9.433667182922363, "learning_rate": 4.926206718500204e-06, "loss": 0.2587, "step": 24875 }, { "epoch": 0.24, "grad_norm": 19.065303802490234, "learning_rate": 4.926087560944126e-06, "loss": 0.5365, "step": 24900 }, { "epoch": 0.25, "grad_norm": 7.465023517608643, "learning_rate": 4.925963438489877e-06, "loss": 0.284, "step": 24925 }, { "epoch": 0.25, "grad_norm": 16.57472801208496, "learning_rate": 4.925839316035628e-06, "loss": 0.5005, "step": 24950 }, { "epoch": 0.25, "grad_norm": 16.208114624023438, "learning_rate": 4.92571519358138e-06, "loss": 0.2741, "step": 24975 }, { "epoch": 0.25, "grad_norm": Infinity, "learning_rate": 4.925596036025301e-06, "loss": 0.5345, "step": 25000 }, { "epoch": 0.25, "grad_norm": 9.62043571472168, "learning_rate": 4.925471913571053e-06, "loss": 0.2425, "step": 25025 }, { "epoch": 0.25, "grad_norm": 17.895278930664062, "learning_rate": 4.9253477911168045e-06, "loss": 0.5272, "step": 25050 }, { "epoch": 0.25, "grad_norm": 13.094559669494629, "learning_rate": 4.925223668662556e-06, "loss": 0.3024, "step": 25075 }, { "epoch": 0.25, "grad_norm": 14.309676170349121, "learning_rate": 4.925099546208308e-06, "loss": 0.432, "step": 25100 }, { "epoch": 0.25, "grad_norm": 9.627264976501465, "learning_rate": 4.924975423754059e-06, "loss": 0.2665, "step": 25125 }, { "epoch": 0.25, "grad_norm": 16.32972526550293, "learning_rate": 4.92485130129981e-06, "loss": 0.493, "step": 25150 }, { "epoch": 0.25, "grad_norm": 15.634971618652344, "learning_rate": 4.924727178845562e-06, "loss": 0.2514, "step": 25175 }, { "epoch": 0.25, "grad_norm": 15.64287281036377, "learning_rate": 4.9246030563913135e-06, "loss": 0.4541, "step": 25200 }, { "epoch": 0.25, "grad_norm": 9.664909362792969, "learning_rate": 4.9244789339370655e-06, "loss": 0.2296, "step": 25225 }, { "epoch": 0.25, "grad_norm": 24.082664489746094, "learning_rate": 4.924354811482817e-06, "loss": 0.4287, "step": 25250 }, { "epoch": 0.25, "grad_norm": 11.936466217041016, "learning_rate": 4.924230689028569e-06, "loss": 0.2592, "step": 25275 }, { "epoch": 0.25, "grad_norm": 25.032175064086914, "learning_rate": 4.92410656657432e-06, "loss": 0.4376, "step": 25300 }, { "epoch": 0.25, "grad_norm": 8.506226539611816, "learning_rate": 4.923982444120072e-06, "loss": 0.323, "step": 25325 }, { "epoch": 0.25, "grad_norm": 20.111589431762695, "learning_rate": 4.923858321665823e-06, "loss": 0.5344, "step": 25350 }, { "epoch": 0.25, "grad_norm": 11.530801773071289, "learning_rate": 4.9237341992115745e-06, "loss": 0.2296, "step": 25375 }, { "epoch": 0.25, "grad_norm": 21.63082504272461, "learning_rate": 4.9236100767573265e-06, "loss": 0.4676, "step": 25400 }, { "epoch": 0.25, "grad_norm": 13.573336601257324, "learning_rate": 4.923485954303078e-06, "loss": 0.2399, "step": 25425 }, { "epoch": 0.25, "grad_norm": 20.55254364013672, "learning_rate": 4.923361831848829e-06, "loss": 0.4616, "step": 25450 }, { "epoch": 0.25, "grad_norm": 8.518818855285645, "learning_rate": 4.92323770939458e-06, "loss": 0.2504, "step": 25475 }, { "epoch": 0.25, "grad_norm": 25.573884963989258, "learning_rate": 4.923113586940332e-06, "loss": 0.5246, "step": 25500 }, { "epoch": 0.25, "grad_norm": 9.405385971069336, "learning_rate": 4.9229894644860834e-06, "loss": 0.2518, "step": 25525 }, { "epoch": 0.25, "grad_norm": 29.856977462768555, "learning_rate": 4.9228653420318355e-06, "loss": 0.5072, "step": 25550 }, { "epoch": 0.25, "grad_norm": 12.11672306060791, "learning_rate": 4.922741219577587e-06, "loss": 0.2158, "step": 25575 }, { "epoch": 0.25, "grad_norm": 11.836603164672852, "learning_rate": 4.922617097123339e-06, "loss": 0.4151, "step": 25600 }, { "epoch": 0.25, "grad_norm": 12.808690071105957, "learning_rate": 4.92249297466909e-06, "loss": 0.2718, "step": 25625 }, { "epoch": 0.25, "grad_norm": 15.216001510620117, "learning_rate": 4.922368852214842e-06, "loss": 0.4533, "step": 25650 }, { "epoch": 0.25, "grad_norm": 11.914241790771484, "learning_rate": 4.922244729760593e-06, "loss": 0.2818, "step": 25675 }, { "epoch": 0.25, "grad_norm": 19.749048233032227, "learning_rate": 4.9221206073063445e-06, "loss": 0.4697, "step": 25700 }, { "epoch": 0.25, "grad_norm": 12.172308921813965, "learning_rate": 4.9219964848520965e-06, "loss": 0.2727, "step": 25725 }, { "epoch": 0.25, "grad_norm": 17.497583389282227, "learning_rate": 4.921872362397848e-06, "loss": 0.4636, "step": 25750 }, { "epoch": 0.25, "grad_norm": 6.918204307556152, "learning_rate": 4.921748239943599e-06, "loss": 0.2197, "step": 25775 }, { "epoch": 0.25, "grad_norm": 20.529300689697266, "learning_rate": 4.921624117489351e-06, "loss": 0.4841, "step": 25800 }, { "epoch": 0.25, "grad_norm": 7.477242469787598, "learning_rate": 4.921499995035102e-06, "loss": 0.2609, "step": 25825 }, { "epoch": 0.25, "grad_norm": 21.972383499145508, "learning_rate": 4.921375872580853e-06, "loss": 0.4652, "step": 25850 }, { "epoch": 0.25, "grad_norm": 9.725136756896973, "learning_rate": 4.9212517501266055e-06, "loss": 0.2454, "step": 25875 }, { "epoch": 0.25, "grad_norm": 14.235319137573242, "learning_rate": 4.921127627672357e-06, "loss": 0.5038, "step": 25900 }, { "epoch": 0.25, "grad_norm": 8.386384963989258, "learning_rate": 4.921003505218108e-06, "loss": 0.2625, "step": 25925 }, { "epoch": 0.26, "grad_norm": 26.187156677246094, "learning_rate": 4.92087938276386e-06, "loss": 0.3853, "step": 25950 }, { "epoch": 0.26, "grad_norm": 17.24013900756836, "learning_rate": 4.920755260309611e-06, "loss": 0.2659, "step": 25975 }, { "epoch": 0.26, "grad_norm": 14.907536506652832, "learning_rate": 4.920631137855363e-06, "loss": 0.4164, "step": 26000 }, { "epoch": 0.26, "grad_norm": 7.262904167175293, "learning_rate": 4.9205070154011144e-06, "loss": 0.2708, "step": 26025 }, { "epoch": 0.26, "grad_norm": 19.26106834411621, "learning_rate": 4.9203828929468665e-06, "loss": 0.5462, "step": 26050 }, { "epoch": 0.26, "grad_norm": 9.847299575805664, "learning_rate": 4.920258770492618e-06, "loss": 0.2252, "step": 26075 }, { "epoch": 0.26, "grad_norm": 23.198070526123047, "learning_rate": 4.92013464803837e-06, "loss": 0.4638, "step": 26100 }, { "epoch": 0.26, "grad_norm": 7.81346321105957, "learning_rate": 4.920010525584121e-06, "loss": 0.3023, "step": 26125 }, { "epoch": 0.26, "grad_norm": 18.033245086669922, "learning_rate": 4.919886403129872e-06, "loss": 0.4303, "step": 26150 }, { "epoch": 0.26, "grad_norm": 7.271194934844971, "learning_rate": 4.919762280675624e-06, "loss": 0.2423, "step": 26175 }, { "epoch": 0.26, "grad_norm": 25.1314640045166, "learning_rate": 4.9196381582213755e-06, "loss": 0.4838, "step": 26200 }, { "epoch": 0.26, "grad_norm": 15.194581031799316, "learning_rate": 4.919514035767127e-06, "loss": 0.2397, "step": 26225 }, { "epoch": 0.26, "grad_norm": 18.880075454711914, "learning_rate": 4.919389913312879e-06, "loss": 0.4896, "step": 26250 }, { "epoch": 0.26, "grad_norm": 10.341778755187988, "learning_rate": 4.91926579085863e-06, "loss": 0.2436, "step": 26275 }, { "epoch": 0.26, "grad_norm": 16.351551055908203, "learning_rate": 4.919141668404381e-06, "loss": 0.5268, "step": 26300 }, { "epoch": 0.26, "grad_norm": 12.41880989074707, "learning_rate": 4.919017545950132e-06, "loss": 0.2245, "step": 26325 }, { "epoch": 0.26, "grad_norm": 15.60360336303711, "learning_rate": 4.9188934234958844e-06, "loss": 0.4754, "step": 26350 }, { "epoch": 0.26, "grad_norm": 10.7340726852417, "learning_rate": 4.918769301041636e-06, "loss": 0.2538, "step": 26375 }, { "epoch": 0.26, "grad_norm": 17.67392349243164, "learning_rate": 4.918645178587388e-06, "loss": 0.471, "step": 26400 }, { "epoch": 0.26, "grad_norm": 14.06425952911377, "learning_rate": 4.918521056133139e-06, "loss": 0.2352, "step": 26425 }, { "epoch": 0.26, "grad_norm": 17.206249237060547, "learning_rate": 4.918396933678891e-06, "loss": 0.4286, "step": 26450 }, { "epoch": 0.26, "grad_norm": 5.888020992279053, "learning_rate": 4.918272811224642e-06, "loss": 0.211, "step": 26475 }, { "epoch": 0.26, "grad_norm": 22.259557723999023, "learning_rate": 4.918148688770394e-06, "loss": 0.497, "step": 26500 }, { "epoch": 0.26, "grad_norm": 17.508752822875977, "learning_rate": 4.9180245663161454e-06, "loss": 0.2438, "step": 26525 }, { "epoch": 0.26, "grad_norm": 20.07469367980957, "learning_rate": 4.917900443861897e-06, "loss": 0.456, "step": 26550 }, { "epoch": 0.26, "grad_norm": 9.086527824401855, "learning_rate": 4.917776321407649e-06, "loss": 0.2524, "step": 26575 }, { "epoch": 0.26, "grad_norm": 11.780539512634277, "learning_rate": 4.9176521989534e-06, "loss": 0.3946, "step": 26600 }, { "epoch": 0.26, "grad_norm": 21.89257049560547, "learning_rate": 4.917528076499151e-06, "loss": 0.2869, "step": 26625 }, { "epoch": 0.26, "grad_norm": 19.430574417114258, "learning_rate": 4.917403954044903e-06, "loss": 0.5091, "step": 26650 }, { "epoch": 0.26, "grad_norm": 10.39022445678711, "learning_rate": 4.917279831590654e-06, "loss": 0.2358, "step": 26675 }, { "epoch": 0.26, "grad_norm": 15.321340560913086, "learning_rate": 4.917155709136406e-06, "loss": 0.449, "step": 26700 }, { "epoch": 0.26, "grad_norm": 7.410252571105957, "learning_rate": 4.917031586682158e-06, "loss": 0.2185, "step": 26725 }, { "epoch": 0.26, "grad_norm": 22.883499145507812, "learning_rate": 4.916907464227909e-06, "loss": 0.4926, "step": 26750 }, { "epoch": 0.26, "grad_norm": 11.358713150024414, "learning_rate": 4.91678334177366e-06, "loss": 0.219, "step": 26775 }, { "epoch": 0.26, "grad_norm": 10.280389785766602, "learning_rate": 4.916659219319412e-06, "loss": 0.4059, "step": 26800 }, { "epoch": 0.26, "grad_norm": 11.899345397949219, "learning_rate": 4.916535096865163e-06, "loss": 0.2783, "step": 26825 }, { "epoch": 0.26, "grad_norm": 14.38054084777832, "learning_rate": 4.9164109744109154e-06, "loss": 0.4861, "step": 26850 }, { "epoch": 0.26, "grad_norm": 11.894752502441406, "learning_rate": 4.916286851956667e-06, "loss": 0.2209, "step": 26875 }, { "epoch": 0.26, "grad_norm": 20.25960350036621, "learning_rate": 4.916162729502419e-06, "loss": 0.5047, "step": 26900 }, { "epoch": 0.26, "grad_norm": 15.813525199890137, "learning_rate": 4.91603860704817e-06, "loss": 0.2496, "step": 26925 }, { "epoch": 0.26, "grad_norm": 21.830677032470703, "learning_rate": 4.915914484593922e-06, "loss": 0.4469, "step": 26950 }, { "epoch": 0.27, "grad_norm": 16.989540100097656, "learning_rate": 4.915790362139673e-06, "loss": 0.2946, "step": 26975 }, { "epoch": 0.27, "grad_norm": 16.391223907470703, "learning_rate": 4.915666239685424e-06, "loss": 0.4001, "step": 27000 }, { "epoch": 0.27, "grad_norm": 8.20090103149414, "learning_rate": 4.9155421172311765e-06, "loss": 0.2778, "step": 27025 }, { "epoch": 0.27, "grad_norm": 16.43694496154785, "learning_rate": 4.915417994776928e-06, "loss": 0.4586, "step": 27050 }, { "epoch": 0.27, "grad_norm": 12.959839820861816, "learning_rate": 4.915293872322679e-06, "loss": 0.2319, "step": 27075 }, { "epoch": 0.27, "grad_norm": 17.38564682006836, "learning_rate": 4.915169749868431e-06, "loss": 0.4079, "step": 27100 }, { "epoch": 0.27, "grad_norm": 6.380073070526123, "learning_rate": 4.915045627414182e-06, "loss": 0.2282, "step": 27125 }, { "epoch": 0.27, "grad_norm": 15.371408462524414, "learning_rate": 4.914921504959933e-06, "loss": 0.4784, "step": 27150 }, { "epoch": 0.27, "grad_norm": 9.557467460632324, "learning_rate": 4.9147973825056846e-06, "loss": 0.2558, "step": 27175 }, { "epoch": 0.27, "grad_norm": 24.07956314086914, "learning_rate": 4.914673260051437e-06, "loss": 0.4458, "step": 27200 }, { "epoch": 0.27, "grad_norm": 10.34411907196045, "learning_rate": 4.914549137597188e-06, "loss": 0.2304, "step": 27225 }, { "epoch": 0.27, "grad_norm": 24.043739318847656, "learning_rate": 4.91442501514294e-06, "loss": 0.4333, "step": 27250 }, { "epoch": 0.27, "grad_norm": 13.435334205627441, "learning_rate": 4.914300892688691e-06, "loss": 0.2777, "step": 27275 }, { "epoch": 0.27, "grad_norm": 22.610233306884766, "learning_rate": 4.914176770234443e-06, "loss": 0.4864, "step": 27300 }, { "epoch": 0.27, "grad_norm": 6.689154148101807, "learning_rate": 4.914052647780194e-06, "loss": 0.2431, "step": 27325 }, { "epoch": 0.27, "grad_norm": 18.573148727416992, "learning_rate": 4.9139285253259464e-06, "loss": 0.4459, "step": 27350 }, { "epoch": 0.27, "grad_norm": 9.530710220336914, "learning_rate": 4.913804402871698e-06, "loss": 0.2642, "step": 27375 }, { "epoch": 0.27, "grad_norm": 21.1378173828125, "learning_rate": 4.913680280417449e-06, "loss": 0.5229, "step": 27400 }, { "epoch": 0.27, "grad_norm": 15.419917106628418, "learning_rate": 4.913556157963201e-06, "loss": 0.2461, "step": 27425 }, { "epoch": 0.27, "grad_norm": 22.67255401611328, "learning_rate": 4.913432035508952e-06, "loss": 0.4176, "step": 27450 }, { "epoch": 0.27, "grad_norm": 7.689266204833984, "learning_rate": 4.913307913054703e-06, "loss": 0.2316, "step": 27475 }, { "epoch": 0.27, "grad_norm": 15.953240394592285, "learning_rate": 4.913183790600455e-06, "loss": 0.4081, "step": 27500 }, { "epoch": 0.27, "grad_norm": 23.625181198120117, "learning_rate": 4.913059668146207e-06, "loss": 0.2434, "step": 27525 }, { "epoch": 0.27, "grad_norm": 13.160176277160645, "learning_rate": 4.912935545691958e-06, "loss": 0.4158, "step": 27550 }, { "epoch": 0.27, "grad_norm": 7.781037330627441, "learning_rate": 4.91281142323771e-06, "loss": 0.2732, "step": 27575 }, { "epoch": 0.27, "grad_norm": 21.5194091796875, "learning_rate": 4.912687300783461e-06, "loss": 0.4043, "step": 27600 }, { "epoch": 0.27, "grad_norm": 10.5822114944458, "learning_rate": 4.912563178329212e-06, "loss": 0.2044, "step": 27625 }, { "epoch": 0.27, "grad_norm": 18.087251663208008, "learning_rate": 4.912439055874964e-06, "loss": 0.4213, "step": 27650 }, { "epoch": 0.27, "grad_norm": 14.646686553955078, "learning_rate": 4.9123149334207156e-06, "loss": 0.2594, "step": 27675 }, { "epoch": 0.27, "grad_norm": 22.111051559448242, "learning_rate": 4.912190810966468e-06, "loss": 0.5031, "step": 27700 }, { "epoch": 0.27, "grad_norm": 7.2326340675354, "learning_rate": 4.912066688512219e-06, "loss": 0.2094, "step": 27725 }, { "epoch": 0.27, "grad_norm": 14.541918754577637, "learning_rate": 4.911942566057971e-06, "loss": 0.3741, "step": 27750 }, { "epoch": 0.27, "grad_norm": 6.115781307220459, "learning_rate": 4.911818443603722e-06, "loss": 0.2862, "step": 27775 }, { "epoch": 0.27, "grad_norm": 10.618847846984863, "learning_rate": 4.911694321149474e-06, "loss": 0.4126, "step": 27800 }, { "epoch": 0.27, "grad_norm": 6.512406826019287, "learning_rate": 4.911570198695225e-06, "loss": 0.2037, "step": 27825 }, { "epoch": 0.27, "grad_norm": 10.999505996704102, "learning_rate": 4.911446076240977e-06, "loss": 0.4447, "step": 27850 }, { "epoch": 0.27, "grad_norm": 6.369948863983154, "learning_rate": 4.911321953786729e-06, "loss": 0.2245, "step": 27875 }, { "epoch": 0.27, "grad_norm": 12.112593650817871, "learning_rate": 4.91119783133248e-06, "loss": 0.5439, "step": 27900 }, { "epoch": 0.27, "grad_norm": 11.06128215789795, "learning_rate": 4.911073708878231e-06, "loss": 0.2319, "step": 27925 }, { "epoch": 0.27, "grad_norm": 20.79974365234375, "learning_rate": 4.910949586423983e-06, "loss": 0.4078, "step": 27950 }, { "epoch": 0.28, "grad_norm": 11.161375999450684, "learning_rate": 4.910825463969734e-06, "loss": 0.2722, "step": 27975 }, { "epoch": 0.28, "grad_norm": 22.464561462402344, "learning_rate": 4.9107013415154856e-06, "loss": 0.423, "step": 28000 }, { "epoch": 0.28, "grad_norm": 13.442220687866211, "learning_rate": 4.910577219061237e-06, "loss": 0.2766, "step": 28025 }, { "epoch": 0.28, "grad_norm": 23.510345458984375, "learning_rate": 4.910453096606989e-06, "loss": 0.5052, "step": 28050 }, { "epoch": 0.28, "grad_norm": 9.489477157592773, "learning_rate": 4.91032897415274e-06, "loss": 0.3084, "step": 28075 }, { "epoch": 0.28, "grad_norm": 13.563572883605957, "learning_rate": 4.910204851698492e-06, "loss": 0.412, "step": 28100 }, { "epoch": 0.28, "grad_norm": 6.619947910308838, "learning_rate": 4.910080729244243e-06, "loss": 0.2503, "step": 28125 }, { "epoch": 0.28, "grad_norm": 18.859716415405273, "learning_rate": 4.909956606789995e-06, "loss": 0.4504, "step": 28150 }, { "epoch": 0.28, "grad_norm": 5.3565144538879395, "learning_rate": 4.909832484335747e-06, "loss": 0.1909, "step": 28175 }, { "epoch": 0.28, "grad_norm": 14.814355850219727, "learning_rate": 4.909708361881499e-06, "loss": 0.4701, "step": 28200 }, { "epoch": 0.28, "grad_norm": 9.851243019104004, "learning_rate": 4.90958423942725e-06, "loss": 0.2105, "step": 28225 }, { "epoch": 0.28, "grad_norm": 26.478961944580078, "learning_rate": 4.909460116973001e-06, "loss": 0.4806, "step": 28250 }, { "epoch": 0.28, "grad_norm": 13.011940002441406, "learning_rate": 4.909335994518753e-06, "loss": 0.2681, "step": 28275 }, { "epoch": 0.28, "grad_norm": 18.809919357299805, "learning_rate": 4.909211872064504e-06, "loss": 0.47, "step": 28300 }, { "epoch": 0.28, "grad_norm": 9.689045906066895, "learning_rate": 4.9090877496102555e-06, "loss": 0.2313, "step": 28325 }, { "epoch": 0.28, "grad_norm": 27.521167755126953, "learning_rate": 4.908963627156008e-06, "loss": 0.4804, "step": 28350 }, { "epoch": 0.28, "grad_norm": 8.465231895446777, "learning_rate": 4.908839504701759e-06, "loss": 0.2593, "step": 28375 }, { "epoch": 0.28, "grad_norm": 13.466423034667969, "learning_rate": 4.90871538224751e-06, "loss": 0.4332, "step": 28400 }, { "epoch": 0.28, "grad_norm": 11.316366195678711, "learning_rate": 4.908591259793262e-06, "loss": 0.2717, "step": 28425 }, { "epoch": 0.28, "grad_norm": 17.727825164794922, "learning_rate": 4.908467137339013e-06, "loss": 0.4472, "step": 28450 }, { "epoch": 0.28, "grad_norm": 19.116090774536133, "learning_rate": 4.908343014884765e-06, "loss": 0.2737, "step": 28475 }, { "epoch": 0.28, "grad_norm": 24.942476272583008, "learning_rate": 4.9082188924305166e-06, "loss": 0.4726, "step": 28500 }, { "epoch": 0.28, "grad_norm": 15.413980484008789, "learning_rate": 4.908094769976269e-06, "loss": 0.2633, "step": 28525 }, { "epoch": 0.28, "grad_norm": 33.141685485839844, "learning_rate": 4.90797064752202e-06, "loss": 0.3864, "step": 28550 }, { "epoch": 0.28, "grad_norm": 6.488962650299072, "learning_rate": 4.907846525067772e-06, "loss": 0.265, "step": 28575 }, { "epoch": 0.28, "grad_norm": 13.548847198486328, "learning_rate": 4.907722402613523e-06, "loss": 0.4647, "step": 28600 }, { "epoch": 0.28, "grad_norm": 10.375360488891602, "learning_rate": 4.907598280159274e-06, "loss": 0.2395, "step": 28625 }, { "epoch": 0.28, "grad_norm": 22.487398147583008, "learning_rate": 4.907474157705026e-06, "loss": 0.5292, "step": 28650 }, { "epoch": 0.28, "grad_norm": 10.767232894897461, "learning_rate": 4.907350035250778e-06, "loss": 0.2512, "step": 28675 }, { "epoch": 0.28, "grad_norm": 19.621755599975586, "learning_rate": 4.907225912796529e-06, "loss": 0.4727, "step": 28700 }, { "epoch": 0.28, "grad_norm": 10.0742826461792, "learning_rate": 4.907101790342281e-06, "loss": 0.2382, "step": 28725 }, { "epoch": 0.28, "grad_norm": 21.700532913208008, "learning_rate": 4.906977667888032e-06, "loss": 0.481, "step": 28750 }, { "epoch": 0.28, "grad_norm": 11.80574893951416, "learning_rate": 4.906853545433783e-06, "loss": 0.219, "step": 28775 }, { "epoch": 0.28, "grad_norm": 15.855318069458008, "learning_rate": 4.906729422979535e-06, "loss": 0.4113, "step": 28800 }, { "epoch": 0.28, "grad_norm": 9.70993423461914, "learning_rate": 4.9066053005252865e-06, "loss": 0.2596, "step": 28825 }, { "epoch": 0.28, "grad_norm": 20.74422836303711, "learning_rate": 4.906481178071038e-06, "loss": 0.4907, "step": 28850 }, { "epoch": 0.28, "grad_norm": 10.459756851196289, "learning_rate": 4.90635705561679e-06, "loss": 0.2361, "step": 28875 }, { "epoch": 0.28, "grad_norm": 24.74130630493164, "learning_rate": 4.906232933162541e-06, "loss": 0.5145, "step": 28900 }, { "epoch": 0.28, "grad_norm": 11.406329154968262, "learning_rate": 4.906108810708293e-06, "loss": 0.2466, "step": 28925 }, { "epoch": 0.28, "grad_norm": 15.891718864440918, "learning_rate": 4.905984688254044e-06, "loss": 0.4612, "step": 28950 }, { "epoch": 0.28, "grad_norm": 13.365447998046875, "learning_rate": 4.905860565799796e-06, "loss": 0.2651, "step": 28975 }, { "epoch": 0.29, "grad_norm": 16.206947326660156, "learning_rate": 4.9057364433455476e-06, "loss": 0.4459, "step": 29000 }, { "epoch": 0.29, "grad_norm": 14.948629379272461, "learning_rate": 4.905612320891299e-06, "loss": 0.25, "step": 29025 }, { "epoch": 0.29, "grad_norm": 17.276363372802734, "learning_rate": 4.905493163335221e-06, "loss": 0.5095, "step": 29050 }, { "epoch": 0.29, "grad_norm": 7.799026966094971, "learning_rate": 4.905369040880972e-06, "loss": 0.2115, "step": 29075 }, { "epoch": 0.29, "grad_norm": 18.937807083129883, "learning_rate": 4.905244918426724e-06, "loss": 0.4148, "step": 29100 }, { "epoch": 0.29, "grad_norm": 8.857465744018555, "learning_rate": 4.905120795972475e-06, "loss": 0.2497, "step": 29125 }, { "epoch": 0.29, "grad_norm": 17.563817977905273, "learning_rate": 4.904996673518226e-06, "loss": 0.4464, "step": 29150 }, { "epoch": 0.29, "grad_norm": 7.028221130371094, "learning_rate": 4.904872551063978e-06, "loss": 0.2236, "step": 29175 }, { "epoch": 0.29, "grad_norm": Infinity, "learning_rate": 4.9047533935078994e-06, "loss": 0.3761, "step": 29200 }, { "epoch": 0.29, "grad_norm": 8.116544723510742, "learning_rate": 4.904629271053651e-06, "loss": 0.2672, "step": 29225 }, { "epoch": 0.29, "grad_norm": 15.327011108398438, "learning_rate": 4.904505148599403e-06, "loss": 0.4855, "step": 29250 }, { "epoch": 0.29, "grad_norm": 14.6758394241333, "learning_rate": 4.904381026145154e-06, "loss": 0.217, "step": 29275 }, { "epoch": 0.29, "grad_norm": 20.817354202270508, "learning_rate": 4.904256903690905e-06, "loss": 0.4285, "step": 29300 }, { "epoch": 0.29, "grad_norm": 10.438362121582031, "learning_rate": 4.904132781236657e-06, "loss": 0.2838, "step": 29325 }, { "epoch": 0.29, "grad_norm": 24.039831161499023, "learning_rate": 4.904008658782408e-06, "loss": 0.4941, "step": 29350 }, { "epoch": 0.29, "grad_norm": 30.887714385986328, "learning_rate": 4.9038845363281605e-06, "loss": 0.2738, "step": 29375 }, { "epoch": 0.29, "grad_norm": 15.389745712280273, "learning_rate": 4.903760413873912e-06, "loss": 0.5001, "step": 29400 }, { "epoch": 0.29, "grad_norm": 11.1605224609375, "learning_rate": 4.903636291419664e-06, "loss": 0.2569, "step": 29425 }, { "epoch": 0.29, "grad_norm": 21.150890350341797, "learning_rate": 4.903512168965415e-06, "loss": 0.4264, "step": 29450 }, { "epoch": 0.29, "grad_norm": 12.421825408935547, "learning_rate": 4.903388046511167e-06, "loss": 0.2177, "step": 29475 }, { "epoch": 0.29, "grad_norm": 24.571510314941406, "learning_rate": 4.903263924056918e-06, "loss": 0.4853, "step": 29500 }, { "epoch": 0.29, "grad_norm": 4.114390850067139, "learning_rate": 4.903139801602669e-06, "loss": 0.2307, "step": 29525 }, { "epoch": 0.29, "grad_norm": 7.168054103851318, "learning_rate": 4.9030156791484215e-06, "loss": 0.3603, "step": 29550 }, { "epoch": 0.29, "grad_norm": 12.541171073913574, "learning_rate": 4.902891556694173e-06, "loss": 0.2998, "step": 29575 }, { "epoch": 0.29, "grad_norm": 17.250200271606445, "learning_rate": 4.902767434239924e-06, "loss": 0.4068, "step": 29600 }, { "epoch": 0.29, "grad_norm": 17.230915069580078, "learning_rate": 4.902643311785676e-06, "loss": 0.2351, "step": 29625 }, { "epoch": 0.29, "grad_norm": 15.356855392456055, "learning_rate": 4.902519189331427e-06, "loss": 0.4046, "step": 29650 }, { "epoch": 0.29, "grad_norm": 11.8661527633667, "learning_rate": 4.902395066877178e-06, "loss": 0.2376, "step": 29675 }, { "epoch": 0.29, "grad_norm": 22.844989776611328, "learning_rate": 4.9022709444229304e-06, "loss": 0.5136, "step": 29700 }, { "epoch": 0.29, "grad_norm": 8.099837303161621, "learning_rate": 4.902146821968682e-06, "loss": 0.231, "step": 29725 }, { "epoch": 0.29, "grad_norm": 23.136394500732422, "learning_rate": 4.902022699514433e-06, "loss": 0.4086, "step": 29750 }, { "epoch": 0.29, "grad_norm": 16.11899757385254, "learning_rate": 4.901898577060185e-06, "loss": 0.1949, "step": 29775 }, { "epoch": 0.29, "grad_norm": 21.619325637817383, "learning_rate": 4.901774454605936e-06, "loss": 0.4923, "step": 29800 }, { "epoch": 0.29, "grad_norm": 20.57547378540039, "learning_rate": 4.901650332151688e-06, "loss": 0.2952, "step": 29825 }, { "epoch": 0.29, "grad_norm": 18.279897689819336, "learning_rate": 4.901526209697439e-06, "loss": 0.3929, "step": 29850 }, { "epoch": 0.29, "grad_norm": 12.913008689880371, "learning_rate": 4.9014020872431915e-06, "loss": 0.2426, "step": 29875 }, { "epoch": 0.29, "grad_norm": 30.48732566833496, "learning_rate": 4.901277964788943e-06, "loss": 0.4773, "step": 29900 }, { "epoch": 0.29, "grad_norm": 12.39587116241455, "learning_rate": 4.901153842334695e-06, "loss": 0.2184, "step": 29925 }, { "epoch": 0.29, "grad_norm": 13.415367126464844, "learning_rate": 4.901029719880446e-06, "loss": 0.4595, "step": 29950 }, { "epoch": 0.29, "grad_norm": 9.864794731140137, "learning_rate": 4.900905597426197e-06, "loss": 0.2676, "step": 29975 }, { "epoch": 0.29, "grad_norm": 24.05183982849121, "learning_rate": 4.900781474971948e-06, "loss": 0.4459, "step": 30000 }, { "epoch": 0.3, "grad_norm": 4.988082408905029, "learning_rate": 4.9006573525177e-06, "loss": 0.3045, "step": 30025 }, { "epoch": 0.3, "grad_norm": 27.20553970336914, "learning_rate": 4.900533230063452e-06, "loss": 0.4733, "step": 30050 }, { "epoch": 0.3, "grad_norm": 10.233308792114258, "learning_rate": 4.900409107609203e-06, "loss": 0.2184, "step": 30075 }, { "epoch": 0.3, "grad_norm": 23.435041427612305, "learning_rate": 4.900284985154955e-06, "loss": 0.4569, "step": 30100 }, { "epoch": 0.3, "grad_norm": 13.288435935974121, "learning_rate": 4.900160862700706e-06, "loss": 0.2726, "step": 30125 }, { "epoch": 0.3, "grad_norm": 20.139636993408203, "learning_rate": 4.900036740246457e-06, "loss": 0.4489, "step": 30150 }, { "epoch": 0.3, "grad_norm": 5.712210178375244, "learning_rate": 4.899912617792209e-06, "loss": 0.2441, "step": 30175 }, { "epoch": 0.3, "grad_norm": 21.390338897705078, "learning_rate": 4.899788495337961e-06, "loss": 0.4473, "step": 30200 }, { "epoch": 0.3, "grad_norm": 11.23154067993164, "learning_rate": 4.899664372883713e-06, "loss": 0.3295, "step": 30225 }, { "epoch": 0.3, "grad_norm": 7.364816188812256, "learning_rate": 4.899540250429464e-06, "loss": 0.4184, "step": 30250 }, { "epoch": 0.3, "grad_norm": 17.31032943725586, "learning_rate": 4.899416127975216e-06, "loss": 0.2375, "step": 30275 }, { "epoch": 0.3, "grad_norm": 15.222180366516113, "learning_rate": 4.899292005520967e-06, "loss": 0.5393, "step": 30300 }, { "epoch": 0.3, "grad_norm": 7.721869468688965, "learning_rate": 4.899167883066719e-06, "loss": 0.2413, "step": 30325 }, { "epoch": 0.3, "grad_norm": 22.446746826171875, "learning_rate": 4.89904376061247e-06, "loss": 0.4786, "step": 30350 }, { "epoch": 0.3, "grad_norm": 8.733447074890137, "learning_rate": 4.898919638158222e-06, "loss": 0.2575, "step": 30375 }, { "epoch": 0.3, "grad_norm": 27.768762588500977, "learning_rate": 4.898795515703974e-06, "loss": 0.5059, "step": 30400 }, { "epoch": 0.3, "grad_norm": 9.181151390075684, "learning_rate": 4.898671393249725e-06, "loss": 0.281, "step": 30425 }, { "epoch": 0.3, "grad_norm": 14.989954948425293, "learning_rate": 4.898547270795476e-06, "loss": 0.4785, "step": 30450 }, { "epoch": 0.3, "grad_norm": 14.702853202819824, "learning_rate": 4.898423148341228e-06, "loss": 0.209, "step": 30475 }, { "epoch": 0.3, "grad_norm": 27.944528579711914, "learning_rate": 4.898299025886979e-06, "loss": 0.4396, "step": 30500 }, { "epoch": 0.3, "grad_norm": 4.733188629150391, "learning_rate": 4.898174903432731e-06, "loss": 0.2157, "step": 30525 }, { "epoch": 0.3, "grad_norm": 17.947776794433594, "learning_rate": 4.898050780978483e-06, "loss": 0.4835, "step": 30550 }, { "epoch": 0.3, "grad_norm": 7.932848930358887, "learning_rate": 4.897926658524234e-06, "loss": 0.2438, "step": 30575 }, { "epoch": 0.3, "grad_norm": 15.782425880432129, "learning_rate": 4.897802536069985e-06, "loss": 0.4953, "step": 30600 }, { "epoch": 0.3, "grad_norm": 14.429121971130371, "learning_rate": 4.897678413615737e-06, "loss": 0.2622, "step": 30625 }, { "epoch": 0.3, "grad_norm": 23.555171966552734, "learning_rate": 4.897554291161488e-06, "loss": 0.5368, "step": 30650 }, { "epoch": 0.3, "grad_norm": 7.409854412078857, "learning_rate": 4.89743016870724e-06, "loss": 0.2014, "step": 30675 }, { "epoch": 0.3, "grad_norm": 19.479080200195312, "learning_rate": 4.897306046252992e-06, "loss": 0.5021, "step": 30700 }, { "epoch": 0.3, "grad_norm": 12.65067195892334, "learning_rate": 4.897181923798744e-06, "loss": 0.2418, "step": 30725 }, { "epoch": 0.3, "grad_norm": 21.590219497680664, "learning_rate": 4.897057801344495e-06, "loss": 0.3366, "step": 30750 }, { "epoch": 0.3, "grad_norm": 6.08899450302124, "learning_rate": 4.896933678890247e-06, "loss": 0.2257, "step": 30775 }, { "epoch": 0.3, "grad_norm": 16.06814956665039, "learning_rate": 4.896809556435998e-06, "loss": 0.4579, "step": 30800 }, { "epoch": 0.3, "grad_norm": 13.633710861206055, "learning_rate": 4.896685433981749e-06, "loss": 0.217, "step": 30825 }, { "epoch": 0.3, "grad_norm": 27.16683006286621, "learning_rate": 4.896561311527501e-06, "loss": 0.389, "step": 30850 }, { "epoch": 0.3, "grad_norm": 11.446057319641113, "learning_rate": 4.896437189073253e-06, "loss": 0.2457, "step": 30875 }, { "epoch": 0.3, "grad_norm": 24.85242462158203, "learning_rate": 4.896313066619004e-06, "loss": 0.5093, "step": 30900 }, { "epoch": 0.3, "grad_norm": 11.949667930603027, "learning_rate": 4.896188944164755e-06, "loss": 0.2627, "step": 30925 }, { "epoch": 0.3, "grad_norm": 16.270618438720703, "learning_rate": 4.896064821710507e-06, "loss": 0.4356, "step": 30950 }, { "epoch": 0.3, "grad_norm": 2.9574944972991943, "learning_rate": 4.895940699256258e-06, "loss": 0.2055, "step": 30975 }, { "epoch": 0.3, "grad_norm": 21.694896697998047, "learning_rate": 4.8958165768020095e-06, "loss": 0.4998, "step": 31000 }, { "epoch": 0.31, "grad_norm": 9.101066589355469, "learning_rate": 4.895692454347762e-06, "loss": 0.2908, "step": 31025 }, { "epoch": 0.31, "grad_norm": 23.804655075073242, "learning_rate": 4.895568331893513e-06, "loss": 0.4599, "step": 31050 }, { "epoch": 0.31, "grad_norm": 8.081365585327148, "learning_rate": 4.895444209439265e-06, "loss": 0.2545, "step": 31075 }, { "epoch": 0.31, "grad_norm": 13.322080612182617, "learning_rate": 4.895320086985016e-06, "loss": 0.4731, "step": 31100 }, { "epoch": 0.31, "grad_norm": 11.195110321044922, "learning_rate": 4.895195964530768e-06, "loss": 0.2604, "step": 31125 }, { "epoch": 0.31, "grad_norm": 18.525833129882812, "learning_rate": 4.895071842076519e-06, "loss": 0.4495, "step": 31150 }, { "epoch": 0.31, "grad_norm": 9.180511474609375, "learning_rate": 4.894947719622271e-06, "loss": 0.2283, "step": 31175 }, { "epoch": 0.31, "grad_norm": 18.638399124145508, "learning_rate": 4.894823597168023e-06, "loss": 0.4286, "step": 31200 }, { "epoch": 0.31, "grad_norm": 9.006540298461914, "learning_rate": 4.894699474713774e-06, "loss": 0.2027, "step": 31225 }, { "epoch": 0.31, "grad_norm": 15.307575225830078, "learning_rate": 4.894575352259526e-06, "loss": 0.4208, "step": 31250 }, { "epoch": 0.31, "grad_norm": 17.132156372070312, "learning_rate": 4.894451229805277e-06, "loss": 0.2527, "step": 31275 }, { "epoch": 0.31, "grad_norm": 25.058185577392578, "learning_rate": 4.894327107351028e-06, "loss": 0.5258, "step": 31300 }, { "epoch": 0.31, "grad_norm": 5.631418228149414, "learning_rate": 4.89420298489678e-06, "loss": 0.2038, "step": 31325 }, { "epoch": 0.31, "grad_norm": 28.752391815185547, "learning_rate": 4.8940788624425316e-06, "loss": 0.4326, "step": 31350 }, { "epoch": 0.31, "grad_norm": 10.587615013122559, "learning_rate": 4.893954739988283e-06, "loss": 0.2231, "step": 31375 }, { "epoch": 0.31, "grad_norm": 22.085416793823242, "learning_rate": 4.893830617534035e-06, "loss": 0.411, "step": 31400 }, { "epoch": 0.31, "grad_norm": 12.607439041137695, "learning_rate": 4.893706495079786e-06, "loss": 0.2205, "step": 31425 }, { "epoch": 0.31, "grad_norm": 32.6849365234375, "learning_rate": 4.893582372625538e-06, "loss": 0.4429, "step": 31450 }, { "epoch": 0.31, "grad_norm": 14.450944900512695, "learning_rate": 4.893458250171289e-06, "loss": 0.2038, "step": 31475 }, { "epoch": 0.31, "grad_norm": 18.544363021850586, "learning_rate": 4.893334127717041e-06, "loss": 0.5218, "step": 31500 }, { "epoch": 0.31, "grad_norm": 9.621307373046875, "learning_rate": 4.893210005262793e-06, "loss": 0.2355, "step": 31525 }, { "epoch": 0.31, "grad_norm": 16.47926902770996, "learning_rate": 4.893085882808545e-06, "loss": 0.458, "step": 31550 }, { "epoch": 0.31, "grad_norm": 18.771451950073242, "learning_rate": 4.892961760354296e-06, "loss": 0.2895, "step": 31575 }, { "epoch": 0.31, "grad_norm": 21.69240379333496, "learning_rate": 4.892837637900047e-06, "loss": 0.3942, "step": 31600 }, { "epoch": 0.31, "grad_norm": 12.488651275634766, "learning_rate": 4.892713515445799e-06, "loss": 0.2487, "step": 31625 }, { "epoch": 0.31, "grad_norm": 18.76475715637207, "learning_rate": 4.89258939299155e-06, "loss": 0.437, "step": 31650 }, { "epoch": 0.31, "grad_norm": 10.794158935546875, "learning_rate": 4.8924652705373016e-06, "loss": 0.3074, "step": 31675 }, { "epoch": 0.31, "grad_norm": 17.19283676147461, "learning_rate": 4.892341148083054e-06, "loss": 0.4225, "step": 31700 }, { "epoch": 0.31, "grad_norm": 14.975164413452148, "learning_rate": 4.892217025628805e-06, "loss": 0.2862, "step": 31725 }, { "epoch": 0.31, "grad_norm": 21.711851119995117, "learning_rate": 4.892092903174556e-06, "loss": 0.4557, "step": 31750 }, { "epoch": 0.31, "grad_norm": 9.737506866455078, "learning_rate": 4.891968780720307e-06, "loss": 0.2596, "step": 31775 }, { "epoch": 0.31, "grad_norm": 19.307641983032227, "learning_rate": 4.891844658266059e-06, "loss": 0.4714, "step": 31800 }, { "epoch": 0.31, "grad_norm": 13.541339874267578, "learning_rate": 4.8917205358118105e-06, "loss": 0.2655, "step": 31825 }, { "epoch": 0.31, "grad_norm": 24.484169006347656, "learning_rate": 4.8915964133575626e-06, "loss": 0.4861, "step": 31850 }, { "epoch": 0.31, "grad_norm": 9.926254272460938, "learning_rate": 4.891472290903314e-06, "loss": 0.2515, "step": 31875 }, { "epoch": 0.31, "grad_norm": 17.716936111450195, "learning_rate": 4.891348168449066e-06, "loss": 0.3695, "step": 31900 }, { "epoch": 0.31, "grad_norm": 15.83248519897461, "learning_rate": 4.891224045994817e-06, "loss": 0.2514, "step": 31925 }, { "epoch": 0.31, "grad_norm": 20.925662994384766, "learning_rate": 4.891099923540569e-06, "loss": 0.4077, "step": 31950 }, { "epoch": 0.31, "grad_norm": 10.205485343933105, "learning_rate": 4.89097580108632e-06, "loss": 0.2092, "step": 31975 }, { "epoch": 0.31, "grad_norm": 20.384462356567383, "learning_rate": 4.8908516786320715e-06, "loss": 0.5106, "step": 32000 }, { "epoch": 0.31, "grad_norm": 10.387514114379883, "learning_rate": 4.890727556177824e-06, "loss": 0.2177, "step": 32025 }, { "epoch": 0.32, "grad_norm": 16.379533767700195, "learning_rate": 4.890603433723575e-06, "loss": 0.4383, "step": 32050 }, { "epoch": 0.32, "grad_norm": 16.29486083984375, "learning_rate": 4.890479311269326e-06, "loss": 0.2537, "step": 32075 }, { "epoch": 0.32, "grad_norm": 19.6564884185791, "learning_rate": 4.890355188815078e-06, "loss": 0.442, "step": 32100 }, { "epoch": 0.32, "grad_norm": 9.314864158630371, "learning_rate": 4.890231066360829e-06, "loss": 0.2209, "step": 32125 }, { "epoch": 0.32, "grad_norm": 20.401992797851562, "learning_rate": 4.8901069439065805e-06, "loss": 0.4085, "step": 32150 }, { "epoch": 0.32, "grad_norm": 10.274618148803711, "learning_rate": 4.8899828214523326e-06, "loss": 0.208, "step": 32175 }, { "epoch": 0.32, "grad_norm": 15.45626163482666, "learning_rate": 4.889858698998084e-06, "loss": 0.4536, "step": 32200 }, { "epoch": 0.32, "grad_norm": 7.791018486022949, "learning_rate": 4.889734576543835e-06, "loss": 0.2531, "step": 32225 }, { "epoch": 0.32, "grad_norm": 20.296768188476562, "learning_rate": 4.889610454089587e-06, "loss": 0.4265, "step": 32250 }, { "epoch": 0.32, "grad_norm": 8.325069427490234, "learning_rate": 4.889486331635338e-06, "loss": 0.2097, "step": 32275 }, { "epoch": 0.32, "grad_norm": 20.939313888549805, "learning_rate": 4.88936220918109e-06, "loss": 0.5143, "step": 32300 }, { "epoch": 0.32, "grad_norm": 9.743058204650879, "learning_rate": 4.8892380867268415e-06, "loss": 0.2357, "step": 32325 }, { "epoch": 0.32, "grad_norm": 21.425029754638672, "learning_rate": 4.889113964272594e-06, "loss": 0.4797, "step": 32350 }, { "epoch": 0.32, "grad_norm": 12.696273803710938, "learning_rate": 4.888989841818345e-06, "loss": 0.2262, "step": 32375 }, { "epoch": 0.32, "grad_norm": 14.367252349853516, "learning_rate": 4.888865719364097e-06, "loss": 0.476, "step": 32400 }, { "epoch": 0.32, "grad_norm": 9.464071273803711, "learning_rate": 4.888741596909848e-06, "loss": 0.2616, "step": 32425 }, { "epoch": 0.32, "grad_norm": 17.24646759033203, "learning_rate": 4.888617474455599e-06, "loss": 0.3939, "step": 32450 }, { "epoch": 0.32, "grad_norm": 11.21302604675293, "learning_rate": 4.888493352001351e-06, "loss": 0.2816, "step": 32475 }, { "epoch": 0.32, "grad_norm": 18.130083084106445, "learning_rate": 4.8883692295471025e-06, "loss": 0.4259, "step": 32500 }, { "epoch": 0.32, "grad_norm": 10.955322265625, "learning_rate": 4.888245107092854e-06, "loss": 0.2664, "step": 32525 }, { "epoch": 0.32, "grad_norm": 19.145809173583984, "learning_rate": 4.888120984638606e-06, "loss": 0.4412, "step": 32550 }, { "epoch": 0.32, "grad_norm": 11.830008506774902, "learning_rate": 4.887996862184357e-06, "loss": 0.2578, "step": 32575 }, { "epoch": 0.32, "grad_norm": 18.33292007446289, "learning_rate": 4.887872739730108e-06, "loss": 0.4714, "step": 32600 }, { "epoch": 0.32, "grad_norm": 6.932930946350098, "learning_rate": 4.8877486172758594e-06, "loss": 0.2385, "step": 32625 }, { "epoch": 0.32, "grad_norm": 19.96645164489746, "learning_rate": 4.8876244948216115e-06, "loss": 0.3814, "step": 32650 }, { "epoch": 0.32, "grad_norm": 8.06302261352539, "learning_rate": 4.887500372367363e-06, "loss": 0.2169, "step": 32675 }, { "epoch": 0.32, "grad_norm": 31.562110900878906, "learning_rate": 4.887376249913115e-06, "loss": 0.4447, "step": 32700 }, { "epoch": 0.32, "grad_norm": 8.181037902832031, "learning_rate": 4.887252127458866e-06, "loss": 0.2621, "step": 32725 }, { "epoch": 0.32, "grad_norm": 16.73958396911621, "learning_rate": 4.887128005004618e-06, "loss": 0.4513, "step": 32750 }, { "epoch": 0.32, "grad_norm": 11.382159233093262, "learning_rate": 4.887003882550369e-06, "loss": 0.2434, "step": 32775 }, { "epoch": 0.32, "grad_norm": 17.365325927734375, "learning_rate": 4.886879760096121e-06, "loss": 0.4084, "step": 32800 }, { "epoch": 0.32, "grad_norm": 7.344062805175781, "learning_rate": 4.8867556376418725e-06, "loss": 0.2251, "step": 32825 }, { "epoch": 0.32, "grad_norm": 24.347299575805664, "learning_rate": 4.886631515187624e-06, "loss": 0.3693, "step": 32850 }, { "epoch": 0.32, "grad_norm": 5.235442638397217, "learning_rate": 4.886507392733376e-06, "loss": 0.2635, "step": 32875 }, { "epoch": 0.32, "grad_norm": 16.78791046142578, "learning_rate": 4.886383270279127e-06, "loss": 0.413, "step": 32900 }, { "epoch": 0.32, "grad_norm": 7.730693340301514, "learning_rate": 4.886259147824878e-06, "loss": 0.2102, "step": 32925 }, { "epoch": 0.32, "grad_norm": 21.03805160522461, "learning_rate": 4.88613502537063e-06, "loss": 0.4298, "step": 32950 }, { "epoch": 0.32, "grad_norm": 16.324060440063477, "learning_rate": 4.8860109029163815e-06, "loss": 0.229, "step": 32975 }, { "epoch": 0.32, "grad_norm": 14.368544578552246, "learning_rate": 4.885886780462133e-06, "loss": 0.4728, "step": 33000 }, { "epoch": 0.32, "grad_norm": 11.374330520629883, "learning_rate": 4.885762658007885e-06, "loss": 0.2887, "step": 33025 }, { "epoch": 0.32, "grad_norm": 3.3956761360168457, "learning_rate": 4.885638535553636e-06, "loss": 0.3966, "step": 33050 }, { "epoch": 0.33, "grad_norm": 11.738930702209473, "learning_rate": 4.885514413099387e-06, "loss": 0.2127, "step": 33075 }, { "epoch": 0.33, "grad_norm": 16.71782112121582, "learning_rate": 4.885390290645139e-06, "loss": 0.4428, "step": 33100 }, { "epoch": 0.33, "grad_norm": 11.758820533752441, "learning_rate": 4.8852661681908905e-06, "loss": 0.2396, "step": 33125 }, { "epoch": 0.33, "grad_norm": 20.99911117553711, "learning_rate": 4.8851420457366425e-06, "loss": 0.4533, "step": 33150 }, { "epoch": 0.33, "grad_norm": 12.666563034057617, "learning_rate": 4.885017923282394e-06, "loss": 0.3143, "step": 33175 }, { "epoch": 0.33, "grad_norm": 9.02462387084961, "learning_rate": 4.884893800828146e-06, "loss": 0.3876, "step": 33200 }, { "epoch": 0.33, "grad_norm": 8.895522117614746, "learning_rate": 4.884769678373897e-06, "loss": 0.2822, "step": 33225 }, { "epoch": 0.33, "grad_norm": 14.76952075958252, "learning_rate": 4.884645555919649e-06, "loss": 0.3772, "step": 33250 }, { "epoch": 0.33, "grad_norm": 9.470182418823242, "learning_rate": 4.8845214334654e-06, "loss": 0.262, "step": 33275 }, { "epoch": 0.33, "grad_norm": 31.18211555480957, "learning_rate": 4.884402275909321e-06, "loss": 0.493, "step": 33300 }, { "epoch": 0.33, "grad_norm": 8.555150985717773, "learning_rate": 4.884278153455073e-06, "loss": 0.2343, "step": 33325 }, { "epoch": 0.33, "grad_norm": 19.37961769104004, "learning_rate": 4.8841540310008246e-06, "loss": 0.4063, "step": 33350 }, { "epoch": 0.33, "grad_norm": 11.869834899902344, "learning_rate": 4.884029908546576e-06, "loss": 0.2431, "step": 33375 }, { "epoch": 0.33, "grad_norm": 23.434165954589844, "learning_rate": 4.883905786092328e-06, "loss": 0.4362, "step": 33400 }, { "epoch": 0.33, "grad_norm": 15.037479400634766, "learning_rate": 4.883781663638079e-06, "loss": 0.2752, "step": 33425 }, { "epoch": 0.33, "grad_norm": 20.35945701599121, "learning_rate": 4.88365754118383e-06, "loss": 0.4385, "step": 33450 }, { "epoch": 0.33, "grad_norm": 8.451273918151855, "learning_rate": 4.883533418729582e-06, "loss": 0.2143, "step": 33475 }, { "epoch": 0.33, "grad_norm": 20.1047306060791, "learning_rate": 4.8834092962753335e-06, "loss": 0.5059, "step": 33500 }, { "epoch": 0.33, "grad_norm": 9.175705909729004, "learning_rate": 4.883285173821085e-06, "loss": 0.2582, "step": 33525 }, { "epoch": 0.33, "grad_norm": 23.0572452545166, "learning_rate": 4.883161051366837e-06, "loss": 0.4232, "step": 33550 }, { "epoch": 0.33, "grad_norm": 11.696513175964355, "learning_rate": 4.883036928912588e-06, "loss": 0.2478, "step": 33575 }, { "epoch": 0.33, "grad_norm": 19.635473251342773, "learning_rate": 4.88291280645834e-06, "loss": 0.4456, "step": 33600 }, { "epoch": 0.33, "grad_norm": 9.202723503112793, "learning_rate": 4.882788684004091e-06, "loss": 0.2078, "step": 33625 }, { "epoch": 0.33, "grad_norm": 24.100383758544922, "learning_rate": 4.882664561549843e-06, "loss": 0.4582, "step": 33650 }, { "epoch": 0.33, "grad_norm": 11.273261070251465, "learning_rate": 4.8825404390955945e-06, "loss": 0.2456, "step": 33675 }, { "epoch": 0.33, "grad_norm": 16.87611961364746, "learning_rate": 4.882416316641347e-06, "loss": 0.491, "step": 33700 }, { "epoch": 0.33, "grad_norm": 5.664421558380127, "learning_rate": 4.882292194187098e-06, "loss": 0.2051, "step": 33725 }, { "epoch": 0.33, "grad_norm": 19.234724044799805, "learning_rate": 4.882168071732849e-06, "loss": 0.4113, "step": 33750 }, { "epoch": 0.33, "grad_norm": 9.27294635772705, "learning_rate": 4.882043949278601e-06, "loss": 0.2475, "step": 33775 }, { "epoch": 0.33, "grad_norm": 11.192533493041992, "learning_rate": 4.881919826824352e-06, "loss": 0.4147, "step": 33800 }, { "epoch": 0.33, "grad_norm": 16.093074798583984, "learning_rate": 4.8817957043701035e-06, "loss": 0.2225, "step": 33825 }, { "epoch": 0.33, "grad_norm": 20.077917098999023, "learning_rate": 4.8816715819158556e-06, "loss": 0.4994, "step": 33850 }, { "epoch": 0.33, "grad_norm": 13.216206550598145, "learning_rate": 4.881547459461607e-06, "loss": 0.2262, "step": 33875 }, { "epoch": 0.33, "grad_norm": 21.833314895629883, "learning_rate": 4.881423337007358e-06, "loss": 0.4576, "step": 33900 }, { "epoch": 0.33, "grad_norm": 12.850839614868164, "learning_rate": 4.88129921455311e-06, "loss": 0.2394, "step": 33925 }, { "epoch": 0.33, "grad_norm": 14.77197551727295, "learning_rate": 4.881175092098861e-06, "loss": 0.4053, "step": 33950 }, { "epoch": 0.33, "grad_norm": 15.045121192932129, "learning_rate": 4.8810509696446125e-06, "loss": 0.2865, "step": 33975 }, { "epoch": 0.33, "grad_norm": 25.62055778503418, "learning_rate": 4.8809268471903645e-06, "loss": 0.4995, "step": 34000 }, { "epoch": 0.33, "grad_norm": 8.531721115112305, "learning_rate": 4.880802724736116e-06, "loss": 0.2529, "step": 34025 }, { "epoch": 0.33, "grad_norm": 20.9290714263916, "learning_rate": 4.880678602281868e-06, "loss": 0.4594, "step": 34050 }, { "epoch": 0.34, "grad_norm": 14.642382621765137, "learning_rate": 4.880554479827619e-06, "loss": 0.2656, "step": 34075 }, { "epoch": 0.34, "grad_norm": 22.593358993530273, "learning_rate": 4.880430357373371e-06, "loss": 0.5026, "step": 34100 }, { "epoch": 0.34, "grad_norm": 8.131956100463867, "learning_rate": 4.880306234919122e-06, "loss": 0.2812, "step": 34125 }, { "epoch": 0.34, "grad_norm": 10.792276382446289, "learning_rate": 4.8801821124648735e-06, "loss": 0.3923, "step": 34150 }, { "epoch": 0.34, "grad_norm": 10.117814064025879, "learning_rate": 4.8800579900106256e-06, "loss": 0.255, "step": 34175 }, { "epoch": 0.34, "grad_norm": 15.612272262573242, "learning_rate": 4.879933867556377e-06, "loss": 0.477, "step": 34200 }, { "epoch": 0.34, "grad_norm": 6.148955345153809, "learning_rate": 4.879809745102128e-06, "loss": 0.1984, "step": 34225 }, { "epoch": 0.34, "grad_norm": 19.504844665527344, "learning_rate": 4.87968562264788e-06, "loss": 0.4713, "step": 34250 }, { "epoch": 0.34, "grad_norm": 10.28429889678955, "learning_rate": 4.879561500193631e-06, "loss": 0.2892, "step": 34275 }, { "epoch": 0.34, "grad_norm": 14.733054161071777, "learning_rate": 4.8794373777393825e-06, "loss": 0.3744, "step": 34300 }, { "epoch": 0.34, "grad_norm": 7.868514537811279, "learning_rate": 4.8793132552851345e-06, "loss": 0.205, "step": 34325 }, { "epoch": 0.34, "grad_norm": 18.12238121032715, "learning_rate": 4.879189132830886e-06, "loss": 0.4047, "step": 34350 }, { "epoch": 0.34, "grad_norm": 10.429434776306152, "learning_rate": 4.879065010376638e-06, "loss": 0.2251, "step": 34375 }, { "epoch": 0.34, "grad_norm": 28.4522647857666, "learning_rate": 4.878940887922389e-06, "loss": 0.53, "step": 34400 }, { "epoch": 0.34, "grad_norm": 12.630537033081055, "learning_rate": 4.878816765468141e-06, "loss": 0.2667, "step": 34425 }, { "epoch": 0.34, "grad_norm": 26.81945037841797, "learning_rate": 4.878692643013892e-06, "loss": 0.4344, "step": 34450 }, { "epoch": 0.34, "grad_norm": 8.98192310333252, "learning_rate": 4.878568520559644e-06, "loss": 0.2544, "step": 34475 }, { "epoch": 0.34, "grad_norm": 20.93459701538086, "learning_rate": 4.8784443981053955e-06, "loss": 0.3985, "step": 34500 }, { "epoch": 0.34, "grad_norm": 6.448456764221191, "learning_rate": 4.878320275651147e-06, "loss": 0.1801, "step": 34525 }, { "epoch": 0.34, "grad_norm": 19.282743453979492, "learning_rate": 4.878196153196899e-06, "loss": 0.4022, "step": 34550 }, { "epoch": 0.34, "grad_norm": 10.993169784545898, "learning_rate": 4.87807203074265e-06, "loss": 0.1998, "step": 34575 }, { "epoch": 0.34, "grad_norm": 20.03145408630371, "learning_rate": 4.877947908288401e-06, "loss": 0.4827, "step": 34600 }, { "epoch": 0.34, "grad_norm": 13.389777183532715, "learning_rate": 4.877823785834153e-06, "loss": 0.2588, "step": 34625 }, { "epoch": 0.34, "grad_norm": 22.675891876220703, "learning_rate": 4.8776996633799045e-06, "loss": 0.4396, "step": 34650 }, { "epoch": 0.34, "grad_norm": 24.66277503967285, "learning_rate": 4.877575540925656e-06, "loss": 0.2264, "step": 34675 }, { "epoch": 0.34, "grad_norm": 18.279651641845703, "learning_rate": 4.877451418471408e-06, "loss": 0.4393, "step": 34700 }, { "epoch": 0.34, "grad_norm": 10.722000122070312, "learning_rate": 4.877327296017159e-06, "loss": 0.2916, "step": 34725 }, { "epoch": 0.34, "grad_norm": 23.46078872680664, "learning_rate": 4.87720317356291e-06, "loss": 0.509, "step": 34750 }, { "epoch": 0.34, "grad_norm": 12.847780227661133, "learning_rate": 4.877079051108662e-06, "loss": 0.2485, "step": 34775 }, { "epoch": 0.34, "grad_norm": 14.902559280395508, "learning_rate": 4.8769549286544135e-06, "loss": 0.4563, "step": 34800 }, { "epoch": 0.34, "grad_norm": 7.009099006652832, "learning_rate": 4.8768308062001655e-06, "loss": 0.2154, "step": 34825 }, { "epoch": 0.34, "grad_norm": 18.34128761291504, "learning_rate": 4.876706683745917e-06, "loss": 0.5036, "step": 34850 }, { "epoch": 0.34, "grad_norm": 9.17263412475586, "learning_rate": 4.876582561291669e-06, "loss": 0.2612, "step": 34875 }, { "epoch": 0.34, "grad_norm": 8.398309707641602, "learning_rate": 4.87645843883742e-06, "loss": 0.479, "step": 34900 }, { "epoch": 0.34, "grad_norm": 15.596166610717773, "learning_rate": 4.876334316383171e-06, "loss": 0.2445, "step": 34925 }, { "epoch": 0.34, "grad_norm": 22.1002197265625, "learning_rate": 4.876210193928923e-06, "loss": 0.4786, "step": 34950 }, { "epoch": 0.34, "grad_norm": 15.442193984985352, "learning_rate": 4.8760860714746745e-06, "loss": 0.2412, "step": 34975 }, { "epoch": 0.34, "grad_norm": 20.005069732666016, "learning_rate": 4.875961949020426e-06, "loss": 0.435, "step": 35000 }, { "epoch": 0.34, "grad_norm": 12.996124267578125, "learning_rate": 4.875837826566178e-06, "loss": 0.2562, "step": 35025 }, { "epoch": 0.34, "grad_norm": 21.996685028076172, "learning_rate": 4.875713704111929e-06, "loss": 0.4218, "step": 35050 }, { "epoch": 0.34, "grad_norm": 9.84152603149414, "learning_rate": 4.87558958165768e-06, "loss": 0.2616, "step": 35075 }, { "epoch": 0.35, "grad_norm": 14.983373641967773, "learning_rate": 4.875465459203432e-06, "loss": 0.4321, "step": 35100 }, { "epoch": 0.35, "grad_norm": 11.60263442993164, "learning_rate": 4.8753413367491834e-06, "loss": 0.2327, "step": 35125 }, { "epoch": 0.35, "grad_norm": 22.131458282470703, "learning_rate": 4.875217214294935e-06, "loss": 0.514, "step": 35150 }, { "epoch": 0.35, "grad_norm": 13.218733787536621, "learning_rate": 4.875093091840687e-06, "loss": 0.2304, "step": 35175 }, { "epoch": 0.35, "grad_norm": 22.124448776245117, "learning_rate": 4.874968969386438e-06, "loss": 0.4479, "step": 35200 }, { "epoch": 0.35, "grad_norm": 10.314848899841309, "learning_rate": 4.87484484693219e-06, "loss": 0.2731, "step": 35225 }, { "epoch": 0.35, "grad_norm": 18.029987335205078, "learning_rate": 4.874720724477941e-06, "loss": 0.464, "step": 35250 }, { "epoch": 0.35, "grad_norm": 9.950128555297852, "learning_rate": 4.874596602023693e-06, "loss": 0.2283, "step": 35275 }, { "epoch": 0.35, "grad_norm": 15.309113502502441, "learning_rate": 4.8744724795694445e-06, "loss": 0.4669, "step": 35300 }, { "epoch": 0.35, "grad_norm": 10.054605484008789, "learning_rate": 4.8743483571151965e-06, "loss": 0.2705, "step": 35325 }, { "epoch": 0.35, "grad_norm": Infinity, "learning_rate": 4.8742291995591176e-06, "loss": 0.4226, "step": 35350 }, { "epoch": 0.35, "grad_norm": 8.667949676513672, "learning_rate": 4.87410507710487e-06, "loss": 0.2677, "step": 35375 }, { "epoch": 0.35, "grad_norm": 19.41878890991211, "learning_rate": 4.873980954650621e-06, "loss": 0.4238, "step": 35400 }, { "epoch": 0.35, "grad_norm": 13.023319244384766, "learning_rate": 4.873856832196372e-06, "loss": 0.2501, "step": 35425 }, { "epoch": 0.35, "grad_norm": 15.57355785369873, "learning_rate": 4.873732709742123e-06, "loss": 0.4809, "step": 35450 }, { "epoch": 0.35, "grad_norm": 10.158863067626953, "learning_rate": 4.873608587287875e-06, "loss": 0.2515, "step": 35475 }, { "epoch": 0.35, "grad_norm": 17.693679809570312, "learning_rate": 4.8734844648336265e-06, "loss": 0.5096, "step": 35500 }, { "epoch": 0.35, "grad_norm": 15.27641773223877, "learning_rate": 4.873360342379378e-06, "loss": 0.2456, "step": 35525 }, { "epoch": 0.35, "grad_norm": 29.165929794311523, "learning_rate": 4.87323621992513e-06, "loss": 0.4745, "step": 35550 }, { "epoch": 0.35, "grad_norm": 7.183991432189941, "learning_rate": 4.873112097470881e-06, "loss": 0.2499, "step": 35575 }, { "epoch": 0.35, "grad_norm": 16.674047470092773, "learning_rate": 4.872987975016632e-06, "loss": 0.3818, "step": 35600 }, { "epoch": 0.35, "grad_norm": 5.9982171058654785, "learning_rate": 4.872863852562384e-06, "loss": 0.2163, "step": 35625 }, { "epoch": 0.35, "grad_norm": 16.456655502319336, "learning_rate": 4.8727397301081355e-06, "loss": 0.4484, "step": 35650 }, { "epoch": 0.35, "grad_norm": 11.30372428894043, "learning_rate": 4.8726156076538875e-06, "loss": 0.2358, "step": 35675 }, { "epoch": 0.35, "grad_norm": 24.091157913208008, "learning_rate": 4.872491485199639e-06, "loss": 0.4232, "step": 35700 }, { "epoch": 0.35, "grad_norm": 8.650962829589844, "learning_rate": 4.872367362745391e-06, "loss": 0.2473, "step": 35725 }, { "epoch": 0.35, "grad_norm": 17.820295333862305, "learning_rate": 4.872243240291142e-06, "loss": 0.4611, "step": 35750 }, { "epoch": 0.35, "grad_norm": 13.541815757751465, "learning_rate": 4.872119117836894e-06, "loss": 0.2417, "step": 35775 }, { "epoch": 0.35, "grad_norm": 17.00164031982422, "learning_rate": 4.871994995382645e-06, "loss": 0.4999, "step": 35800 }, { "epoch": 0.35, "grad_norm": 14.84703540802002, "learning_rate": 4.8718708729283965e-06, "loss": 0.2301, "step": 35825 }, { "epoch": 0.35, "grad_norm": 16.09697723388672, "learning_rate": 4.8717467504741486e-06, "loss": 0.4192, "step": 35850 }, { "epoch": 0.35, "grad_norm": 15.807671546936035, "learning_rate": 4.8716226280199e-06, "loss": 0.2339, "step": 35875 }, { "epoch": 0.35, "grad_norm": 22.74229621887207, "learning_rate": 4.871498505565651e-06, "loss": 0.4144, "step": 35900 }, { "epoch": 0.35, "grad_norm": 16.8558349609375, "learning_rate": 4.871374383111403e-06, "loss": 0.2359, "step": 35925 }, { "epoch": 0.35, "grad_norm": 19.890033721923828, "learning_rate": 4.871250260657154e-06, "loss": 0.5355, "step": 35950 }, { "epoch": 0.35, "grad_norm": 4.722134590148926, "learning_rate": 4.8711261382029055e-06, "loss": 0.1697, "step": 35975 }, { "epoch": 0.35, "grad_norm": 20.994211196899414, "learning_rate": 4.8710020157486575e-06, "loss": 0.3954, "step": 36000 }, { "epoch": 0.35, "grad_norm": 12.015830993652344, "learning_rate": 4.870877893294409e-06, "loss": 0.2418, "step": 36025 }, { "epoch": 0.35, "grad_norm": 25.985204696655273, "learning_rate": 4.87075377084016e-06, "loss": 0.3886, "step": 36050 }, { "epoch": 0.35, "grad_norm": 5.544332981109619, "learning_rate": 4.870629648385912e-06, "loss": 0.2226, "step": 36075 }, { "epoch": 0.35, "grad_norm": 17.91901969909668, "learning_rate": 4.870505525931663e-06, "loss": 0.487, "step": 36100 }, { "epoch": 0.36, "grad_norm": 9.29063606262207, "learning_rate": 4.870381403477415e-06, "loss": 0.2211, "step": 36125 }, { "epoch": 0.36, "grad_norm": 22.591331481933594, "learning_rate": 4.8702572810231665e-06, "loss": 0.4984, "step": 36150 }, { "epoch": 0.36, "grad_norm": 7.492972373962402, "learning_rate": 4.8701331585689185e-06, "loss": 0.2062, "step": 36175 }, { "epoch": 0.36, "grad_norm": 14.036580085754395, "learning_rate": 4.87000903611467e-06, "loss": 0.4487, "step": 36200 }, { "epoch": 0.36, "grad_norm": 7.270763397216797, "learning_rate": 4.869884913660422e-06, "loss": 0.238, "step": 36225 }, { "epoch": 0.36, "grad_norm": 23.007265090942383, "learning_rate": 4.869760791206173e-06, "loss": 0.5084, "step": 36250 }, { "epoch": 0.36, "grad_norm": 8.0201416015625, "learning_rate": 4.869636668751924e-06, "loss": 0.2479, "step": 36275 }, { "epoch": 0.36, "grad_norm": 12.970367431640625, "learning_rate": 4.8695125462976754e-06, "loss": 0.469, "step": 36300 }, { "epoch": 0.36, "grad_norm": 13.622200012207031, "learning_rate": 4.8693884238434275e-06, "loss": 0.2806, "step": 36325 }, { "epoch": 0.36, "grad_norm": 13.061184883117676, "learning_rate": 4.869264301389179e-06, "loss": 0.3647, "step": 36350 }, { "epoch": 0.36, "grad_norm": 10.327747344970703, "learning_rate": 4.86914017893493e-06, "loss": 0.2171, "step": 36375 }, { "epoch": 0.36, "grad_norm": 18.184717178344727, "learning_rate": 4.869016056480682e-06, "loss": 0.4282, "step": 36400 }, { "epoch": 0.36, "grad_norm": 7.318487167358398, "learning_rate": 4.868891934026433e-06, "loss": 0.2304, "step": 36425 }, { "epoch": 0.36, "grad_norm": 21.072738647460938, "learning_rate": 4.868767811572184e-06, "loss": 0.4628, "step": 36450 }, { "epoch": 0.36, "grad_norm": 9.488296508789062, "learning_rate": 4.8686436891179365e-06, "loss": 0.2503, "step": 36475 }, { "epoch": 0.36, "grad_norm": 13.464424133300781, "learning_rate": 4.868519566663688e-06, "loss": 0.4108, "step": 36500 }, { "epoch": 0.36, "grad_norm": 10.414560317993164, "learning_rate": 4.86839544420944e-06, "loss": 0.2407, "step": 36525 }, { "epoch": 0.36, "grad_norm": 12.992931365966797, "learning_rate": 4.868271321755191e-06, "loss": 0.4721, "step": 36550 }, { "epoch": 0.36, "grad_norm": 6.5915398597717285, "learning_rate": 4.868147199300943e-06, "loss": 0.2131, "step": 36575 }, { "epoch": 0.36, "grad_norm": 21.60256004333496, "learning_rate": 4.868023076846694e-06, "loss": 0.4104, "step": 36600 }, { "epoch": 0.36, "grad_norm": 6.420774459838867, "learning_rate": 4.867898954392446e-06, "loss": 0.2379, "step": 36625 }, { "epoch": 0.36, "grad_norm": 24.271690368652344, "learning_rate": 4.8677748319381975e-06, "loss": 0.4589, "step": 36650 }, { "epoch": 0.36, "grad_norm": 5.273845672607422, "learning_rate": 4.867650709483949e-06, "loss": 0.2374, "step": 36675 }, { "epoch": 0.36, "grad_norm": 21.226696014404297, "learning_rate": 4.867526587029701e-06, "loss": 0.5474, "step": 36700 }, { "epoch": 0.36, "grad_norm": 10.693513870239258, "learning_rate": 4.867402464575452e-06, "loss": 0.2733, "step": 36725 }, { "epoch": 0.36, "grad_norm": 16.23636817932129, "learning_rate": 4.867278342121203e-06, "loss": 0.4506, "step": 36750 }, { "epoch": 0.36, "grad_norm": 10.974020957946777, "learning_rate": 4.867154219666955e-06, "loss": 0.2429, "step": 36775 }, { "epoch": 0.36, "grad_norm": 11.248056411743164, "learning_rate": 4.8670300972127065e-06, "loss": 0.4216, "step": 36800 }, { "epoch": 0.36, "grad_norm": 7.152559757232666, "learning_rate": 4.866905974758458e-06, "loss": 0.2026, "step": 36825 }, { "epoch": 0.36, "grad_norm": 20.33271598815918, "learning_rate": 4.86678185230421e-06, "loss": 0.4575, "step": 36850 }, { "epoch": 0.36, "grad_norm": 7.325585842132568, "learning_rate": 4.866657729849961e-06, "loss": 0.2563, "step": 36875 }, { "epoch": 0.36, "grad_norm": 18.28515625, "learning_rate": 4.866533607395712e-06, "loss": 0.3844, "step": 36900 }, { "epoch": 0.36, "grad_norm": 13.073737144470215, "learning_rate": 4.866409484941464e-06, "loss": 0.2266, "step": 36925 }, { "epoch": 0.36, "grad_norm": 19.57931900024414, "learning_rate": 4.866285362487215e-06, "loss": 0.4734, "step": 36950 }, { "epoch": 0.36, "grad_norm": 9.298791885375977, "learning_rate": 4.8661612400329675e-06, "loss": 0.192, "step": 36975 }, { "epoch": 0.36, "grad_norm": 15.567995071411133, "learning_rate": 4.866037117578719e-06, "loss": 0.4844, "step": 37000 }, { "epoch": 0.36, "grad_norm": 12.90270709991455, "learning_rate": 4.865912995124471e-06, "loss": 0.3011, "step": 37025 }, { "epoch": 0.36, "grad_norm": 17.70840072631836, "learning_rate": 4.865788872670222e-06, "loss": 0.4267, "step": 37050 }, { "epoch": 0.36, "grad_norm": 11.254528999328613, "learning_rate": 4.865664750215974e-06, "loss": 0.2262, "step": 37075 }, { "epoch": 0.36, "grad_norm": 15.437673568725586, "learning_rate": 4.865540627761725e-06, "loss": 0.4039, "step": 37100 }, { "epoch": 0.37, "grad_norm": 10.7083740234375, "learning_rate": 4.8654165053074764e-06, "loss": 0.2311, "step": 37125 }, { "epoch": 0.37, "grad_norm": 19.854190826416016, "learning_rate": 4.865297347751398e-06, "loss": 0.4012, "step": 37150 }, { "epoch": 0.37, "grad_norm": 21.475191116333008, "learning_rate": 4.8651732252971495e-06, "loss": 0.2416, "step": 37175 }, { "epoch": 0.37, "grad_norm": 32.630069732666016, "learning_rate": 4.865049102842901e-06, "loss": 0.5327, "step": 37200 }, { "epoch": 0.37, "grad_norm": 12.919622421264648, "learning_rate": 4.864924980388653e-06, "loss": 0.2453, "step": 37225 }, { "epoch": 0.37, "grad_norm": 18.346654891967773, "learning_rate": 4.864800857934404e-06, "loss": 0.4188, "step": 37250 }, { "epoch": 0.37, "grad_norm": 13.908652305603027, "learning_rate": 4.864676735480155e-06, "loss": 0.2554, "step": 37275 }, { "epoch": 0.37, "grad_norm": 10.847558975219727, "learning_rate": 4.864552613025907e-06, "loss": 0.383, "step": 37300 }, { "epoch": 0.37, "grad_norm": 12.836614608764648, "learning_rate": 4.8644284905716585e-06, "loss": 0.2596, "step": 37325 }, { "epoch": 0.37, "grad_norm": 18.27206039428711, "learning_rate": 4.8643043681174105e-06, "loss": 0.4698, "step": 37350 }, { "epoch": 0.37, "grad_norm": 4.840401649475098, "learning_rate": 4.864180245663162e-06, "loss": 0.2134, "step": 37375 }, { "epoch": 0.37, "grad_norm": 28.433170318603516, "learning_rate": 4.864056123208914e-06, "loss": 0.4454, "step": 37400 }, { "epoch": 0.37, "grad_norm": 7.174315452575684, "learning_rate": 4.863932000754665e-06, "loss": 0.2529, "step": 37425 }, { "epoch": 0.37, "grad_norm": 15.003414154052734, "learning_rate": 4.863807878300417e-06, "loss": 0.4523, "step": 37450 }, { "epoch": 0.37, "grad_norm": 7.622563362121582, "learning_rate": 4.863683755846168e-06, "loss": 0.2619, "step": 37475 }, { "epoch": 0.37, "grad_norm": 21.650842666625977, "learning_rate": 4.8635596333919195e-06, "loss": 0.4185, "step": 37500 }, { "epoch": 0.37, "grad_norm": 9.392125129699707, "learning_rate": 4.8634355109376716e-06, "loss": 0.2458, "step": 37525 }, { "epoch": 0.37, "grad_norm": 17.876985549926758, "learning_rate": 4.863311388483423e-06, "loss": 0.4016, "step": 37550 }, { "epoch": 0.37, "grad_norm": 12.87895679473877, "learning_rate": 4.863187266029174e-06, "loss": 0.2762, "step": 37575 }, { "epoch": 0.37, "grad_norm": 14.950977325439453, "learning_rate": 4.863063143574926e-06, "loss": 0.4487, "step": 37600 }, { "epoch": 0.37, "grad_norm": 6.37252950668335, "learning_rate": 4.862939021120677e-06, "loss": 0.218, "step": 37625 }, { "epoch": 0.37, "grad_norm": 19.808021545410156, "learning_rate": 4.8628148986664285e-06, "loss": 0.4681, "step": 37650 }, { "epoch": 0.37, "grad_norm": 10.693982124328613, "learning_rate": 4.8626907762121805e-06, "loss": 0.2059, "step": 37675 }, { "epoch": 0.37, "grad_norm": 15.55998706817627, "learning_rate": 4.862566653757932e-06, "loss": 0.4452, "step": 37700 }, { "epoch": 0.37, "grad_norm": 16.244701385498047, "learning_rate": 4.862442531303683e-06, "loss": 0.2532, "step": 37725 }, { "epoch": 0.37, "grad_norm": 19.60542869567871, "learning_rate": 4.862318408849435e-06, "loss": 0.4526, "step": 37750 }, { "epoch": 0.37, "grad_norm": 14.235875129699707, "learning_rate": 4.862194286395186e-06, "loss": 0.2652, "step": 37775 }, { "epoch": 0.37, "grad_norm": 15.730254173278809, "learning_rate": 4.862070163940938e-06, "loss": 0.4513, "step": 37800 }, { "epoch": 0.37, "grad_norm": 5.097680568695068, "learning_rate": 4.8619460414866895e-06, "loss": 0.2493, "step": 37825 }, { "epoch": 0.37, "grad_norm": 15.635416030883789, "learning_rate": 4.8618219190324415e-06, "loss": 0.4202, "step": 37850 }, { "epoch": 0.37, "grad_norm": 12.400949478149414, "learning_rate": 4.861697796578193e-06, "loss": 0.2754, "step": 37875 }, { "epoch": 0.37, "grad_norm": 15.488780975341797, "learning_rate": 4.861573674123944e-06, "loss": 0.4633, "step": 37900 }, { "epoch": 0.37, "grad_norm": 13.03145980834961, "learning_rate": 4.861449551669696e-06, "loss": 0.2458, "step": 37925 }, { "epoch": 0.37, "grad_norm": 15.327569961547852, "learning_rate": 4.861325429215447e-06, "loss": 0.4628, "step": 37950 }, { "epoch": 0.37, "grad_norm": 7.730944633483887, "learning_rate": 4.8612013067611985e-06, "loss": 0.2455, "step": 37975 }, { "epoch": 0.37, "grad_norm": 17.614084243774414, "learning_rate": 4.8610771843069505e-06, "loss": 0.5007, "step": 38000 }, { "epoch": 0.37, "grad_norm": 10.312545776367188, "learning_rate": 4.860953061852702e-06, "loss": 0.2162, "step": 38025 }, { "epoch": 0.37, "grad_norm": 22.105180740356445, "learning_rate": 4.860828939398453e-06, "loss": 0.3841, "step": 38050 }, { "epoch": 0.37, "grad_norm": 11.018665313720703, "learning_rate": 4.860704816944205e-06, "loss": 0.2221, "step": 38075 }, { "epoch": 0.37, "grad_norm": 15.697999954223633, "learning_rate": 4.860580694489956e-06, "loss": 0.4325, "step": 38100 }, { "epoch": 0.37, "grad_norm": 13.077972412109375, "learning_rate": 4.860456572035707e-06, "loss": 0.2334, "step": 38125 }, { "epoch": 0.38, "grad_norm": 19.472700119018555, "learning_rate": 4.8603324495814595e-06, "loss": 0.457, "step": 38150 }, { "epoch": 0.38, "grad_norm": 14.673176765441895, "learning_rate": 4.860208327127211e-06, "loss": 0.2182, "step": 38175 }, { "epoch": 0.38, "grad_norm": 15.412910461425781, "learning_rate": 4.860084204672963e-06, "loss": 0.429, "step": 38200 }, { "epoch": 0.38, "grad_norm": 11.843524932861328, "learning_rate": 4.859960082218714e-06, "loss": 0.2439, "step": 38225 }, { "epoch": 0.38, "grad_norm": 15.817419052124023, "learning_rate": 4.859835959764466e-06, "loss": 0.3704, "step": 38250 }, { "epoch": 0.38, "grad_norm": 9.621947288513184, "learning_rate": 4.859711837310217e-06, "loss": 0.2289, "step": 38275 }, { "epoch": 0.38, "grad_norm": 28.848066329956055, "learning_rate": 4.859587714855969e-06, "loss": 0.4907, "step": 38300 }, { "epoch": 0.38, "grad_norm": 8.27892017364502, "learning_rate": 4.8594635924017205e-06, "loss": 0.2489, "step": 38325 }, { "epoch": 0.38, "grad_norm": 20.720157623291016, "learning_rate": 4.859339469947472e-06, "loss": 0.4047, "step": 38350 }, { "epoch": 0.38, "grad_norm": 7.519328594207764, "learning_rate": 4.859215347493224e-06, "loss": 0.2417, "step": 38375 }, { "epoch": 0.38, "grad_norm": 21.148422241210938, "learning_rate": 4.859091225038975e-06, "loss": 0.497, "step": 38400 }, { "epoch": 0.38, "grad_norm": 9.144061088562012, "learning_rate": 4.858967102584726e-06, "loss": 0.1999, "step": 38425 }, { "epoch": 0.38, "grad_norm": 26.90660285949707, "learning_rate": 4.858842980130478e-06, "loss": 0.5077, "step": 38450 }, { "epoch": 0.38, "grad_norm": 14.561205863952637, "learning_rate": 4.8587188576762295e-06, "loss": 0.2625, "step": 38475 }, { "epoch": 0.38, "grad_norm": 22.843591690063477, "learning_rate": 4.858594735221981e-06, "loss": 0.4585, "step": 38500 }, { "epoch": 0.38, "grad_norm": 11.705921173095703, "learning_rate": 4.858470612767733e-06, "loss": 0.2114, "step": 38525 }, { "epoch": 0.38, "grad_norm": 20.584203720092773, "learning_rate": 4.858346490313484e-06, "loss": 0.4621, "step": 38550 }, { "epoch": 0.38, "grad_norm": 8.004617691040039, "learning_rate": 4.858222367859235e-06, "loss": 0.1695, "step": 38575 }, { "epoch": 0.38, "grad_norm": 20.858800888061523, "learning_rate": 4.858098245404987e-06, "loss": 0.479, "step": 38600 }, { "epoch": 0.38, "grad_norm": 9.779375076293945, "learning_rate": 4.857974122950738e-06, "loss": 0.2519, "step": 38625 }, { "epoch": 0.38, "grad_norm": 12.601141929626465, "learning_rate": 4.8578500004964905e-06, "loss": 0.4347, "step": 38650 }, { "epoch": 0.38, "grad_norm": 9.41529655456543, "learning_rate": 4.857725878042242e-06, "loss": 0.1954, "step": 38675 }, { "epoch": 0.38, "grad_norm": 23.92009735107422, "learning_rate": 4.857601755587994e-06, "loss": 0.4293, "step": 38700 }, { "epoch": 0.38, "grad_norm": 12.507072448730469, "learning_rate": 4.857477633133745e-06, "loss": 0.2291, "step": 38725 }, { "epoch": 0.38, "grad_norm": 18.374441146850586, "learning_rate": 4.857353510679496e-06, "loss": 0.4975, "step": 38750 }, { "epoch": 0.38, "grad_norm": 13.144401550292969, "learning_rate": 4.857229388225248e-06, "loss": 0.2182, "step": 38775 }, { "epoch": 0.38, "grad_norm": 17.719682693481445, "learning_rate": 4.8571052657709994e-06, "loss": 0.4735, "step": 38800 }, { "epoch": 0.38, "grad_norm": 7.644373416900635, "learning_rate": 4.856981143316751e-06, "loss": 0.2183, "step": 38825 }, { "epoch": 0.38, "grad_norm": 12.848651885986328, "learning_rate": 4.856857020862503e-06, "loss": 0.4892, "step": 38850 }, { "epoch": 0.38, "grad_norm": 8.42552375793457, "learning_rate": 4.856732898408254e-06, "loss": 0.2227, "step": 38875 }, { "epoch": 0.38, "grad_norm": 25.706390380859375, "learning_rate": 4.856608775954005e-06, "loss": 0.459, "step": 38900 }, { "epoch": 0.38, "grad_norm": 12.369009017944336, "learning_rate": 4.856484653499757e-06, "loss": 0.2291, "step": 38925 }, { "epoch": 0.38, "grad_norm": 13.90406608581543, "learning_rate": 4.856360531045508e-06, "loss": 0.3938, "step": 38950 }, { "epoch": 0.38, "grad_norm": 13.479564666748047, "learning_rate": 4.85623640859126e-06, "loss": 0.2391, "step": 38975 }, { "epoch": 0.38, "grad_norm": 16.572490692138672, "learning_rate": 4.856112286137012e-06, "loss": 0.3851, "step": 39000 }, { "epoch": 0.38, "grad_norm": 8.4893217086792, "learning_rate": 4.855988163682763e-06, "loss": 0.2541, "step": 39025 }, { "epoch": 0.38, "grad_norm": 12.077136039733887, "learning_rate": 4.855864041228515e-06, "loss": 0.4338, "step": 39050 }, { "epoch": 0.38, "grad_norm": 10.005154609680176, "learning_rate": 4.855739918774266e-06, "loss": 0.2162, "step": 39075 }, { "epoch": 0.38, "grad_norm": 36.09486770629883, "learning_rate": 4.855615796320018e-06, "loss": 0.5366, "step": 39100 }, { "epoch": 0.38, "grad_norm": 8.90514087677002, "learning_rate": 4.8554916738657694e-06, "loss": 0.2123, "step": 39125 }, { "epoch": 0.38, "grad_norm": 22.037113189697266, "learning_rate": 4.8553675514115215e-06, "loss": 0.4267, "step": 39150 }, { "epoch": 0.39, "grad_norm": 13.947267532348633, "learning_rate": 4.855243428957273e-06, "loss": 0.2327, "step": 39175 }, { "epoch": 0.39, "grad_norm": 21.564350128173828, "learning_rate": 4.855119306503024e-06, "loss": 0.4391, "step": 39200 }, { "epoch": 0.39, "grad_norm": 11.004363059997559, "learning_rate": 4.854995184048776e-06, "loss": 0.273, "step": 39225 }, { "epoch": 0.39, "grad_norm": 21.96237564086914, "learning_rate": 4.854871061594527e-06, "loss": 0.4534, "step": 39250 }, { "epoch": 0.39, "grad_norm": 11.192524909973145, "learning_rate": 4.854746939140278e-06, "loss": 0.2388, "step": 39275 }, { "epoch": 0.39, "grad_norm": 20.322792053222656, "learning_rate": 4.8546228166860304e-06, "loss": 0.4584, "step": 39300 }, { "epoch": 0.39, "grad_norm": 11.439380645751953, "learning_rate": 4.854498694231782e-06, "loss": 0.2282, "step": 39325 }, { "epoch": 0.39, "grad_norm": 13.453913688659668, "learning_rate": 4.854374571777533e-06, "loss": 0.4091, "step": 39350 }, { "epoch": 0.39, "grad_norm": 10.402567863464355, "learning_rate": 4.854250449323285e-06, "loss": 0.2343, "step": 39375 }, { "epoch": 0.39, "grad_norm": 22.51355743408203, "learning_rate": 4.854126326869036e-06, "loss": 0.455, "step": 39400 }, { "epoch": 0.39, "grad_norm": 11.408729553222656, "learning_rate": 4.854002204414787e-06, "loss": 0.1758, "step": 39425 }, { "epoch": 0.39, "grad_norm": 20.89878273010254, "learning_rate": 4.853878081960539e-06, "loss": 0.5208, "step": 39450 }, { "epoch": 0.39, "grad_norm": 9.248867988586426, "learning_rate": 4.853753959506291e-06, "loss": 0.2482, "step": 39475 }, { "epoch": 0.39, "grad_norm": 20.48552131652832, "learning_rate": 4.853629837052043e-06, "loss": 0.4408, "step": 39500 }, { "epoch": 0.39, "grad_norm": 9.800973892211914, "learning_rate": 4.853505714597794e-06, "loss": 0.2349, "step": 39525 }, { "epoch": 0.39, "grad_norm": 14.974353790283203, "learning_rate": 4.853381592143546e-06, "loss": 0.4317, "step": 39550 }, { "epoch": 0.39, "grad_norm": 5.214593410491943, "learning_rate": 4.853257469689297e-06, "loss": 0.2667, "step": 39575 }, { "epoch": 0.39, "grad_norm": 15.094268798828125, "learning_rate": 4.853133347235048e-06, "loss": 0.4236, "step": 39600 }, { "epoch": 0.39, "grad_norm": 6.383767604827881, "learning_rate": 4.8530092247808004e-06, "loss": 0.2169, "step": 39625 }, { "epoch": 0.39, "grad_norm": 20.675769805908203, "learning_rate": 4.852885102326552e-06, "loss": 0.4744, "step": 39650 }, { "epoch": 0.39, "grad_norm": 10.56852912902832, "learning_rate": 4.852760979872303e-06, "loss": 0.214, "step": 39675 }, { "epoch": 0.39, "grad_norm": 13.981583595275879, "learning_rate": 4.852636857418055e-06, "loss": 0.4124, "step": 39700 }, { "epoch": 0.39, "grad_norm": 15.923415184020996, "learning_rate": 4.852512734963806e-06, "loss": 0.2415, "step": 39725 }, { "epoch": 0.39, "grad_norm": 16.873180389404297, "learning_rate": 4.852388612509557e-06, "loss": 0.4141, "step": 39750 }, { "epoch": 0.39, "grad_norm": 8.260383605957031, "learning_rate": 4.852264490055309e-06, "loss": 0.2837, "step": 39775 }, { "epoch": 0.39, "grad_norm": 24.883264541625977, "learning_rate": 4.852140367601061e-06, "loss": 0.4243, "step": 39800 }, { "epoch": 0.39, "grad_norm": 13.37057876586914, "learning_rate": 4.852016245146813e-06, "loss": 0.2783, "step": 39825 }, { "epoch": 0.39, "grad_norm": 13.637534141540527, "learning_rate": 4.851892122692564e-06, "loss": 0.4302, "step": 39850 }, { "epoch": 0.39, "grad_norm": 15.238649368286133, "learning_rate": 4.851768000238316e-06, "loss": 0.3184, "step": 39875 }, { "epoch": 0.39, "grad_norm": 23.921329498291016, "learning_rate": 4.851643877784067e-06, "loss": 0.4141, "step": 39900 }, { "epoch": 0.39, "grad_norm": 6.928285121917725, "learning_rate": 4.851519755329819e-06, "loss": 0.2337, "step": 39925 }, { "epoch": 0.39, "grad_norm": 18.093767166137695, "learning_rate": 4.85139563287557e-06, "loss": 0.4668, "step": 39950 }, { "epoch": 0.39, "grad_norm": 9.653918266296387, "learning_rate": 4.851271510421322e-06, "loss": 0.2734, "step": 39975 }, { "epoch": 0.39, "grad_norm": 10.698701858520508, "learning_rate": 4.851147387967074e-06, "loss": 0.4963, "step": 40000 }, { "epoch": 0.39, "eval_loss": 0.492117702960968, "eval_runtime": 6037.7098, "eval_samples_per_second": 1.568, "eval_steps_per_second": 0.196, "eval_wer": 0.1547028906199704, "step": 40000 }, { "epoch": 0.39, "grad_norm": 18.771995544433594, "learning_rate": 4.851023265512825e-06, "loss": 0.2332, "step": 40025 }, { "epoch": 0.39, "grad_norm": 19.14767074584961, "learning_rate": 4.850899143058576e-06, "loss": 0.3856, "step": 40050 }, { "epoch": 0.39, "grad_norm": 8.720975875854492, "learning_rate": 4.850775020604328e-06, "loss": 0.2293, "step": 40075 }, { "epoch": 0.39, "grad_norm": 15.556615829467773, "learning_rate": 4.850650898150079e-06, "loss": 0.4892, "step": 40100 }, { "epoch": 0.39, "grad_norm": 11.181511878967285, "learning_rate": 4.850526775695831e-06, "loss": 0.2399, "step": 40125 }, { "epoch": 0.39, "grad_norm": 17.15337562561035, "learning_rate": 4.850402653241583e-06, "loss": 0.3613, "step": 40150 }, { "epoch": 0.4, "grad_norm": 10.663707733154297, "learning_rate": 4.850278530787334e-06, "loss": 0.2071, "step": 40175 }, { "epoch": 0.4, "grad_norm": 13.701661109924316, "learning_rate": 4.850154408333085e-06, "loss": 0.408, "step": 40200 }, { "epoch": 0.4, "grad_norm": 16.760272979736328, "learning_rate": 4.850030285878837e-06, "loss": 0.2549, "step": 40225 }, { "epoch": 0.4, "grad_norm": 10.108711242675781, "learning_rate": 4.849906163424588e-06, "loss": 0.3591, "step": 40250 }, { "epoch": 0.4, "grad_norm": 7.664885520935059, "learning_rate": 4.84978204097034e-06, "loss": 0.2183, "step": 40275 }, { "epoch": 0.4, "grad_norm": 17.620084762573242, "learning_rate": 4.849657918516092e-06, "loss": 0.408, "step": 40300 }, { "epoch": 0.4, "grad_norm": 10.164761543273926, "learning_rate": 4.849533796061844e-06, "loss": 0.2363, "step": 40325 }, { "epoch": 0.4, "grad_norm": 19.193235397338867, "learning_rate": 4.849409673607595e-06, "loss": 0.3819, "step": 40350 }, { "epoch": 0.4, "grad_norm": 6.373547554016113, "learning_rate": 4.849285551153346e-06, "loss": 0.2069, "step": 40375 }, { "epoch": 0.4, "grad_norm": 17.55819320678711, "learning_rate": 4.849161428699098e-06, "loss": 0.388, "step": 40400 }, { "epoch": 0.4, "grad_norm": 15.352805137634277, "learning_rate": 4.849037306244849e-06, "loss": 0.3018, "step": 40425 }, { "epoch": 0.4, "grad_norm": 21.41507911682129, "learning_rate": 4.8489131837906006e-06, "loss": 0.4251, "step": 40450 }, { "epoch": 0.4, "grad_norm": 14.074910163879395, "learning_rate": 4.848789061336353e-06, "loss": 0.2545, "step": 40475 }, { "epoch": 0.4, "grad_norm": 20.16581153869629, "learning_rate": 4.848664938882104e-06, "loss": 0.5186, "step": 40500 }, { "epoch": 0.4, "grad_norm": 9.6839599609375, "learning_rate": 4.848540816427855e-06, "loss": 0.2435, "step": 40525 }, { "epoch": 0.4, "grad_norm": 15.128893852233887, "learning_rate": 4.848416693973607e-06, "loss": 0.3998, "step": 40550 }, { "epoch": 0.4, "grad_norm": 9.032960891723633, "learning_rate": 4.848292571519358e-06, "loss": 0.2494, "step": 40575 }, { "epoch": 0.4, "grad_norm": 19.480024337768555, "learning_rate": 4.8481684490651095e-06, "loss": 0.3698, "step": 40600 }, { "epoch": 0.4, "grad_norm": 13.394439697265625, "learning_rate": 4.848044326610862e-06, "loss": 0.2523, "step": 40625 }, { "epoch": 0.4, "grad_norm": 19.582365036010742, "learning_rate": 4.847920204156613e-06, "loss": 0.4723, "step": 40650 }, { "epoch": 0.4, "grad_norm": 5.93665075302124, "learning_rate": 4.847796081702365e-06, "loss": 0.2812, "step": 40675 }, { "epoch": 0.4, "grad_norm": 13.198041915893555, "learning_rate": 4.847671959248116e-06, "loss": 0.4248, "step": 40700 }, { "epoch": 0.4, "grad_norm": 6.4288835525512695, "learning_rate": 4.847547836793868e-06, "loss": 0.229, "step": 40725 }, { "epoch": 0.4, "grad_norm": 21.435466766357422, "learning_rate": 4.847423714339619e-06, "loss": 0.3958, "step": 40750 }, { "epoch": 0.4, "grad_norm": 8.943572044372559, "learning_rate": 4.847299591885371e-06, "loss": 0.2454, "step": 40775 }, { "epoch": 0.4, "grad_norm": 21.590566635131836, "learning_rate": 4.847175469431123e-06, "loss": 0.4407, "step": 40800 }, { "epoch": 0.4, "grad_norm": 7.702921390533447, "learning_rate": 4.847051346976874e-06, "loss": 0.234, "step": 40825 }, { "epoch": 0.4, "grad_norm": 14.596325874328613, "learning_rate": 4.846927224522626e-06, "loss": 0.4483, "step": 40850 }, { "epoch": 0.4, "grad_norm": 10.31943416595459, "learning_rate": 4.846803102068377e-06, "loss": 0.2175, "step": 40875 }, { "epoch": 0.4, "grad_norm": 25.367977142333984, "learning_rate": 4.846678979614128e-06, "loss": 0.4659, "step": 40900 }, { "epoch": 0.4, "grad_norm": 7.554959774017334, "learning_rate": 4.84655485715988e-06, "loss": 0.22, "step": 40925 }, { "epoch": 0.4, "grad_norm": 37.75507736206055, "learning_rate": 4.846430734705632e-06, "loss": 0.4623, "step": 40950 }, { "epoch": 0.4, "grad_norm": 12.398337364196777, "learning_rate": 4.846306612251383e-06, "loss": 0.2601, "step": 40975 }, { "epoch": 0.4, "grad_norm": 22.127965927124023, "learning_rate": 4.846182489797135e-06, "loss": 0.444, "step": 41000 }, { "epoch": 0.4, "grad_norm": 6.606939315795898, "learning_rate": 4.846058367342886e-06, "loss": 0.249, "step": 41025 }, { "epoch": 0.4, "grad_norm": 16.827686309814453, "learning_rate": 4.845934244888637e-06, "loss": 0.4412, "step": 41050 }, { "epoch": 0.4, "grad_norm": 11.897197723388672, "learning_rate": 4.845810122434389e-06, "loss": 0.2368, "step": 41075 }, { "epoch": 0.4, "grad_norm": 10.83211612701416, "learning_rate": 4.8456859999801405e-06, "loss": 0.4629, "step": 41100 }, { "epoch": 0.4, "grad_norm": 7.326117038726807, "learning_rate": 4.845561877525893e-06, "loss": 0.2257, "step": 41125 }, { "epoch": 0.4, "grad_norm": Infinity, "learning_rate": 4.845442719969814e-06, "loss": 0.3435, "step": 41150 }, { "epoch": 0.4, "grad_norm": 11.41767406463623, "learning_rate": 4.845318597515566e-06, "loss": 0.228, "step": 41175 }, { "epoch": 0.41, "grad_norm": 16.961809158325195, "learning_rate": 4.845194475061317e-06, "loss": 0.4433, "step": 41200 }, { "epoch": 0.41, "grad_norm": 8.436837196350098, "learning_rate": 4.845070352607069e-06, "loss": 0.2496, "step": 41225 }, { "epoch": 0.41, "grad_norm": 20.887603759765625, "learning_rate": 4.84494623015282e-06, "loss": 0.3839, "step": 41250 }, { "epoch": 0.41, "grad_norm": 6.565252304077148, "learning_rate": 4.844822107698571e-06, "loss": 0.2123, "step": 41275 }, { "epoch": 0.41, "grad_norm": 15.28088092803955, "learning_rate": 4.8446979852443234e-06, "loss": 0.4386, "step": 41300 }, { "epoch": 0.41, "grad_norm": 6.515636920928955, "learning_rate": 4.844573862790075e-06, "loss": 0.222, "step": 41325 }, { "epoch": 0.41, "grad_norm": 16.272790908813477, "learning_rate": 4.844449740335826e-06, "loss": 0.4501, "step": 41350 }, { "epoch": 0.41, "grad_norm": 13.806995391845703, "learning_rate": 4.844325617881578e-06, "loss": 0.2365, "step": 41375 }, { "epoch": 0.41, "grad_norm": 26.720434188842773, "learning_rate": 4.844201495427329e-06, "loss": 0.4916, "step": 41400 }, { "epoch": 0.41, "grad_norm": 9.578834533691406, "learning_rate": 4.84407737297308e-06, "loss": 0.2546, "step": 41425 }, { "epoch": 0.41, "grad_norm": 26.60028076171875, "learning_rate": 4.843953250518832e-06, "loss": 0.3995, "step": 41450 }, { "epoch": 0.41, "grad_norm": 7.005025863647461, "learning_rate": 4.843829128064584e-06, "loss": 0.2658, "step": 41475 }, { "epoch": 0.41, "grad_norm": 23.105484008789062, "learning_rate": 4.843705005610335e-06, "loss": 0.463, "step": 41500 }, { "epoch": 0.41, "grad_norm": 13.081511497497559, "learning_rate": 4.843580883156087e-06, "loss": 0.2486, "step": 41525 }, { "epoch": 0.41, "grad_norm": 9.946720123291016, "learning_rate": 4.843456760701838e-06, "loss": 0.3588, "step": 41550 }, { "epoch": 0.41, "grad_norm": 9.456371307373047, "learning_rate": 4.84333263824759e-06, "loss": 0.2362, "step": 41575 }, { "epoch": 0.41, "grad_norm": 20.547571182250977, "learning_rate": 4.843208515793341e-06, "loss": 0.3544, "step": 41600 }, { "epoch": 0.41, "grad_norm": 7.574868202209473, "learning_rate": 4.843084393339093e-06, "loss": 0.2398, "step": 41625 }, { "epoch": 0.41, "grad_norm": 17.41077423095703, "learning_rate": 4.842960270884845e-06, "loss": 0.4603, "step": 41650 }, { "epoch": 0.41, "grad_norm": 10.96631908416748, "learning_rate": 4.842836148430597e-06, "loss": 0.2806, "step": 41675 }, { "epoch": 0.41, "grad_norm": 22.141637802124023, "learning_rate": 4.842712025976348e-06, "loss": 0.416, "step": 41700 }, { "epoch": 0.41, "grad_norm": 14.658280372619629, "learning_rate": 4.842587903522099e-06, "loss": 0.2529, "step": 41725 }, { "epoch": 0.41, "grad_norm": 26.68462562561035, "learning_rate": 4.84246378106785e-06, "loss": 0.479, "step": 41750 }, { "epoch": 0.41, "grad_norm": 5.830313682556152, "learning_rate": 4.842339658613602e-06, "loss": 0.2384, "step": 41775 }, { "epoch": 0.41, "grad_norm": 17.886367797851562, "learning_rate": 4.842215536159354e-06, "loss": 0.4578, "step": 41800 }, { "epoch": 0.41, "grad_norm": 4.6459856033325195, "learning_rate": 4.842091413705105e-06, "loss": 0.2273, "step": 41825 }, { "epoch": 0.41, "grad_norm": 18.71199607849121, "learning_rate": 4.841967291250857e-06, "loss": 0.4409, "step": 41850 }, { "epoch": 0.41, "grad_norm": 6.411563396453857, "learning_rate": 4.841843168796608e-06, "loss": 0.2752, "step": 41875 }, { "epoch": 0.41, "grad_norm": 16.25470542907715, "learning_rate": 4.841719046342359e-06, "loss": 0.444, "step": 41900 }, { "epoch": 0.41, "grad_norm": 17.77494239807129, "learning_rate": 4.841594923888111e-06, "loss": 0.2606, "step": 41925 }, { "epoch": 0.41, "grad_norm": 14.37802505493164, "learning_rate": 4.8414708014338626e-06, "loss": 0.3532, "step": 41950 }, { "epoch": 0.41, "grad_norm": 15.434114456176758, "learning_rate": 4.841346678979615e-06, "loss": 0.2412, "step": 41975 }, { "epoch": 0.41, "grad_norm": 16.683263778686523, "learning_rate": 4.841222556525366e-06, "loss": 0.4227, "step": 42000 }, { "epoch": 0.41, "grad_norm": 10.7728271484375, "learning_rate": 4.841098434071118e-06, "loss": 0.2575, "step": 42025 }, { "epoch": 0.41, "grad_norm": 15.013562202453613, "learning_rate": 4.840974311616869e-06, "loss": 0.4984, "step": 42050 }, { "epoch": 0.41, "grad_norm": 8.251846313476562, "learning_rate": 4.840850189162621e-06, "loss": 0.208, "step": 42075 }, { "epoch": 0.41, "grad_norm": 17.554712295532227, "learning_rate": 4.840726066708372e-06, "loss": 0.4604, "step": 42100 }, { "epoch": 0.41, "grad_norm": 11.310595512390137, "learning_rate": 4.840601944254124e-06, "loss": 0.2833, "step": 42125 }, { "epoch": 0.41, "grad_norm": 21.341148376464844, "learning_rate": 4.840477821799876e-06, "loss": 0.4331, "step": 42150 }, { "epoch": 0.41, "grad_norm": 11.201098442077637, "learning_rate": 4.840353699345627e-06, "loss": 0.2716, "step": 42175 }, { "epoch": 0.41, "grad_norm": 18.018596649169922, "learning_rate": 4.840229576891378e-06, "loss": 0.4404, "step": 42200 }, { "epoch": 0.42, "grad_norm": 11.799477577209473, "learning_rate": 4.84010545443713e-06, "loss": 0.2975, "step": 42225 }, { "epoch": 0.42, "grad_norm": 29.45615577697754, "learning_rate": 4.839981331982881e-06, "loss": 0.4269, "step": 42250 }, { "epoch": 0.42, "grad_norm": 19.152873992919922, "learning_rate": 4.8398572095286325e-06, "loss": 0.2548, "step": 42275 }, { "epoch": 0.42, "grad_norm": 9.694498062133789, "learning_rate": 4.839733087074385e-06, "loss": 0.3761, "step": 42300 }, { "epoch": 0.42, "grad_norm": 9.606958389282227, "learning_rate": 4.839608964620136e-06, "loss": 0.2559, "step": 42325 }, { "epoch": 0.42, "grad_norm": 20.137680053710938, "learning_rate": 4.839484842165887e-06, "loss": 0.4437, "step": 42350 }, { "epoch": 0.42, "grad_norm": 13.131429672241211, "learning_rate": 4.839360719711639e-06, "loss": 0.2006, "step": 42375 }, { "epoch": 0.42, "grad_norm": 19.579174041748047, "learning_rate": 4.83923659725739e-06, "loss": 0.4542, "step": 42400 }, { "epoch": 0.42, "grad_norm": 9.15916919708252, "learning_rate": 4.839112474803142e-06, "loss": 0.2287, "step": 42425 }, { "epoch": 0.42, "grad_norm": 18.051002502441406, "learning_rate": 4.8389883523488936e-06, "loss": 0.3749, "step": 42450 }, { "epoch": 0.42, "grad_norm": 14.0939359664917, "learning_rate": 4.838864229894646e-06, "loss": 0.2216, "step": 42475 }, { "epoch": 0.42, "grad_norm": 20.707948684692383, "learning_rate": 4.838740107440397e-06, "loss": 0.4201, "step": 42500 }, { "epoch": 0.42, "grad_norm": 15.53820514678955, "learning_rate": 4.838615984986149e-06, "loss": 0.227, "step": 42525 }, { "epoch": 0.42, "grad_norm": 18.75922966003418, "learning_rate": 4.8384918625319e-06, "loss": 0.4393, "step": 42550 }, { "epoch": 0.42, "grad_norm": 5.7925705909729, "learning_rate": 4.838367740077651e-06, "loss": 0.2359, "step": 42575 }, { "epoch": 0.42, "grad_norm": 22.79082489013672, "learning_rate": 4.8382436176234025e-06, "loss": 0.4811, "step": 42600 }, { "epoch": 0.42, "grad_norm": 14.073896408081055, "learning_rate": 4.838119495169155e-06, "loss": 0.2947, "step": 42625 }, { "epoch": 0.42, "grad_norm": 14.30379581451416, "learning_rate": 4.837995372714906e-06, "loss": 0.3541, "step": 42650 }, { "epoch": 0.42, "grad_norm": 10.609211921691895, "learning_rate": 4.837871250260657e-06, "loss": 0.2486, "step": 42675 }, { "epoch": 0.42, "grad_norm": 15.391732215881348, "learning_rate": 4.837752092704579e-06, "loss": 0.3866, "step": 42700 }, { "epoch": 0.42, "grad_norm": 8.01842975616455, "learning_rate": 4.83762797025033e-06, "loss": 0.2093, "step": 42725 }, { "epoch": 0.42, "grad_norm": 10.49682903289795, "learning_rate": 4.837503847796082e-06, "loss": 0.4241, "step": 42750 }, { "epoch": 0.42, "grad_norm": 7.018492698669434, "learning_rate": 4.837379725341833e-06, "loss": 0.2827, "step": 42775 }, { "epoch": 0.42, "grad_norm": 15.711579322814941, "learning_rate": 4.837255602887585e-06, "loss": 0.421, "step": 42800 }, { "epoch": 0.42, "grad_norm": 11.527328491210938, "learning_rate": 4.837131480433337e-06, "loss": 0.1858, "step": 42825 }, { "epoch": 0.42, "grad_norm": 13.286572456359863, "learning_rate": 4.837007357979088e-06, "loss": 0.3896, "step": 42850 }, { "epoch": 0.42, "grad_norm": 13.888639450073242, "learning_rate": 4.83688323552484e-06, "loss": 0.2289, "step": 42875 }, { "epoch": 0.42, "grad_norm": 18.22954750061035, "learning_rate": 4.836759113070591e-06, "loss": 0.4215, "step": 42900 }, { "epoch": 0.42, "grad_norm": 7.448821544647217, "learning_rate": 4.836634990616343e-06, "loss": 0.2366, "step": 42925 }, { "epoch": 0.42, "grad_norm": 16.096233367919922, "learning_rate": 4.836510868162094e-06, "loss": 0.3855, "step": 42950 }, { "epoch": 0.42, "grad_norm": 11.351861000061035, "learning_rate": 4.8363867457078464e-06, "loss": 0.2266, "step": 42975 }, { "epoch": 0.42, "grad_norm": 14.995369911193848, "learning_rate": 4.836262623253598e-06, "loss": 0.3927, "step": 43000 }, { "epoch": 0.42, "grad_norm": 11.23874568939209, "learning_rate": 4.836138500799349e-06, "loss": 0.2538, "step": 43025 }, { "epoch": 0.42, "grad_norm": 15.96178913116455, "learning_rate": 4.836014378345101e-06, "loss": 0.4727, "step": 43050 }, { "epoch": 0.42, "grad_norm": 5.544763088226318, "learning_rate": 4.835890255890852e-06, "loss": 0.2408, "step": 43075 }, { "epoch": 0.42, "grad_norm": 12.565011978149414, "learning_rate": 4.835766133436603e-06, "loss": 0.398, "step": 43100 }, { "epoch": 0.42, "grad_norm": 10.169654846191406, "learning_rate": 4.835642010982355e-06, "loss": 0.2387, "step": 43125 }, { "epoch": 0.42, "grad_norm": 19.616718292236328, "learning_rate": 4.835517888528107e-06, "loss": 0.4226, "step": 43150 }, { "epoch": 0.42, "grad_norm": 3.283073663711548, "learning_rate": 4.835393766073858e-06, "loss": 0.2356, "step": 43175 }, { "epoch": 0.42, "grad_norm": 16.65397071838379, "learning_rate": 4.83526964361961e-06, "loss": 0.4393, "step": 43200 }, { "epoch": 0.42, "grad_norm": 7.264853000640869, "learning_rate": 4.835145521165361e-06, "loss": 0.2258, "step": 43225 }, { "epoch": 0.43, "grad_norm": 19.566415786743164, "learning_rate": 4.835021398711113e-06, "loss": 0.4836, "step": 43250 }, { "epoch": 0.43, "grad_norm": 14.179039001464844, "learning_rate": 4.834897276256864e-06, "loss": 0.2317, "step": 43275 }, { "epoch": 0.43, "grad_norm": 19.423152923583984, "learning_rate": 4.8347731538026164e-06, "loss": 0.4265, "step": 43300 }, { "epoch": 0.43, "grad_norm": 12.003535270690918, "learning_rate": 4.834649031348368e-06, "loss": 0.2164, "step": 43325 }, { "epoch": 0.43, "grad_norm": 21.347639083862305, "learning_rate": 4.834524908894119e-06, "loss": 0.4702, "step": 43350 }, { "epoch": 0.43, "grad_norm": 3.936901330947876, "learning_rate": 4.834400786439871e-06, "loss": 0.2074, "step": 43375 }, { "epoch": 0.43, "grad_norm": 24.109577178955078, "learning_rate": 4.834276663985622e-06, "loss": 0.4806, "step": 43400 }, { "epoch": 0.43, "grad_norm": 12.73943042755127, "learning_rate": 4.834152541531373e-06, "loss": 0.2708, "step": 43425 }, { "epoch": 0.43, "grad_norm": 26.87527847290039, "learning_rate": 4.834028419077125e-06, "loss": 0.4703, "step": 43450 }, { "epoch": 0.43, "grad_norm": 12.944648742675781, "learning_rate": 4.833904296622877e-06, "loss": 0.257, "step": 43475 }, { "epoch": 0.43, "grad_norm": 14.84012222290039, "learning_rate": 4.833780174168628e-06, "loss": 0.3528, "step": 43500 }, { "epoch": 0.43, "grad_norm": 13.400370597839355, "learning_rate": 4.83365605171438e-06, "loss": 0.2272, "step": 43525 }, { "epoch": 0.43, "grad_norm": 14.403853416442871, "learning_rate": 4.833531929260131e-06, "loss": 0.4239, "step": 43550 }, { "epoch": 0.43, "grad_norm": 13.11984920501709, "learning_rate": 4.833407806805882e-06, "loss": 0.2657, "step": 43575 }, { "epoch": 0.43, "grad_norm": 25.87530517578125, "learning_rate": 4.833283684351634e-06, "loss": 0.4509, "step": 43600 }, { "epoch": 0.43, "grad_norm": 8.344833374023438, "learning_rate": 4.8331595618973856e-06, "loss": 0.2678, "step": 43625 }, { "epoch": 0.43, "grad_norm": 23.143665313720703, "learning_rate": 4.833035439443138e-06, "loss": 0.464, "step": 43650 }, { "epoch": 0.43, "grad_norm": 7.921799182891846, "learning_rate": 4.832911316988889e-06, "loss": 0.2786, "step": 43675 }, { "epoch": 0.43, "grad_norm": 17.115337371826172, "learning_rate": 4.832787194534641e-06, "loss": 0.4168, "step": 43700 }, { "epoch": 0.43, "grad_norm": 9.64672565460205, "learning_rate": 4.832663072080392e-06, "loss": 0.2306, "step": 43725 }, { "epoch": 0.43, "grad_norm": 21.54738998413086, "learning_rate": 4.832538949626144e-06, "loss": 0.3549, "step": 43750 }, { "epoch": 0.43, "grad_norm": 5.2133636474609375, "learning_rate": 4.832414827171895e-06, "loss": 0.2202, "step": 43775 }, { "epoch": 0.43, "grad_norm": 19.581281661987305, "learning_rate": 4.832290704717647e-06, "loss": 0.4332, "step": 43800 }, { "epoch": 0.43, "grad_norm": 12.804365158081055, "learning_rate": 4.832166582263399e-06, "loss": 0.295, "step": 43825 }, { "epoch": 0.43, "grad_norm": 20.88646125793457, "learning_rate": 4.83204245980915e-06, "loss": 0.4483, "step": 43850 }, { "epoch": 0.43, "grad_norm": 11.278944969177246, "learning_rate": 4.831918337354901e-06, "loss": 0.2123, "step": 43875 }, { "epoch": 0.43, "grad_norm": 13.689184188842773, "learning_rate": 4.831794214900653e-06, "loss": 0.4266, "step": 43900 }, { "epoch": 0.43, "grad_norm": 14.265327453613281, "learning_rate": 4.831670092446404e-06, "loss": 0.2275, "step": 43925 }, { "epoch": 0.43, "grad_norm": 24.440866470336914, "learning_rate": 4.8315459699921556e-06, "loss": 0.448, "step": 43950 }, { "epoch": 0.43, "grad_norm": 8.323362350463867, "learning_rate": 4.831421847537908e-06, "loss": 0.2293, "step": 43975 }, { "epoch": 0.43, "grad_norm": 19.832590103149414, "learning_rate": 4.831297725083659e-06, "loss": 0.4506, "step": 44000 }, { "epoch": 0.43, "grad_norm": 8.189657211303711, "learning_rate": 4.83117360262941e-06, "loss": 0.2234, "step": 44025 }, { "epoch": 0.43, "grad_norm": 14.121223449707031, "learning_rate": 4.831049480175162e-06, "loss": 0.4405, "step": 44050 }, { "epoch": 0.43, "grad_norm": 9.287879943847656, "learning_rate": 4.830925357720913e-06, "loss": 0.2271, "step": 44075 }, { "epoch": 0.43, "grad_norm": 18.874462127685547, "learning_rate": 4.830801235266665e-06, "loss": 0.4237, "step": 44100 }, { "epoch": 0.43, "grad_norm": 6.391754627227783, "learning_rate": 4.8306771128124166e-06, "loss": 0.2344, "step": 44125 }, { "epoch": 0.43, "grad_norm": 16.189918518066406, "learning_rate": 4.830552990358169e-06, "loss": 0.4228, "step": 44150 }, { "epoch": 0.43, "grad_norm": 13.769883155822754, "learning_rate": 4.83042886790392e-06, "loss": 0.2677, "step": 44175 }, { "epoch": 0.43, "grad_norm": 16.270545959472656, "learning_rate": 4.830304745449671e-06, "loss": 0.4596, "step": 44200 }, { "epoch": 0.43, "grad_norm": 7.346308708190918, "learning_rate": 4.830180622995423e-06, "loss": 0.205, "step": 44225 }, { "epoch": 0.44, "grad_norm": 17.34622573852539, "learning_rate": 4.830056500541174e-06, "loss": 0.4227, "step": 44250 }, { "epoch": 0.44, "grad_norm": 7.7249531745910645, "learning_rate": 4.8299323780869255e-06, "loss": 0.2773, "step": 44275 }, { "epoch": 0.44, "grad_norm": 15.149869918823242, "learning_rate": 4.829808255632678e-06, "loss": 0.4827, "step": 44300 }, { "epoch": 0.44, "grad_norm": 9.071186065673828, "learning_rate": 4.829684133178429e-06, "loss": 0.2573, "step": 44325 }, { "epoch": 0.44, "grad_norm": 21.201797485351562, "learning_rate": 4.82956001072418e-06, "loss": 0.4152, "step": 44350 }, { "epoch": 0.44, "grad_norm": 5.7699360847473145, "learning_rate": 4.829435888269932e-06, "loss": 0.2001, "step": 44375 }, { "epoch": 0.44, "grad_norm": 11.247711181640625, "learning_rate": 4.829311765815683e-06, "loss": 0.4463, "step": 44400 }, { "epoch": 0.44, "grad_norm": 9.520133018493652, "learning_rate": 4.8291876433614345e-06, "loss": 0.2286, "step": 44425 }, { "epoch": 0.44, "grad_norm": 16.8648738861084, "learning_rate": 4.8290635209071866e-06, "loss": 0.3631, "step": 44450 }, { "epoch": 0.44, "grad_norm": 9.828779220581055, "learning_rate": 4.828939398452938e-06, "loss": 0.2192, "step": 44475 }, { "epoch": 0.44, "grad_norm": 15.833690643310547, "learning_rate": 4.82881527599869e-06, "loss": 0.3891, "step": 44500 }, { "epoch": 0.44, "grad_norm": 9.558197975158691, "learning_rate": 4.828691153544441e-06, "loss": 0.2073, "step": 44525 }, { "epoch": 0.44, "grad_norm": 16.24477767944336, "learning_rate": 4.828567031090193e-06, "loss": 0.4405, "step": 44550 }, { "epoch": 0.44, "grad_norm": 9.273338317871094, "learning_rate": 4.828442908635944e-06, "loss": 0.2555, "step": 44575 }, { "epoch": 0.44, "grad_norm": 24.441665649414062, "learning_rate": 4.828318786181696e-06, "loss": 0.4221, "step": 44600 }, { "epoch": 0.44, "grad_norm": 9.61176872253418, "learning_rate": 4.828194663727448e-06, "loss": 0.2322, "step": 44625 }, { "epoch": 0.44, "grad_norm": 17.10223388671875, "learning_rate": 4.828070541273199e-06, "loss": 0.4638, "step": 44650 }, { "epoch": 0.44, "grad_norm": 22.16115951538086, "learning_rate": 4.827946418818951e-06, "loss": 0.2437, "step": 44675 }, { "epoch": 0.44, "grad_norm": 19.00186538696289, "learning_rate": 4.827822296364702e-06, "loss": 0.4367, "step": 44700 }, { "epoch": 0.44, "grad_norm": 17.43388557434082, "learning_rate": 4.827698173910453e-06, "loss": 0.2375, "step": 44725 }, { "epoch": 0.44, "grad_norm": 14.753865242004395, "learning_rate": 4.827574051456205e-06, "loss": 0.3746, "step": 44750 }, { "epoch": 0.44, "grad_norm": 12.251252174377441, "learning_rate": 4.8274499290019565e-06, "loss": 0.2344, "step": 44775 }, { "epoch": 0.44, "grad_norm": 14.98835277557373, "learning_rate": 4.827325806547708e-06, "loss": 0.3399, "step": 44800 }, { "epoch": 0.44, "grad_norm": 7.7153706550598145, "learning_rate": 4.82720168409346e-06, "loss": 0.2072, "step": 44825 }, { "epoch": 0.44, "grad_norm": 18.947174072265625, "learning_rate": 4.827077561639211e-06, "loss": 0.4132, "step": 44850 }, { "epoch": 0.44, "grad_norm": 6.172188758850098, "learning_rate": 4.826953439184962e-06, "loss": 0.2115, "step": 44875 }, { "epoch": 0.44, "grad_norm": 11.944966316223145, "learning_rate": 4.826829316730714e-06, "loss": 0.4989, "step": 44900 }, { "epoch": 0.44, "grad_norm": 0.4919742941856384, "learning_rate": 4.8267051942764655e-06, "loss": 0.1917, "step": 44925 }, { "epoch": 0.44, "grad_norm": 20.631725311279297, "learning_rate": 4.8265810718222176e-06, "loss": 0.4198, "step": 44950 }, { "epoch": 0.44, "grad_norm": 7.490588665008545, "learning_rate": 4.826456949367969e-06, "loss": 0.2236, "step": 44975 }, { "epoch": 0.44, "grad_norm": 17.07009506225586, "learning_rate": 4.826332826913721e-06, "loss": 0.3804, "step": 45000 }, { "epoch": 0.44, "grad_norm": 5.317784309387207, "learning_rate": 4.826208704459472e-06, "loss": 0.2151, "step": 45025 }, { "epoch": 0.44, "grad_norm": 19.08493995666504, "learning_rate": 4.826084582005223e-06, "loss": 0.4506, "step": 45050 }, { "epoch": 0.44, "grad_norm": 16.954832077026367, "learning_rate": 4.825960459550975e-06, "loss": 0.2696, "step": 45075 }, { "epoch": 0.44, "grad_norm": 15.893134117126465, "learning_rate": 4.8258363370967265e-06, "loss": 0.4758, "step": 45100 }, { "epoch": 0.44, "grad_norm": 6.705533981323242, "learning_rate": 4.825712214642478e-06, "loss": 0.2432, "step": 45125 }, { "epoch": 0.44, "grad_norm": 14.179191589355469, "learning_rate": 4.82558809218823e-06, "loss": 0.3614, "step": 45150 }, { "epoch": 0.44, "grad_norm": 17.487163543701172, "learning_rate": 4.825463969733981e-06, "loss": 0.2577, "step": 45175 }, { "epoch": 0.44, "grad_norm": 21.261390686035156, "learning_rate": 4.825339847279732e-06, "loss": 0.4659, "step": 45200 }, { "epoch": 0.44, "grad_norm": 14.965664863586426, "learning_rate": 4.825215724825484e-06, "loss": 0.245, "step": 45225 }, { "epoch": 0.44, "grad_norm": 16.796024322509766, "learning_rate": 4.8250916023712355e-06, "loss": 0.365, "step": 45250 }, { "epoch": 0.45, "grad_norm": 10.818819046020508, "learning_rate": 4.824967479916987e-06, "loss": 0.2396, "step": 45275 }, { "epoch": 0.45, "grad_norm": 19.452131271362305, "learning_rate": 4.824843357462739e-06, "loss": 0.4509, "step": 45300 }, { "epoch": 0.45, "grad_norm": 12.383332252502441, "learning_rate": 4.82471923500849e-06, "loss": 0.2304, "step": 45325 }, { "epoch": 0.45, "grad_norm": 17.530010223388672, "learning_rate": 4.824595112554242e-06, "loss": 0.4868, "step": 45350 }, { "epoch": 0.45, "grad_norm": 12.016627311706543, "learning_rate": 4.824470990099993e-06, "loss": 0.2486, "step": 45375 }, { "epoch": 0.45, "grad_norm": 13.356452941894531, "learning_rate": 4.824346867645745e-06, "loss": 0.4011, "step": 45400 }, { "epoch": 0.45, "grad_norm": 11.278268814086914, "learning_rate": 4.8242227451914965e-06, "loss": 0.2647, "step": 45425 }, { "epoch": 0.45, "grad_norm": 17.191143035888672, "learning_rate": 4.8240986227372486e-06, "loss": 0.398, "step": 45450 }, { "epoch": 0.45, "grad_norm": 14.761990547180176, "learning_rate": 4.823974500283e-06, "loss": 0.2743, "step": 45475 }, { "epoch": 0.45, "grad_norm": 18.26736831665039, "learning_rate": 4.823850377828751e-06, "loss": 0.4083, "step": 45500 }, { "epoch": 0.45, "grad_norm": 12.455172538757324, "learning_rate": 4.823726255374503e-06, "loss": 0.228, "step": 45525 }, { "epoch": 0.45, "grad_norm": 16.276987075805664, "learning_rate": 4.823602132920254e-06, "loss": 0.3913, "step": 45550 }, { "epoch": 0.45, "grad_norm": 9.038453102111816, "learning_rate": 4.8234780104660055e-06, "loss": 0.3042, "step": 45575 }, { "epoch": 0.45, "grad_norm": 18.977134704589844, "learning_rate": 4.8233538880117575e-06, "loss": 0.4379, "step": 45600 }, { "epoch": 0.45, "grad_norm": 6.4844865798950195, "learning_rate": 4.823229765557509e-06, "loss": 0.228, "step": 45625 }, { "epoch": 0.45, "grad_norm": 19.949073791503906, "learning_rate": 4.82310564310326e-06, "loss": 0.4242, "step": 45650 }, { "epoch": 0.45, "grad_norm": 11.647747039794922, "learning_rate": 4.822981520649012e-06, "loss": 0.2031, "step": 45675 }, { "epoch": 0.45, "grad_norm": 20.547168731689453, "learning_rate": 4.822857398194763e-06, "loss": 0.3932, "step": 45700 }, { "epoch": 0.45, "grad_norm": 13.543853759765625, "learning_rate": 4.822733275740515e-06, "loss": 0.2998, "step": 45725 }, { "epoch": 0.45, "grad_norm": 29.148958206176758, "learning_rate": 4.8226091532862665e-06, "loss": 0.4393, "step": 45750 }, { "epoch": 0.45, "grad_norm": 12.103222846984863, "learning_rate": 4.8224850308320186e-06, "loss": 0.218, "step": 45775 }, { "epoch": 0.45, "grad_norm": 14.431532859802246, "learning_rate": 4.82236090837777e-06, "loss": 0.4118, "step": 45800 }, { "epoch": 0.45, "grad_norm": 8.836407661437988, "learning_rate": 4.822236785923521e-06, "loss": 0.268, "step": 45825 }, { "epoch": 0.45, "grad_norm": 17.93854331970215, "learning_rate": 4.822112663469273e-06, "loss": 0.4216, "step": 45850 }, { "epoch": 0.45, "grad_norm": 8.344711303710938, "learning_rate": 4.821988541015024e-06, "loss": 0.2484, "step": 45875 }, { "epoch": 0.45, "grad_norm": 15.953509330749512, "learning_rate": 4.8218644185607755e-06, "loss": 0.4169, "step": 45900 }, { "epoch": 0.45, "grad_norm": 3.038498640060425, "learning_rate": 4.8217402961065275e-06, "loss": 0.2119, "step": 45925 }, { "epoch": 0.45, "grad_norm": 15.819785118103027, "learning_rate": 4.821616173652279e-06, "loss": 0.4253, "step": 45950 }, { "epoch": 0.45, "grad_norm": 12.219881057739258, "learning_rate": 4.82149205119803e-06, "loss": 0.2515, "step": 45975 }, { "epoch": 0.45, "grad_norm": 21.012508392333984, "learning_rate": 4.821367928743782e-06, "loss": 0.419, "step": 46000 }, { "epoch": 0.45, "grad_norm": 9.99301815032959, "learning_rate": 4.821243806289533e-06, "loss": 0.2418, "step": 46025 }, { "epoch": 0.45, "grad_norm": 14.641438484191895, "learning_rate": 4.821119683835284e-06, "loss": 0.4577, "step": 46050 }, { "epoch": 0.45, "grad_norm": 12.403437614440918, "learning_rate": 4.8209955613810365e-06, "loss": 0.2453, "step": 46075 }, { "epoch": 0.45, "grad_norm": 21.14077377319336, "learning_rate": 4.820871438926788e-06, "loss": 0.3899, "step": 46100 }, { "epoch": 0.45, "grad_norm": 11.968451499938965, "learning_rate": 4.82074731647254e-06, "loss": 0.2669, "step": 46125 }, { "epoch": 0.45, "grad_norm": 26.91546630859375, "learning_rate": 4.820623194018291e-06, "loss": 0.4404, "step": 46150 }, { "epoch": 0.45, "grad_norm": 11.85545825958252, "learning_rate": 4.820499071564043e-06, "loss": 0.2363, "step": 46175 }, { "epoch": 0.45, "grad_norm": 20.50725746154785, "learning_rate": 4.820374949109794e-06, "loss": 0.3987, "step": 46200 }, { "epoch": 0.45, "grad_norm": 7.562465667724609, "learning_rate": 4.820250826655546e-06, "loss": 0.234, "step": 46225 }, { "epoch": 0.45, "grad_norm": 22.232860565185547, "learning_rate": 4.8201267042012975e-06, "loss": 0.4146, "step": 46250 }, { "epoch": 0.45, "grad_norm": 16.358692169189453, "learning_rate": 4.820002581747049e-06, "loss": 0.2188, "step": 46275 }, { "epoch": 0.46, "grad_norm": 15.711414337158203, "learning_rate": 4.819878459292801e-06, "loss": 0.419, "step": 46300 }, { "epoch": 0.46, "grad_norm": 7.683731555938721, "learning_rate": 4.819754336838552e-06, "loss": 0.2195, "step": 46325 }, { "epoch": 0.46, "grad_norm": 15.517049789428711, "learning_rate": 4.819630214384303e-06, "loss": 0.4106, "step": 46350 }, { "epoch": 0.46, "grad_norm": 6.763664245605469, "learning_rate": 4.819506091930055e-06, "loss": 0.2061, "step": 46375 }, { "epoch": 0.46, "grad_norm": 17.28956413269043, "learning_rate": 4.8193819694758065e-06, "loss": 0.4331, "step": 46400 }, { "epoch": 0.46, "grad_norm": 12.7403564453125, "learning_rate": 4.819257847021558e-06, "loss": 0.2622, "step": 46425 }, { "epoch": 0.46, "grad_norm": 24.305776596069336, "learning_rate": 4.81913372456731e-06, "loss": 0.3798, "step": 46450 }, { "epoch": 0.46, "grad_norm": 5.510519981384277, "learning_rate": 4.819009602113061e-06, "loss": 0.2246, "step": 46475 }, { "epoch": 0.46, "grad_norm": 20.546106338500977, "learning_rate": 4.818885479658812e-06, "loss": 0.3749, "step": 46500 }, { "epoch": 0.46, "grad_norm": 10.250899314880371, "learning_rate": 4.818761357204564e-06, "loss": 0.2199, "step": 46525 }, { "epoch": 0.46, "grad_norm": 22.564380645751953, "learning_rate": 4.818637234750315e-06, "loss": 0.4558, "step": 46550 }, { "epoch": 0.46, "grad_norm": 6.344113826751709, "learning_rate": 4.8185131122960675e-06, "loss": 0.2481, "step": 46575 }, { "epoch": 0.46, "grad_norm": 22.083465576171875, "learning_rate": 4.818388989841819e-06, "loss": 0.4521, "step": 46600 }, { "epoch": 0.46, "grad_norm": 11.737467765808105, "learning_rate": 4.818264867387571e-06, "loss": 0.196, "step": 46625 }, { "epoch": 0.46, "grad_norm": 18.57736587524414, "learning_rate": 4.818140744933322e-06, "loss": 0.4413, "step": 46650 }, { "epoch": 0.46, "grad_norm": 12.329889297485352, "learning_rate": 4.818016622479073e-06, "loss": 0.232, "step": 46675 }, { "epoch": 0.46, "grad_norm": 15.186310768127441, "learning_rate": 4.817892500024825e-06, "loss": 0.457, "step": 46700 }, { "epoch": 0.46, "grad_norm": 8.649806022644043, "learning_rate": 4.8177683775705764e-06, "loss": 0.2055, "step": 46725 }, { "epoch": 0.46, "grad_norm": 13.73554801940918, "learning_rate": 4.817644255116328e-06, "loss": 0.4453, "step": 46750 }, { "epoch": 0.46, "grad_norm": 11.377157211303711, "learning_rate": 4.81752013266208e-06, "loss": 0.2558, "step": 46775 }, { "epoch": 0.46, "grad_norm": 15.957569122314453, "learning_rate": 4.817400975106001e-06, "loss": 0.3865, "step": 46800 }, { "epoch": 0.46, "grad_norm": 11.47819995880127, "learning_rate": 4.817276852651753e-06, "loss": 0.2591, "step": 46825 }, { "epoch": 0.46, "grad_norm": 11.052900314331055, "learning_rate": 4.817152730197504e-06, "loss": 0.3683, "step": 46850 }, { "epoch": 0.46, "grad_norm": 18.135255813598633, "learning_rate": 4.817028607743255e-06, "loss": 0.245, "step": 46875 }, { "epoch": 0.46, "grad_norm": 17.78166961669922, "learning_rate": 4.816904485289007e-06, "loss": 0.399, "step": 46900 }, { "epoch": 0.46, "grad_norm": 9.192672729492188, "learning_rate": 4.8167803628347585e-06, "loss": 0.2058, "step": 46925 }, { "epoch": 0.46, "grad_norm": 18.355796813964844, "learning_rate": 4.81665624038051e-06, "loss": 0.399, "step": 46950 }, { "epoch": 0.46, "grad_norm": 14.693547248840332, "learning_rate": 4.816532117926262e-06, "loss": 0.1934, "step": 46975 }, { "epoch": 0.46, "grad_norm": 19.275522232055664, "learning_rate": 4.816407995472013e-06, "loss": 0.4514, "step": 47000 }, { "epoch": 0.46, "grad_norm": 11.500435829162598, "learning_rate": 4.816283873017765e-06, "loss": 0.2202, "step": 47025 }, { "epoch": 0.46, "grad_norm": 16.90009307861328, "learning_rate": 4.816159750563516e-06, "loss": 0.4606, "step": 47050 }, { "epoch": 0.46, "grad_norm": 7.562346458435059, "learning_rate": 4.816035628109268e-06, "loss": 0.2118, "step": 47075 }, { "epoch": 0.46, "grad_norm": 20.357086181640625, "learning_rate": 4.8159115056550195e-06, "loss": 0.4913, "step": 47100 }, { "epoch": 0.46, "grad_norm": 12.287638664245605, "learning_rate": 4.8157873832007716e-06, "loss": 0.2052, "step": 47125 }, { "epoch": 0.46, "grad_norm": 12.684122085571289, "learning_rate": 4.815663260746523e-06, "loss": 0.4519, "step": 47150 }, { "epoch": 0.46, "grad_norm": 11.085636138916016, "learning_rate": 4.815539138292274e-06, "loss": 0.2307, "step": 47175 }, { "epoch": 0.46, "grad_norm": 13.875370979309082, "learning_rate": 4.815415015838025e-06, "loss": 0.3752, "step": 47200 }, { "epoch": 0.46, "grad_norm": 8.618232727050781, "learning_rate": 4.815290893383777e-06, "loss": 0.1903, "step": 47225 }, { "epoch": 0.46, "grad_norm": 24.872907638549805, "learning_rate": 4.8151667709295285e-06, "loss": 0.4527, "step": 47250 }, { "epoch": 0.46, "grad_norm": 14.468986511230469, "learning_rate": 4.81504264847528e-06, "loss": 0.2733, "step": 47275 }, { "epoch": 0.47, "grad_norm": 19.317340850830078, "learning_rate": 4.814918526021032e-06, "loss": 0.444, "step": 47300 }, { "epoch": 0.47, "grad_norm": 6.661855220794678, "learning_rate": 4.814794403566783e-06, "loss": 0.2283, "step": 47325 }, { "epoch": 0.47, "grad_norm": 16.214601516723633, "learning_rate": 4.814670281112534e-06, "loss": 0.413, "step": 47350 }, { "epoch": 0.47, "grad_norm": 8.451932907104492, "learning_rate": 4.814546158658286e-06, "loss": 0.2357, "step": 47375 }, { "epoch": 0.47, "grad_norm": 21.62761878967285, "learning_rate": 4.8144220362040374e-06, "loss": 0.4218, "step": 47400 }, { "epoch": 0.47, "grad_norm": 7.721968173980713, "learning_rate": 4.8142979137497895e-06, "loss": 0.2071, "step": 47425 }, { "epoch": 0.47, "grad_norm": 19.855789184570312, "learning_rate": 4.814173791295541e-06, "loss": 0.4452, "step": 47450 }, { "epoch": 0.47, "grad_norm": 16.19549560546875, "learning_rate": 4.814049668841293e-06, "loss": 0.2277, "step": 47475 }, { "epoch": 0.47, "grad_norm": 19.815959930419922, "learning_rate": 4.813925546387044e-06, "loss": 0.4379, "step": 47500 }, { "epoch": 0.47, "grad_norm": 5.833583354949951, "learning_rate": 4.813801423932796e-06, "loss": 0.2247, "step": 47525 }, { "epoch": 0.47, "grad_norm": 18.43316650390625, "learning_rate": 4.813677301478547e-06, "loss": 0.5102, "step": 47550 }, { "epoch": 0.47, "grad_norm": 12.706181526184082, "learning_rate": 4.8135531790242985e-06, "loss": 0.2474, "step": 47575 }, { "epoch": 0.47, "grad_norm": 18.653648376464844, "learning_rate": 4.8134290565700505e-06, "loss": 0.481, "step": 47600 }, { "epoch": 0.47, "grad_norm": 16.217987060546875, "learning_rate": 4.813304934115802e-06, "loss": 0.2641, "step": 47625 }, { "epoch": 0.47, "grad_norm": 18.807859420776367, "learning_rate": 4.813180811661553e-06, "loss": 0.4712, "step": 47650 }, { "epoch": 0.47, "grad_norm": 11.783470153808594, "learning_rate": 4.813056689207305e-06, "loss": 0.2204, "step": 47675 }, { "epoch": 0.47, "grad_norm": 17.63493537902832, "learning_rate": 4.812932566753056e-06, "loss": 0.4209, "step": 47700 }, { "epoch": 0.47, "grad_norm": 6.145013809204102, "learning_rate": 4.8128084442988074e-06, "loss": 0.2756, "step": 47725 }, { "epoch": 0.47, "grad_norm": 23.669902801513672, "learning_rate": 4.8126843218445595e-06, "loss": 0.3683, "step": 47750 }, { "epoch": 0.47, "grad_norm": 9.48697280883789, "learning_rate": 4.812560199390311e-06, "loss": 0.218, "step": 47775 }, { "epoch": 0.47, "grad_norm": 18.484712600708008, "learning_rate": 4.812436076936062e-06, "loss": 0.4679, "step": 47800 }, { "epoch": 0.47, "grad_norm": 14.80869197845459, "learning_rate": 4.812311954481814e-06, "loss": 0.2231, "step": 47825 }, { "epoch": 0.47, "grad_norm": 23.64668846130371, "learning_rate": 4.812187832027565e-06, "loss": 0.4795, "step": 47850 }, { "epoch": 0.47, "grad_norm": 4.876255035400391, "learning_rate": 4.812063709573317e-06, "loss": 0.2532, "step": 47875 }, { "epoch": 0.47, "grad_norm": 24.420352935791016, "learning_rate": 4.8119395871190684e-06, "loss": 0.4221, "step": 47900 }, { "epoch": 0.47, "grad_norm": 13.15054702758789, "learning_rate": 4.8118154646648205e-06, "loss": 0.2363, "step": 47925 }, { "epoch": 0.47, "grad_norm": 13.579806327819824, "learning_rate": 4.811691342210572e-06, "loss": 0.4135, "step": 47950 }, { "epoch": 0.47, "grad_norm": 13.725192070007324, "learning_rate": 4.811567219756324e-06, "loss": 0.2829, "step": 47975 }, { "epoch": 0.47, "grad_norm": 21.84137535095215, "learning_rate": 4.811443097302075e-06, "loss": 0.4449, "step": 48000 }, { "epoch": 0.47, "grad_norm": 7.8613739013671875, "learning_rate": 4.811318974847826e-06, "loss": 0.2785, "step": 48025 }, { "epoch": 0.47, "grad_norm": 15.002148628234863, "learning_rate": 4.811194852393577e-06, "loss": 0.346, "step": 48050 }, { "epoch": 0.47, "grad_norm": 7.677010536193848, "learning_rate": 4.8110707299393295e-06, "loss": 0.2419, "step": 48075 }, { "epoch": 0.47, "grad_norm": 24.798419952392578, "learning_rate": 4.810946607485081e-06, "loss": 0.4204, "step": 48100 }, { "epoch": 0.47, "grad_norm": 11.255786895751953, "learning_rate": 4.810822485030832e-06, "loss": 0.2323, "step": 48125 }, { "epoch": 0.47, "grad_norm": 18.121383666992188, "learning_rate": 4.810698362576584e-06, "loss": 0.4883, "step": 48150 }, { "epoch": 0.47, "grad_norm": 12.262332916259766, "learning_rate": 4.810574240122335e-06, "loss": 0.2212, "step": 48175 }, { "epoch": 0.47, "grad_norm": 14.080689430236816, "learning_rate": 4.810450117668086e-06, "loss": 0.4545, "step": 48200 }, { "epoch": 0.47, "grad_norm": 10.181421279907227, "learning_rate": 4.8103259952138384e-06, "loss": 0.2239, "step": 48225 }, { "epoch": 0.47, "grad_norm": 20.294567108154297, "learning_rate": 4.81020187275959e-06, "loss": 0.4596, "step": 48250 }, { "epoch": 0.47, "grad_norm": 7.064478874206543, "learning_rate": 4.810077750305342e-06, "loss": 0.2164, "step": 48275 }, { "epoch": 0.47, "grad_norm": 13.04752254486084, "learning_rate": 4.809953627851093e-06, "loss": 0.3311, "step": 48300 }, { "epoch": 0.48, "grad_norm": 13.82868480682373, "learning_rate": 4.809829505396845e-06, "loss": 0.2259, "step": 48325 }, { "epoch": 0.48, "grad_norm": 14.721564292907715, "learning_rate": 4.809705382942596e-06, "loss": 0.3943, "step": 48350 }, { "epoch": 0.48, "grad_norm": 9.58784294128418, "learning_rate": 4.809581260488348e-06, "loss": 0.2188, "step": 48375 }, { "epoch": 0.48, "grad_norm": 15.554615020751953, "learning_rate": 4.8094571380340995e-06, "loss": 0.3555, "step": 48400 }, { "epoch": 0.48, "grad_norm": 9.141802787780762, "learning_rate": 4.809333015579851e-06, "loss": 0.2716, "step": 48425 }, { "epoch": 0.48, "grad_norm": 10.880253791809082, "learning_rate": 4.809208893125603e-06, "loss": 0.4546, "step": 48450 }, { "epoch": 0.48, "grad_norm": 4.794713020324707, "learning_rate": 4.809084770671354e-06, "loss": 0.2309, "step": 48475 }, { "epoch": 0.48, "grad_norm": 18.65274429321289, "learning_rate": 4.808960648217105e-06, "loss": 0.4413, "step": 48500 }, { "epoch": 0.48, "grad_norm": 9.46053409576416, "learning_rate": 4.808836525762857e-06, "loss": 0.2172, "step": 48525 }, { "epoch": 0.48, "grad_norm": 20.554859161376953, "learning_rate": 4.808712403308608e-06, "loss": 0.3842, "step": 48550 }, { "epoch": 0.48, "grad_norm": 7.049861907958984, "learning_rate": 4.80858828085436e-06, "loss": 0.2499, "step": 48575 }, { "epoch": 0.48, "grad_norm": 18.425294876098633, "learning_rate": 4.808464158400112e-06, "loss": 0.4337, "step": 48600 }, { "epoch": 0.48, "grad_norm": 12.43486499786377, "learning_rate": 4.808340035945863e-06, "loss": 0.2501, "step": 48625 }, { "epoch": 0.48, "grad_norm": 15.999833106994629, "learning_rate": 4.808215913491615e-06, "loss": 0.4199, "step": 48650 }, { "epoch": 0.48, "grad_norm": 10.329270362854004, "learning_rate": 4.808091791037366e-06, "loss": 0.2216, "step": 48675 }, { "epoch": 0.48, "grad_norm": 19.208322525024414, "learning_rate": 4.807967668583118e-06, "loss": 0.3722, "step": 48700 }, { "epoch": 0.48, "grad_norm": 12.789777755737305, "learning_rate": 4.8078435461288694e-06, "loss": 0.2239, "step": 48725 }, { "epoch": 0.48, "grad_norm": 22.223058700561523, "learning_rate": 4.8077194236746215e-06, "loss": 0.3798, "step": 48750 }, { "epoch": 0.48, "grad_norm": 16.608980178833008, "learning_rate": 4.807595301220373e-06, "loss": 0.2391, "step": 48775 }, { "epoch": 0.48, "grad_norm": 12.12576961517334, "learning_rate": 4.807471178766124e-06, "loss": 0.4509, "step": 48800 }, { "epoch": 0.48, "grad_norm": 9.175398826599121, "learning_rate": 4.807347056311876e-06, "loss": 0.236, "step": 48825 }, { "epoch": 0.48, "grad_norm": 15.305471420288086, "learning_rate": 4.807222933857627e-06, "loss": 0.3834, "step": 48850 }, { "epoch": 0.48, "grad_norm": 10.936861991882324, "learning_rate": 4.807098811403378e-06, "loss": 0.2122, "step": 48875 }, { "epoch": 0.48, "grad_norm": 11.012981414794922, "learning_rate": 4.8069796538473e-06, "loss": 0.393, "step": 48900 }, { "epoch": 0.48, "grad_norm": 10.169251441955566, "learning_rate": 4.8068555313930515e-06, "loss": 0.2196, "step": 48925 }, { "epoch": 0.48, "grad_norm": 14.679386138916016, "learning_rate": 4.806731408938803e-06, "loss": 0.4853, "step": 48950 }, { "epoch": 0.48, "grad_norm": 10.12147045135498, "learning_rate": 4.806607286484555e-06, "loss": 0.2674, "step": 48975 }, { "epoch": 0.48, "grad_norm": 16.182376861572266, "learning_rate": 4.806483164030306e-06, "loss": 0.3967, "step": 49000 }, { "epoch": 0.48, "grad_norm": 5.634555816650391, "learning_rate": 4.806359041576057e-06, "loss": 0.2044, "step": 49025 }, { "epoch": 0.48, "grad_norm": 53.11761474609375, "learning_rate": 4.806234919121809e-06, "loss": 0.3195, "step": 49050 }, { "epoch": 0.48, "grad_norm": 16.659744262695312, "learning_rate": 4.8061107966675604e-06, "loss": 0.2325, "step": 49075 }, { "epoch": 0.48, "grad_norm": 14.238572120666504, "learning_rate": 4.8059866742133125e-06, "loss": 0.3567, "step": 49100 }, { "epoch": 0.48, "grad_norm": 7.850593566894531, "learning_rate": 4.805862551759064e-06, "loss": 0.2193, "step": 49125 }, { "epoch": 0.48, "grad_norm": 18.141666412353516, "learning_rate": 4.805738429304816e-06, "loss": 0.4022, "step": 49150 }, { "epoch": 0.48, "grad_norm": 7.057565689086914, "learning_rate": 4.805614306850567e-06, "loss": 0.2454, "step": 49175 }, { "epoch": 0.48, "grad_norm": 16.16790771484375, "learning_rate": 4.805490184396319e-06, "loss": 0.4246, "step": 49200 }, { "epoch": 0.48, "grad_norm": 9.086801528930664, "learning_rate": 4.80536606194207e-06, "loss": 0.2195, "step": 49225 }, { "epoch": 0.48, "grad_norm": 12.407565116882324, "learning_rate": 4.8052419394878215e-06, "loss": 0.4163, "step": 49250 }, { "epoch": 0.48, "grad_norm": 3.1174347400665283, "learning_rate": 4.8051178170335735e-06, "loss": 0.2098, "step": 49275 }, { "epoch": 0.48, "grad_norm": 17.06116485595703, "learning_rate": 4.804993694579325e-06, "loss": 0.4123, "step": 49300 }, { "epoch": 0.48, "grad_norm": 11.311644554138184, "learning_rate": 4.804869572125076e-06, "loss": 0.2492, "step": 49325 }, { "epoch": 0.49, "grad_norm": 19.11683464050293, "learning_rate": 4.804745449670828e-06, "loss": 0.4435, "step": 49350 }, { "epoch": 0.49, "grad_norm": 10.803749084472656, "learning_rate": 4.804621327216579e-06, "loss": 0.2596, "step": 49375 }, { "epoch": 0.49, "grad_norm": 17.977815628051758, "learning_rate": 4.8044972047623304e-06, "loss": 0.4395, "step": 49400 }, { "epoch": 0.49, "grad_norm": 3.875242233276367, "learning_rate": 4.8043730823080825e-06, "loss": 0.2095, "step": 49425 }, { "epoch": 0.49, "grad_norm": 14.326452255249023, "learning_rate": 4.804248959853834e-06, "loss": 0.351, "step": 49450 }, { "epoch": 0.49, "grad_norm": 10.750043869018555, "learning_rate": 4.804124837399585e-06, "loss": 0.2478, "step": 49475 }, { "epoch": 0.49, "grad_norm": 17.88519287109375, "learning_rate": 4.804000714945337e-06, "loss": 0.4523, "step": 49500 }, { "epoch": 0.49, "grad_norm": 6.570440769195557, "learning_rate": 4.803876592491088e-06, "loss": 0.2082, "step": 49525 }, { "epoch": 0.49, "grad_norm": 14.072911262512207, "learning_rate": 4.80375247003684e-06, "loss": 0.43, "step": 49550 }, { "epoch": 0.49, "grad_norm": 9.478815078735352, "learning_rate": 4.8036283475825915e-06, "loss": 0.2491, "step": 49575 }, { "epoch": 0.49, "grad_norm": 17.358489990234375, "learning_rate": 4.8035042251283435e-06, "loss": 0.4016, "step": 49600 }, { "epoch": 0.49, "grad_norm": 7.194839954376221, "learning_rate": 4.803380102674095e-06, "loss": 0.2328, "step": 49625 }, { "epoch": 0.49, "grad_norm": 18.286073684692383, "learning_rate": 4.803255980219846e-06, "loss": 0.3547, "step": 49650 }, { "epoch": 0.49, "grad_norm": 7.673459053039551, "learning_rate": 4.803131857765598e-06, "loss": 0.2407, "step": 49675 }, { "epoch": 0.49, "grad_norm": 14.918600082397461, "learning_rate": 4.803007735311349e-06, "loss": 0.4095, "step": 49700 }, { "epoch": 0.49, "grad_norm": 9.552680015563965, "learning_rate": 4.8028836128571e-06, "loss": 0.229, "step": 49725 }, { "epoch": 0.49, "grad_norm": 27.635353088378906, "learning_rate": 4.8027594904028525e-06, "loss": 0.445, "step": 49750 }, { "epoch": 0.49, "grad_norm": 18.130273818969727, "learning_rate": 4.802635367948604e-06, "loss": 0.2458, "step": 49775 }, { "epoch": 0.49, "grad_norm": 16.828603744506836, "learning_rate": 4.802511245494355e-06, "loss": 0.4526, "step": 49800 }, { "epoch": 0.49, "grad_norm": 11.712336540222168, "learning_rate": 4.802387123040107e-06, "loss": 0.2513, "step": 49825 }, { "epoch": 0.49, "grad_norm": 21.249357223510742, "learning_rate": 4.802263000585858e-06, "loss": 0.4774, "step": 49850 }, { "epoch": 0.49, "grad_norm": 11.749300956726074, "learning_rate": 4.802138878131609e-06, "loss": 0.2368, "step": 49875 }, { "epoch": 0.49, "grad_norm": 21.056060791015625, "learning_rate": 4.8020147556773614e-06, "loss": 0.4126, "step": 49900 }, { "epoch": 0.49, "grad_norm": 10.702591896057129, "learning_rate": 4.801890633223113e-06, "loss": 0.2144, "step": 49925 }, { "epoch": 0.49, "grad_norm": 18.013139724731445, "learning_rate": 4.801766510768865e-06, "loss": 0.3829, "step": 49950 }, { "epoch": 0.49, "grad_norm": 11.157854080200195, "learning_rate": 4.801642388314616e-06, "loss": 0.213, "step": 49975 }, { "epoch": 0.49, "grad_norm": 18.900390625, "learning_rate": 4.801518265860368e-06, "loss": 0.4632, "step": 50000 }, { "epoch": 0.49, "grad_norm": 9.739688873291016, "learning_rate": 4.801394143406119e-06, "loss": 0.2184, "step": 50025 }, { "epoch": 0.49, "grad_norm": 19.71078109741211, "learning_rate": 4.801270020951871e-06, "loss": 0.4307, "step": 50050 }, { "epoch": 0.49, "grad_norm": 12.190322875976562, "learning_rate": 4.8011458984976225e-06, "loss": 0.2231, "step": 50075 }, { "epoch": 0.49, "grad_norm": 14.92030143737793, "learning_rate": 4.801021776043374e-06, "loss": 0.4224, "step": 50100 }, { "epoch": 0.49, "grad_norm": 7.670648574829102, "learning_rate": 4.800897653589126e-06, "loss": 0.2422, "step": 50125 }, { "epoch": 0.49, "grad_norm": 13.18742847442627, "learning_rate": 4.800773531134877e-06, "loss": 0.4766, "step": 50150 }, { "epoch": 0.49, "grad_norm": 6.627062797546387, "learning_rate": 4.800649408680628e-06, "loss": 0.216, "step": 50175 }, { "epoch": 0.49, "grad_norm": 9.324334144592285, "learning_rate": 4.80052528622638e-06, "loss": 0.4145, "step": 50200 }, { "epoch": 0.49, "grad_norm": 9.579318046569824, "learning_rate": 4.800401163772131e-06, "loss": 0.2395, "step": 50225 }, { "epoch": 0.49, "grad_norm": 8.96864128112793, "learning_rate": 4.800277041317883e-06, "loss": 0.4115, "step": 50250 }, { "epoch": 0.49, "grad_norm": 12.036646842956543, "learning_rate": 4.800152918863635e-06, "loss": 0.2335, "step": 50275 }, { "epoch": 0.49, "grad_norm": 18.622852325439453, "learning_rate": 4.800028796409386e-06, "loss": 0.3807, "step": 50300 }, { "epoch": 0.49, "grad_norm": 8.449419975280762, "learning_rate": 4.799904673955137e-06, "loss": 0.2358, "step": 50325 }, { "epoch": 0.5, "grad_norm": 15.252523422241211, "learning_rate": 4.799780551500889e-06, "loss": 0.4043, "step": 50350 }, { "epoch": 0.5, "grad_norm": 14.015104293823242, "learning_rate": 4.79965642904664e-06, "loss": 0.2442, "step": 50375 }, { "epoch": 0.5, "grad_norm": 18.43930435180664, "learning_rate": 4.7995323065923924e-06, "loss": 0.3979, "step": 50400 }, { "epoch": 0.5, "grad_norm": 13.523040771484375, "learning_rate": 4.799408184138144e-06, "loss": 0.2233, "step": 50425 }, { "epoch": 0.5, "grad_norm": 20.140647888183594, "learning_rate": 4.799284061683896e-06, "loss": 0.4247, "step": 50450 }, { "epoch": 0.5, "grad_norm": 10.546794891357422, "learning_rate": 4.799159939229647e-06, "loss": 0.2271, "step": 50475 }, { "epoch": 0.5, "grad_norm": 14.720381736755371, "learning_rate": 4.799035816775398e-06, "loss": 0.3622, "step": 50500 }, { "epoch": 0.5, "grad_norm": 14.832171440124512, "learning_rate": 4.79891169432115e-06, "loss": 0.1996, "step": 50525 }, { "epoch": 0.5, "grad_norm": 15.983695983886719, "learning_rate": 4.798787571866901e-06, "loss": 0.3827, "step": 50550 }, { "epoch": 0.5, "grad_norm": 8.124323844909668, "learning_rate": 4.798663449412653e-06, "loss": 0.1902, "step": 50575 }, { "epoch": 0.5, "grad_norm": 11.168100357055664, "learning_rate": 4.798539326958405e-06, "loss": 0.4248, "step": 50600 }, { "epoch": 0.5, "grad_norm": 13.07486629486084, "learning_rate": 4.798415204504156e-06, "loss": 0.2476, "step": 50625 }, { "epoch": 0.5, "grad_norm": 15.790860176086426, "learning_rate": 4.798291082049907e-06, "loss": 0.3841, "step": 50650 }, { "epoch": 0.5, "grad_norm": 6.841111183166504, "learning_rate": 4.798166959595659e-06, "loss": 0.2043, "step": 50675 }, { "epoch": 0.5, "grad_norm": 30.636072158813477, "learning_rate": 4.79804283714141e-06, "loss": 0.4474, "step": 50700 }, { "epoch": 0.5, "grad_norm": 10.109424591064453, "learning_rate": 4.797918714687162e-06, "loss": 0.2433, "step": 50725 }, { "epoch": 0.5, "grad_norm": 26.644248962402344, "learning_rate": 4.797794592232914e-06, "loss": 0.4544, "step": 50750 }, { "epoch": 0.5, "grad_norm": 6.405539512634277, "learning_rate": 4.797670469778665e-06, "loss": 0.2496, "step": 50775 }, { "epoch": 0.5, "grad_norm": 15.59921932220459, "learning_rate": 4.797546347324417e-06, "loss": 0.4097, "step": 50800 }, { "epoch": 0.5, "grad_norm": 12.181394577026367, "learning_rate": 4.797422224870168e-06, "loss": 0.2574, "step": 50825 }, { "epoch": 0.5, "grad_norm": 13.617450714111328, "learning_rate": 4.79729810241592e-06, "loss": 0.416, "step": 50850 }, { "epoch": 0.5, "grad_norm": 10.581192970275879, "learning_rate": 4.797173979961671e-06, "loss": 0.2347, "step": 50875 }, { "epoch": 0.5, "grad_norm": 18.103775024414062, "learning_rate": 4.7970498575074234e-06, "loss": 0.456, "step": 50900 }, { "epoch": 0.5, "grad_norm": 10.804397583007812, "learning_rate": 4.796925735053175e-06, "loss": 0.241, "step": 50925 }, { "epoch": 0.5, "grad_norm": Infinity, "learning_rate": 4.796806577497096e-06, "loss": 0.4317, "step": 50950 }, { "epoch": 0.5, "grad_norm": 10.438819885253906, "learning_rate": 4.796682455042848e-06, "loss": 0.2555, "step": 50975 }, { "epoch": 0.5, "grad_norm": 24.852996826171875, "learning_rate": 4.796558332588599e-06, "loss": 0.3897, "step": 51000 }, { "epoch": 0.5, "grad_norm": 11.022123336791992, "learning_rate": 4.79643421013435e-06, "loss": 0.2279, "step": 51025 }, { "epoch": 0.5, "grad_norm": 20.901948928833008, "learning_rate": 4.796310087680102e-06, "loss": 0.4211, "step": 51050 }, { "epoch": 0.5, "grad_norm": 10.348832130432129, "learning_rate": 4.7961859652258534e-06, "loss": 0.2243, "step": 51075 }, { "epoch": 0.5, "grad_norm": 17.659698486328125, "learning_rate": 4.796061842771605e-06, "loss": 0.3941, "step": 51100 }, { "epoch": 0.5, "grad_norm": 11.845423698425293, "learning_rate": 4.795937720317357e-06, "loss": 0.25, "step": 51125 }, { "epoch": 0.5, "grad_norm": 14.38099193572998, "learning_rate": 4.795813597863108e-06, "loss": 0.4139, "step": 51150 }, { "epoch": 0.5, "grad_norm": 9.638529777526855, "learning_rate": 4.795689475408859e-06, "loss": 0.2445, "step": 51175 }, { "epoch": 0.5, "grad_norm": 17.4384822845459, "learning_rate": 4.795565352954611e-06, "loss": 0.4102, "step": 51200 }, { "epoch": 0.5, "grad_norm": 17.024127960205078, "learning_rate": 4.795441230500362e-06, "loss": 0.2493, "step": 51225 }, { "epoch": 0.5, "grad_norm": 20.66971206665039, "learning_rate": 4.7953171080461145e-06, "loss": 0.369, "step": 51250 }, { "epoch": 0.5, "grad_norm": 15.653895378112793, "learning_rate": 4.795192985591866e-06, "loss": 0.2423, "step": 51275 }, { "epoch": 0.5, "grad_norm": 15.439635276794434, "learning_rate": 4.795068863137618e-06, "loss": 0.4593, "step": 51300 }, { "epoch": 0.5, "grad_norm": 1.3981562852859497, "learning_rate": 4.794944740683369e-06, "loss": 0.1977, "step": 51325 }, { "epoch": 0.5, "grad_norm": 14.752333641052246, "learning_rate": 4.794820618229121e-06, "loss": 0.4225, "step": 51350 }, { "epoch": 0.51, "grad_norm": 8.305872917175293, "learning_rate": 4.794696495774872e-06, "loss": 0.2054, "step": 51375 }, { "epoch": 0.51, "grad_norm": 15.526564598083496, "learning_rate": 4.794572373320623e-06, "loss": 0.4556, "step": 51400 }, { "epoch": 0.51, "grad_norm": 5.904850959777832, "learning_rate": 4.7944482508663755e-06, "loss": 0.2659, "step": 51425 }, { "epoch": 0.51, "grad_norm": 14.438019752502441, "learning_rate": 4.794324128412127e-06, "loss": 0.4111, "step": 51450 }, { "epoch": 0.51, "grad_norm": 15.1187105178833, "learning_rate": 4.794200005957878e-06, "loss": 0.2124, "step": 51475 }, { "epoch": 0.51, "grad_norm": 25.17230987548828, "learning_rate": 4.79407588350363e-06, "loss": 0.4369, "step": 51500 }, { "epoch": 0.51, "grad_norm": 9.905746459960938, "learning_rate": 4.793951761049381e-06, "loss": 0.2265, "step": 51525 }, { "epoch": 0.51, "grad_norm": 16.453908920288086, "learning_rate": 4.793827638595132e-06, "loss": 0.4669, "step": 51550 }, { "epoch": 0.51, "grad_norm": 4.082820892333984, "learning_rate": 4.7937035161408844e-06, "loss": 0.1824, "step": 51575 }, { "epoch": 0.51, "grad_norm": 18.008991241455078, "learning_rate": 4.793579393686636e-06, "loss": 0.3715, "step": 51600 }, { "epoch": 0.51, "grad_norm": 17.82843780517578, "learning_rate": 4.793455271232388e-06, "loss": 0.2403, "step": 51625 }, { "epoch": 0.51, "grad_norm": 16.37064552307129, "learning_rate": 4.793331148778139e-06, "loss": 0.4767, "step": 51650 }, { "epoch": 0.51, "grad_norm": 15.30736255645752, "learning_rate": 4.793207026323891e-06, "loss": 0.2339, "step": 51675 }, { "epoch": 0.51, "grad_norm": 17.508472442626953, "learning_rate": 4.793082903869642e-06, "loss": 0.4567, "step": 51700 }, { "epoch": 0.51, "grad_norm": 7.828973770141602, "learning_rate": 4.792958781415394e-06, "loss": 0.2372, "step": 51725 }, { "epoch": 0.51, "grad_norm": 24.03048324584961, "learning_rate": 4.7928346589611455e-06, "loss": 0.4312, "step": 51750 }, { "epoch": 0.51, "grad_norm": 7.786725044250488, "learning_rate": 4.792710536506897e-06, "loss": 0.1861, "step": 51775 }, { "epoch": 0.51, "grad_norm": 11.754634857177734, "learning_rate": 4.792586414052648e-06, "loss": 0.4668, "step": 51800 }, { "epoch": 0.51, "grad_norm": 12.64284610748291, "learning_rate": 4.7924622915984e-06, "loss": 0.2349, "step": 51825 }, { "epoch": 0.51, "grad_norm": 19.00682830810547, "learning_rate": 4.792338169144151e-06, "loss": 0.4451, "step": 51850 }, { "epoch": 0.51, "grad_norm": 15.584421157836914, "learning_rate": 4.792214046689902e-06, "loss": 0.1977, "step": 51875 }, { "epoch": 0.51, "grad_norm": 18.595409393310547, "learning_rate": 4.7920899242356544e-06, "loss": 0.4636, "step": 51900 }, { "epoch": 0.51, "grad_norm": 10.06844711303711, "learning_rate": 4.791965801781406e-06, "loss": 0.2092, "step": 51925 }, { "epoch": 0.51, "grad_norm": 12.74474048614502, "learning_rate": 4.791841679327157e-06, "loss": 0.4003, "step": 51950 }, { "epoch": 0.51, "grad_norm": 16.70965576171875, "learning_rate": 4.791717556872909e-06, "loss": 0.3325, "step": 51975 }, { "epoch": 0.51, "grad_norm": 23.312782287597656, "learning_rate": 4.79159343441866e-06, "loss": 0.3891, "step": 52000 }, { "epoch": 0.51, "grad_norm": 9.323782920837402, "learning_rate": 4.791469311964412e-06, "loss": 0.2282, "step": 52025 }, { "epoch": 0.51, "grad_norm": 16.614076614379883, "learning_rate": 4.791345189510163e-06, "loss": 0.4431, "step": 52050 }, { "epoch": 0.51, "grad_norm": 10.442062377929688, "learning_rate": 4.7912210670559154e-06, "loss": 0.2064, "step": 52075 }, { "epoch": 0.51, "grad_norm": 14.046330451965332, "learning_rate": 4.791096944601667e-06, "loss": 0.4764, "step": 52100 }, { "epoch": 0.51, "grad_norm": 11.205260276794434, "learning_rate": 4.790972822147419e-06, "loss": 0.2329, "step": 52125 }, { "epoch": 0.51, "grad_norm": 16.048952102661133, "learning_rate": 4.79084869969317e-06, "loss": 0.4196, "step": 52150 }, { "epoch": 0.51, "grad_norm": 12.85339069366455, "learning_rate": 4.790724577238921e-06, "loss": 0.2542, "step": 52175 }, { "epoch": 0.51, "grad_norm": 9.752567291259766, "learning_rate": 4.790600454784673e-06, "loss": 0.3859, "step": 52200 }, { "epoch": 0.51, "grad_norm": 8.988015174865723, "learning_rate": 4.790476332330424e-06, "loss": 0.2393, "step": 52225 }, { "epoch": 0.51, "grad_norm": 16.58470344543457, "learning_rate": 4.790352209876176e-06, "loss": 0.4088, "step": 52250 }, { "epoch": 0.51, "grad_norm": 7.6295976638793945, "learning_rate": 4.790228087421928e-06, "loss": 0.208, "step": 52275 }, { "epoch": 0.51, "grad_norm": 15.988007545471191, "learning_rate": 4.790103964967679e-06, "loss": 0.3675, "step": 52300 }, { "epoch": 0.51, "grad_norm": 7.689838409423828, "learning_rate": 4.78997984251343e-06, "loss": 0.2034, "step": 52325 }, { "epoch": 0.51, "grad_norm": 12.480237007141113, "learning_rate": 4.789855720059182e-06, "loss": 0.4227, "step": 52350 }, { "epoch": 0.51, "grad_norm": 10.868673324584961, "learning_rate": 4.789731597604933e-06, "loss": 0.2369, "step": 52375 }, { "epoch": 0.52, "grad_norm": 8.088151931762695, "learning_rate": 4.789607475150685e-06, "loss": 0.2992, "step": 52400 }, { "epoch": 0.52, "grad_norm": 14.464970588684082, "learning_rate": 4.789483352696437e-06, "loss": 0.2235, "step": 52425 }, { "epoch": 0.52, "grad_norm": 16.272645950317383, "learning_rate": 4.789359230242188e-06, "loss": 0.4409, "step": 52450 }, { "epoch": 0.52, "grad_norm": 10.007243156433105, "learning_rate": 4.78923510778794e-06, "loss": 0.2008, "step": 52475 }, { "epoch": 0.52, "grad_norm": 19.715730667114258, "learning_rate": 4.789110985333691e-06, "loss": 0.442, "step": 52500 }, { "epoch": 0.52, "grad_norm": 21.97573471069336, "learning_rate": 4.788986862879443e-06, "loss": 0.2224, "step": 52525 }, { "epoch": 0.52, "grad_norm": 19.11405372619629, "learning_rate": 4.788862740425194e-06, "loss": 0.3911, "step": 52550 }, { "epoch": 0.52, "grad_norm": 7.968319416046143, "learning_rate": 4.7887386179709465e-06, "loss": 0.1852, "step": 52575 }, { "epoch": 0.52, "grad_norm": 15.786819458007812, "learning_rate": 4.788614495516698e-06, "loss": 0.4089, "step": 52600 }, { "epoch": 0.52, "grad_norm": 10.574159622192383, "learning_rate": 4.788490373062449e-06, "loss": 0.2046, "step": 52625 }, { "epoch": 0.52, "grad_norm": 32.28123092651367, "learning_rate": 4.7883662506082e-06, "loss": 0.4515, "step": 52650 }, { "epoch": 0.52, "grad_norm": 16.646873474121094, "learning_rate": 4.788242128153952e-06, "loss": 0.2332, "step": 52675 }, { "epoch": 0.52, "grad_norm": 16.816911697387695, "learning_rate": 4.788118005699703e-06, "loss": 0.4421, "step": 52700 }, { "epoch": 0.52, "grad_norm": 9.945624351501465, "learning_rate": 4.7879938832454546e-06, "loss": 0.242, "step": 52725 }, { "epoch": 0.52, "grad_norm": 20.13434410095215, "learning_rate": 4.787869760791207e-06, "loss": 0.3647, "step": 52750 }, { "epoch": 0.52, "grad_norm": 2.5352513790130615, "learning_rate": 4.787745638336958e-06, "loss": 0.2196, "step": 52775 }, { "epoch": 0.52, "grad_norm": 28.730440139770508, "learning_rate": 4.787621515882709e-06, "loss": 0.3834, "step": 52800 }, { "epoch": 0.52, "grad_norm": 17.572355270385742, "learning_rate": 4.787497393428461e-06, "loss": 0.2046, "step": 52825 }, { "epoch": 0.52, "grad_norm": 18.776458740234375, "learning_rate": 4.787373270974212e-06, "loss": 0.4234, "step": 52850 }, { "epoch": 0.52, "grad_norm": 6.628031253814697, "learning_rate": 4.787249148519964e-06, "loss": 0.2073, "step": 52875 }, { "epoch": 0.52, "grad_norm": 19.06285858154297, "learning_rate": 4.787125026065716e-06, "loss": 0.4133, "step": 52900 }, { "epoch": 0.52, "grad_norm": 13.952075958251953, "learning_rate": 4.787000903611468e-06, "loss": 0.2009, "step": 52925 }, { "epoch": 0.52, "grad_norm": 24.244844436645508, "learning_rate": 4.786876781157219e-06, "loss": 0.3528, "step": 52950 }, { "epoch": 0.52, "grad_norm": 12.88616943359375, "learning_rate": 4.786752658702971e-06, "loss": 0.2111, "step": 52975 }, { "epoch": 0.52, "grad_norm": 7.240854740142822, "learning_rate": 4.786628536248722e-06, "loss": 0.465, "step": 53000 }, { "epoch": 0.52, "grad_norm": 8.364013671875, "learning_rate": 4.786504413794473e-06, "loss": 0.3027, "step": 53025 }, { "epoch": 0.52, "grad_norm": 15.462309837341309, "learning_rate": 4.786380291340225e-06, "loss": 0.449, "step": 53050 }, { "epoch": 0.52, "grad_norm": 9.625609397888184, "learning_rate": 4.786256168885977e-06, "loss": 0.236, "step": 53075 }, { "epoch": 0.52, "grad_norm": Infinity, "learning_rate": 4.7861370113298985e-06, "loss": 0.3793, "step": 53100 }, { "epoch": 0.52, "grad_norm": 8.05178451538086, "learning_rate": 4.78601288887565e-06, "loss": 0.2144, "step": 53125 }, { "epoch": 0.52, "grad_norm": 16.694141387939453, "learning_rate": 4.785888766421401e-06, "loss": 0.4454, "step": 53150 }, { "epoch": 0.52, "grad_norm": 13.00265121459961, "learning_rate": 4.785764643967153e-06, "loss": 0.2234, "step": 53175 }, { "epoch": 0.52, "grad_norm": 16.64579200744629, "learning_rate": 4.785640521512904e-06, "loss": 0.4156, "step": 53200 }, { "epoch": 0.52, "grad_norm": 9.677364349365234, "learning_rate": 4.785516399058655e-06, "loss": 0.2043, "step": 53225 }, { "epoch": 0.52, "grad_norm": 20.395219802856445, "learning_rate": 4.785392276604407e-06, "loss": 0.4528, "step": 53250 }, { "epoch": 0.52, "grad_norm": 10.502971649169922, "learning_rate": 4.785268154150159e-06, "loss": 0.2208, "step": 53275 }, { "epoch": 0.52, "grad_norm": 17.86035919189453, "learning_rate": 4.78514403169591e-06, "loss": 0.4438, "step": 53300 }, { "epoch": 0.52, "grad_norm": 9.377486228942871, "learning_rate": 4.785019909241662e-06, "loss": 0.2496, "step": 53325 }, { "epoch": 0.52, "grad_norm": 14.280081748962402, "learning_rate": 4.784895786787413e-06, "loss": 0.4461, "step": 53350 }, { "epoch": 0.52, "grad_norm": 15.350790023803711, "learning_rate": 4.784776629231335e-06, "loss": 0.2506, "step": 53375 }, { "epoch": 0.53, "grad_norm": 15.426788330078125, "learning_rate": 4.784652506777086e-06, "loss": 0.4313, "step": 53400 }, { "epoch": 0.53, "grad_norm": 11.28499698638916, "learning_rate": 4.784528384322838e-06, "loss": 0.2147, "step": 53425 }, { "epoch": 0.53, "grad_norm": 14.346732139587402, "learning_rate": 4.7844042618685895e-06, "loss": 0.3606, "step": 53450 }, { "epoch": 0.53, "grad_norm": 18.316858291625977, "learning_rate": 4.7842801394143416e-06, "loss": 0.2534, "step": 53475 }, { "epoch": 0.53, "grad_norm": 11.492258071899414, "learning_rate": 4.784156016960093e-06, "loss": 0.4137, "step": 53500 }, { "epoch": 0.53, "grad_norm": 8.616345405578613, "learning_rate": 4.784031894505844e-06, "loss": 0.2253, "step": 53525 }, { "epoch": 0.53, "grad_norm": 22.08232879638672, "learning_rate": 4.783907772051596e-06, "loss": 0.4012, "step": 53550 }, { "epoch": 0.53, "grad_norm": 10.166692733764648, "learning_rate": 4.783783649597347e-06, "loss": 0.2288, "step": 53575 }, { "epoch": 0.53, "grad_norm": 18.577516555786133, "learning_rate": 4.7836595271430985e-06, "loss": 0.4413, "step": 53600 }, { "epoch": 0.53, "grad_norm": 16.092525482177734, "learning_rate": 4.7835354046888505e-06, "loss": 0.2638, "step": 53625 }, { "epoch": 0.53, "grad_norm": 14.373074531555176, "learning_rate": 4.783411282234602e-06, "loss": 0.4706, "step": 53650 }, { "epoch": 0.53, "grad_norm": 10.238460540771484, "learning_rate": 4.783287159780353e-06, "loss": 0.2555, "step": 53675 }, { "epoch": 0.53, "grad_norm": 15.170286178588867, "learning_rate": 4.783163037326105e-06, "loss": 0.4182, "step": 53700 }, { "epoch": 0.53, "grad_norm": 3.793544292449951, "learning_rate": 4.783038914871856e-06, "loss": 0.2292, "step": 53725 }, { "epoch": 0.53, "grad_norm": 21.812618255615234, "learning_rate": 4.782914792417607e-06, "loss": 0.3908, "step": 53750 }, { "epoch": 0.53, "grad_norm": 10.597084999084473, "learning_rate": 4.7827906699633595e-06, "loss": 0.235, "step": 53775 }, { "epoch": 0.53, "grad_norm": 23.199464797973633, "learning_rate": 4.782666547509111e-06, "loss": 0.4525, "step": 53800 }, { "epoch": 0.53, "grad_norm": 14.894556045532227, "learning_rate": 4.782542425054863e-06, "loss": 0.2046, "step": 53825 }, { "epoch": 0.53, "grad_norm": 14.04306697845459, "learning_rate": 4.782418302600614e-06, "loss": 0.4387, "step": 53850 }, { "epoch": 0.53, "grad_norm": 8.451552391052246, "learning_rate": 4.782294180146366e-06, "loss": 0.2143, "step": 53875 }, { "epoch": 0.53, "grad_norm": 19.336517333984375, "learning_rate": 4.782170057692117e-06, "loss": 0.4577, "step": 53900 }, { "epoch": 0.53, "grad_norm": 10.927696228027344, "learning_rate": 4.7820459352378684e-06, "loss": 0.2914, "step": 53925 }, { "epoch": 0.53, "grad_norm": 18.993873596191406, "learning_rate": 4.7819218127836205e-06, "loss": 0.3986, "step": 53950 }, { "epoch": 0.53, "grad_norm": 6.630980014801025, "learning_rate": 4.781797690329372e-06, "loss": 0.2087, "step": 53975 }, { "epoch": 0.53, "grad_norm": 13.28389835357666, "learning_rate": 4.781673567875123e-06, "loss": 0.4193, "step": 54000 }, { "epoch": 0.53, "grad_norm": 16.743131637573242, "learning_rate": 4.781549445420875e-06, "loss": 0.2422, "step": 54025 }, { "epoch": 0.53, "grad_norm": 17.4419002532959, "learning_rate": 4.781425322966626e-06, "loss": 0.4212, "step": 54050 }, { "epoch": 0.53, "grad_norm": 11.643098831176758, "learning_rate": 4.781301200512377e-06, "loss": 0.2091, "step": 54075 }, { "epoch": 0.53, "grad_norm": 6.141153812408447, "learning_rate": 4.7811770780581295e-06, "loss": 0.3619, "step": 54100 }, { "epoch": 0.53, "grad_norm": 9.106982231140137, "learning_rate": 4.781052955603881e-06, "loss": 0.2264, "step": 54125 }, { "epoch": 0.53, "grad_norm": 23.963510513305664, "learning_rate": 4.780928833149632e-06, "loss": 0.4158, "step": 54150 }, { "epoch": 0.53, "grad_norm": 8.744720458984375, "learning_rate": 4.780804710695384e-06, "loss": 0.2154, "step": 54175 }, { "epoch": 0.53, "grad_norm": 5.760705471038818, "learning_rate": 4.780680588241135e-06, "loss": 0.3815, "step": 54200 }, { "epoch": 0.53, "grad_norm": 5.84199857711792, "learning_rate": 4.780556465786887e-06, "loss": 0.2006, "step": 54225 }, { "epoch": 0.53, "grad_norm": 17.5378360748291, "learning_rate": 4.7804323433326384e-06, "loss": 0.4628, "step": 54250 }, { "epoch": 0.53, "grad_norm": 15.240996360778809, "learning_rate": 4.7803082208783905e-06, "loss": 0.1962, "step": 54275 }, { "epoch": 0.53, "grad_norm": 18.91693878173828, "learning_rate": 4.780184098424142e-06, "loss": 0.3567, "step": 54300 }, { "epoch": 0.53, "grad_norm": 11.046673774719238, "learning_rate": 4.780059975969894e-06, "loss": 0.2139, "step": 54325 }, { "epoch": 0.53, "grad_norm": 40.237693786621094, "learning_rate": 4.779935853515645e-06, "loss": 0.4371, "step": 54350 }, { "epoch": 0.53, "grad_norm": 14.865102767944336, "learning_rate": 4.779811731061396e-06, "loss": 0.2605, "step": 54375 }, { "epoch": 0.53, "grad_norm": 14.3568696975708, "learning_rate": 4.779687608607148e-06, "loss": 0.4568, "step": 54400 }, { "epoch": 0.54, "grad_norm": 9.773797988891602, "learning_rate": 4.7795634861528995e-06, "loss": 0.2427, "step": 54425 }, { "epoch": 0.54, "grad_norm": 28.386571884155273, "learning_rate": 4.779439363698651e-06, "loss": 0.4043, "step": 54450 }, { "epoch": 0.54, "grad_norm": 10.571715354919434, "learning_rate": 4.779315241244403e-06, "loss": 0.2174, "step": 54475 }, { "epoch": 0.54, "grad_norm": 20.708080291748047, "learning_rate": 4.779191118790154e-06, "loss": 0.3403, "step": 54500 }, { "epoch": 0.54, "grad_norm": 11.296270370483398, "learning_rate": 4.779066996335905e-06, "loss": 0.2434, "step": 54525 }, { "epoch": 0.54, "grad_norm": 17.842832565307617, "learning_rate": 4.778942873881657e-06, "loss": 0.4026, "step": 54550 }, { "epoch": 0.54, "grad_norm": 6.478601455688477, "learning_rate": 4.778818751427408e-06, "loss": 0.2258, "step": 54575 }, { "epoch": 0.54, "grad_norm": 22.868471145629883, "learning_rate": 4.7786946289731605e-06, "loss": 0.4402, "step": 54600 }, { "epoch": 0.54, "grad_norm": 9.327811241149902, "learning_rate": 4.778570506518912e-06, "loss": 0.2303, "step": 54625 }, { "epoch": 0.54, "grad_norm": 22.10325050354004, "learning_rate": 4.778446384064664e-06, "loss": 0.4338, "step": 54650 }, { "epoch": 0.54, "grad_norm": 6.920901298522949, "learning_rate": 4.778322261610415e-06, "loss": 0.2242, "step": 54675 }, { "epoch": 0.54, "grad_norm": 18.5985107421875, "learning_rate": 4.778198139156166e-06, "loss": 0.4423, "step": 54700 }, { "epoch": 0.54, "grad_norm": 11.07788372039795, "learning_rate": 4.778074016701918e-06, "loss": 0.2106, "step": 54725 }, { "epoch": 0.54, "grad_norm": 22.674293518066406, "learning_rate": 4.7779498942476694e-06, "loss": 0.3943, "step": 54750 }, { "epoch": 0.54, "grad_norm": 15.085087776184082, "learning_rate": 4.777825771793421e-06, "loss": 0.207, "step": 54775 }, { "epoch": 0.54, "grad_norm": 16.83119010925293, "learning_rate": 4.777701649339173e-06, "loss": 0.3702, "step": 54800 }, { "epoch": 0.54, "grad_norm": 8.94113540649414, "learning_rate": 4.777577526884924e-06, "loss": 0.1861, "step": 54825 }, { "epoch": 0.54, "grad_norm": 14.661944389343262, "learning_rate": 4.777453404430675e-06, "loss": 0.4214, "step": 54850 }, { "epoch": 0.54, "grad_norm": 8.220213890075684, "learning_rate": 4.777329281976427e-06, "loss": 0.1828, "step": 54875 }, { "epoch": 0.54, "grad_norm": 15.602405548095703, "learning_rate": 4.777205159522178e-06, "loss": 0.4276, "step": 54900 }, { "epoch": 0.54, "grad_norm": 11.378352165222168, "learning_rate": 4.77708103706793e-06, "loss": 0.239, "step": 54925 }, { "epoch": 0.54, "grad_norm": 11.00639820098877, "learning_rate": 4.776956914613682e-06, "loss": 0.4353, "step": 54950 }, { "epoch": 0.54, "grad_norm": 13.873875617980957, "learning_rate": 4.776832792159433e-06, "loss": 0.2489, "step": 54975 }, { "epoch": 0.54, "grad_norm": 17.0640926361084, "learning_rate": 4.776708669705185e-06, "loss": 0.4951, "step": 55000 }, { "epoch": 0.54, "grad_norm": 8.846919059753418, "learning_rate": 4.776584547250936e-06, "loss": 0.2077, "step": 55025 }, { "epoch": 0.54, "grad_norm": 13.540767669677734, "learning_rate": 4.776460424796688e-06, "loss": 0.4126, "step": 55050 }, { "epoch": 0.54, "grad_norm": 5.712777614593506, "learning_rate": 4.776336302342439e-06, "loss": 0.2152, "step": 55075 }, { "epoch": 0.54, "grad_norm": 17.819944381713867, "learning_rate": 4.7762121798881915e-06, "loss": 0.4503, "step": 55100 }, { "epoch": 0.54, "grad_norm": 11.011091232299805, "learning_rate": 4.776088057433943e-06, "loss": 0.1789, "step": 55125 }, { "epoch": 0.54, "grad_norm": 7.815525054931641, "learning_rate": 4.775963934979694e-06, "loss": 0.4113, "step": 55150 }, { "epoch": 0.54, "grad_norm": 12.799171447753906, "learning_rate": 4.775839812525446e-06, "loss": 0.2048, "step": 55175 }, { "epoch": 0.54, "grad_norm": 17.79448699951172, "learning_rate": 4.775715690071197e-06, "loss": 0.3902, "step": 55200 }, { "epoch": 0.54, "grad_norm": 4.026257038116455, "learning_rate": 4.775591567616948e-06, "loss": 0.1753, "step": 55225 }, { "epoch": 0.54, "grad_norm": 19.195655822753906, "learning_rate": 4.7754674451627004e-06, "loss": 0.4035, "step": 55250 }, { "epoch": 0.54, "grad_norm": 5.539183139801025, "learning_rate": 4.775343322708452e-06, "loss": 0.2487, "step": 55275 }, { "epoch": 0.54, "grad_norm": 21.690034866333008, "learning_rate": 4.775219200254203e-06, "loss": 0.433, "step": 55300 }, { "epoch": 0.54, "grad_norm": 6.734740734100342, "learning_rate": 4.775095077799955e-06, "loss": 0.2729, "step": 55325 }, { "epoch": 0.54, "grad_norm": 21.499731063842773, "learning_rate": 4.774970955345706e-06, "loss": 0.4318, "step": 55350 }, { "epoch": 0.54, "grad_norm": 10.582046508789062, "learning_rate": 4.774846832891457e-06, "loss": 0.1965, "step": 55375 }, { "epoch": 0.54, "grad_norm": 20.409528732299805, "learning_rate": 4.774722710437209e-06, "loss": 0.393, "step": 55400 }, { "epoch": 0.54, "grad_norm": 12.908782958984375, "learning_rate": 4.774598587982961e-06, "loss": 0.228, "step": 55425 }, { "epoch": 0.55, "grad_norm": 16.880727767944336, "learning_rate": 4.774474465528713e-06, "loss": 0.4773, "step": 55450 }, { "epoch": 0.55, "grad_norm": 6.678183078765869, "learning_rate": 4.774350343074464e-06, "loss": 0.1948, "step": 55475 }, { "epoch": 0.55, "grad_norm": 14.351598739624023, "learning_rate": 4.774226220620216e-06, "loss": 0.4337, "step": 55500 }, { "epoch": 0.55, "grad_norm": 13.915329933166504, "learning_rate": 4.774102098165967e-06, "loss": 0.2185, "step": 55525 }, { "epoch": 0.55, "grad_norm": 7.6373491287231445, "learning_rate": 4.773977975711718e-06, "loss": 0.4365, "step": 55550 }, { "epoch": 0.55, "grad_norm": 11.219853401184082, "learning_rate": 4.7738538532574704e-06, "loss": 0.2321, "step": 55575 }, { "epoch": 0.55, "grad_norm": 12.770904541015625, "learning_rate": 4.773729730803222e-06, "loss": 0.4464, "step": 55600 }, { "epoch": 0.55, "grad_norm": 11.847146987915039, "learning_rate": 4.773605608348973e-06, "loss": 0.2065, "step": 55625 }, { "epoch": 0.55, "grad_norm": 16.938188552856445, "learning_rate": 4.773481485894725e-06, "loss": 0.4392, "step": 55650 }, { "epoch": 0.55, "grad_norm": 9.18773078918457, "learning_rate": 4.773357363440476e-06, "loss": 0.2097, "step": 55675 }, { "epoch": 0.55, "grad_norm": 23.321687698364258, "learning_rate": 4.773233240986227e-06, "loss": 0.4556, "step": 55700 }, { "epoch": 0.55, "grad_norm": 6.6307830810546875, "learning_rate": 4.773109118531979e-06, "loss": 0.2123, "step": 55725 }, { "epoch": 0.55, "grad_norm": 14.465829849243164, "learning_rate": 4.772984996077731e-06, "loss": 0.4291, "step": 55750 }, { "epoch": 0.55, "grad_norm": 13.030717849731445, "learning_rate": 4.772860873623482e-06, "loss": 0.2584, "step": 55775 }, { "epoch": 0.55, "grad_norm": 21.097917556762695, "learning_rate": 4.772736751169234e-06, "loss": 0.397, "step": 55800 }, { "epoch": 0.55, "grad_norm": 9.387396812438965, "learning_rate": 4.772612628714985e-06, "loss": 0.2444, "step": 55825 }, { "epoch": 0.55, "grad_norm": 19.224485397338867, "learning_rate": 4.772488506260737e-06, "loss": 0.4183, "step": 55850 }, { "epoch": 0.55, "grad_norm": 13.201446533203125, "learning_rate": 4.772364383806488e-06, "loss": 0.2173, "step": 55875 }, { "epoch": 0.55, "grad_norm": 15.756895065307617, "learning_rate": 4.77224026135224e-06, "loss": 0.4505, "step": 55900 }, { "epoch": 0.55, "grad_norm": 5.881233215332031, "learning_rate": 4.772116138897992e-06, "loss": 0.1959, "step": 55925 }, { "epoch": 0.55, "grad_norm": 18.546092987060547, "learning_rate": 4.771992016443744e-06, "loss": 0.4191, "step": 55950 }, { "epoch": 0.55, "grad_norm": 9.350736618041992, "learning_rate": 4.771867893989495e-06, "loss": 0.2599, "step": 55975 }, { "epoch": 0.55, "grad_norm": 18.50588035583496, "learning_rate": 4.771743771535246e-06, "loss": 0.4548, "step": 56000 }, { "epoch": 0.55, "grad_norm": 12.430026054382324, "learning_rate": 4.771619649080998e-06, "loss": 0.2565, "step": 56025 }, { "epoch": 0.55, "grad_norm": 18.0157470703125, "learning_rate": 4.771495526626749e-06, "loss": 0.4593, "step": 56050 }, { "epoch": 0.55, "grad_norm": 10.108752250671387, "learning_rate": 4.771371404172501e-06, "loss": 0.2579, "step": 56075 }, { "epoch": 0.55, "grad_norm": 13.760272979736328, "learning_rate": 4.771247281718253e-06, "loss": 0.3967, "step": 56100 }, { "epoch": 0.55, "grad_norm": 12.463024139404297, "learning_rate": 4.771123159264004e-06, "loss": 0.2122, "step": 56125 }, { "epoch": 0.55, "grad_norm": 15.63858699798584, "learning_rate": 4.770999036809755e-06, "loss": 0.4137, "step": 56150 }, { "epoch": 0.55, "grad_norm": 12.228962898254395, "learning_rate": 4.770874914355507e-06, "loss": 0.2159, "step": 56175 }, { "epoch": 0.55, "grad_norm": 17.34147834777832, "learning_rate": 4.770750791901258e-06, "loss": 0.3968, "step": 56200 }, { "epoch": 0.55, "grad_norm": 10.526463508605957, "learning_rate": 4.7706266694470095e-06, "loss": 0.2811, "step": 56225 }, { "epoch": 0.55, "grad_norm": 20.76658058166504, "learning_rate": 4.770502546992762e-06, "loss": 0.3824, "step": 56250 }, { "epoch": 0.55, "grad_norm": 12.348419189453125, "learning_rate": 4.770378424538513e-06, "loss": 0.2221, "step": 56275 }, { "epoch": 0.55, "grad_norm": 15.738744735717773, "learning_rate": 4.770254302084265e-06, "loss": 0.377, "step": 56300 }, { "epoch": 0.55, "grad_norm": 7.474469184875488, "learning_rate": 4.770130179630016e-06, "loss": 0.2135, "step": 56325 }, { "epoch": 0.55, "grad_norm": 23.05022430419922, "learning_rate": 4.770006057175768e-06, "loss": 0.409, "step": 56350 }, { "epoch": 0.55, "grad_norm": 4.929444313049316, "learning_rate": 4.769881934721519e-06, "loss": 0.1929, "step": 56375 }, { "epoch": 0.55, "grad_norm": 18.446186065673828, "learning_rate": 4.7697578122672706e-06, "loss": 0.4383, "step": 56400 }, { "epoch": 0.55, "grad_norm": 14.183939933776855, "learning_rate": 4.769633689813023e-06, "loss": 0.2405, "step": 56425 }, { "epoch": 0.56, "grad_norm": 13.562124252319336, "learning_rate": 4.769509567358774e-06, "loss": 0.4503, "step": 56450 }, { "epoch": 0.56, "grad_norm": 9.567537307739258, "learning_rate": 4.769385444904525e-06, "loss": 0.2272, "step": 56475 }, { "epoch": 0.56, "grad_norm": 13.744348526000977, "learning_rate": 4.769261322450277e-06, "loss": 0.3918, "step": 56500 }, { "epoch": 0.56, "grad_norm": 13.378652572631836, "learning_rate": 4.769137199996028e-06, "loss": 0.2089, "step": 56525 }, { "epoch": 0.56, "grad_norm": 10.431779861450195, "learning_rate": 4.7690130775417795e-06, "loss": 0.4291, "step": 56550 }, { "epoch": 0.56, "grad_norm": 11.791868209838867, "learning_rate": 4.768888955087532e-06, "loss": 0.2596, "step": 56575 }, { "epoch": 0.56, "grad_norm": 27.364055633544922, "learning_rate": 4.768764832633283e-06, "loss": 0.4389, "step": 56600 }, { "epoch": 0.56, "grad_norm": 7.347774028778076, "learning_rate": 4.768640710179034e-06, "loss": 0.2302, "step": 56625 }, { "epoch": 0.56, "grad_norm": 18.817108154296875, "learning_rate": 4.768516587724786e-06, "loss": 0.4109, "step": 56650 }, { "epoch": 0.56, "grad_norm": 9.216314315795898, "learning_rate": 4.768392465270537e-06, "loss": 0.2193, "step": 56675 }, { "epoch": 0.56, "grad_norm": 14.018813133239746, "learning_rate": 4.768268342816289e-06, "loss": 0.3644, "step": 56700 }, { "epoch": 0.56, "grad_norm": 7.726538181304932, "learning_rate": 4.7681442203620406e-06, "loss": 0.2493, "step": 56725 }, { "epoch": 0.56, "grad_norm": 7.775447368621826, "learning_rate": 4.768020097907793e-06, "loss": 0.3987, "step": 56750 }, { "epoch": 0.56, "grad_norm": 10.399242401123047, "learning_rate": 4.767895975453544e-06, "loss": 0.1917, "step": 56775 }, { "epoch": 0.56, "grad_norm": 23.02768898010254, "learning_rate": 4.767771852999296e-06, "loss": 0.4125, "step": 56800 }, { "epoch": 0.56, "grad_norm": 8.825227737426758, "learning_rate": 4.767647730545047e-06, "loss": 0.2085, "step": 56825 }, { "epoch": 0.56, "grad_norm": 13.467435836791992, "learning_rate": 4.767523608090798e-06, "loss": 0.4237, "step": 56850 }, { "epoch": 0.56, "grad_norm": 5.948598384857178, "learning_rate": 4.76739948563655e-06, "loss": 0.2281, "step": 56875 }, { "epoch": 0.56, "grad_norm": 20.783170700073242, "learning_rate": 4.7672753631823016e-06, "loss": 0.3872, "step": 56900 }, { "epoch": 0.56, "grad_norm": 7.466240882873535, "learning_rate": 4.767151240728053e-06, "loss": 0.2603, "step": 56925 }, { "epoch": 0.56, "grad_norm": 14.445937156677246, "learning_rate": 4.767027118273805e-06, "loss": 0.3998, "step": 56950 }, { "epoch": 0.56, "grad_norm": 6.034465312957764, "learning_rate": 4.766902995819556e-06, "loss": 0.2168, "step": 56975 }, { "epoch": 0.56, "grad_norm": 20.520450592041016, "learning_rate": 4.766778873365307e-06, "loss": 0.4343, "step": 57000 }, { "epoch": 0.56, "grad_norm": 14.583243370056152, "learning_rate": 4.766654750911059e-06, "loss": 0.2382, "step": 57025 }, { "epoch": 0.56, "grad_norm": 18.638263702392578, "learning_rate": 4.7665306284568105e-06, "loss": 0.4927, "step": 57050 }, { "epoch": 0.56, "grad_norm": 2.401033401489258, "learning_rate": 4.766406506002562e-06, "loss": 0.1703, "step": 57075 }, { "epoch": 0.56, "grad_norm": 16.432432174682617, "learning_rate": 4.766282383548314e-06, "loss": 0.4945, "step": 57100 }, { "epoch": 0.56, "grad_norm": 7.460359573364258, "learning_rate": 4.766158261094065e-06, "loss": 0.2267, "step": 57125 }, { "epoch": 0.56, "grad_norm": 13.278204917907715, "learning_rate": 4.766034138639817e-06, "loss": 0.4209, "step": 57150 }, { "epoch": 0.56, "grad_norm": 6.045666694641113, "learning_rate": 4.765910016185568e-06, "loss": 0.2423, "step": 57175 }, { "epoch": 0.56, "grad_norm": 14.884090423583984, "learning_rate": 4.76578589373132e-06, "loss": 0.4619, "step": 57200 }, { "epoch": 0.56, "grad_norm": 5.065629005432129, "learning_rate": 4.7656617712770716e-06, "loss": 0.2664, "step": 57225 }, { "epoch": 0.56, "grad_norm": 58.35805892944336, "learning_rate": 4.765537648822823e-06, "loss": 0.4566, "step": 57250 }, { "epoch": 0.56, "grad_norm": 8.666756629943848, "learning_rate": 4.765413526368575e-06, "loss": 0.2193, "step": 57275 }, { "epoch": 0.56, "grad_norm": 15.7117280960083, "learning_rate": 4.765289403914326e-06, "loss": 0.3891, "step": 57300 }, { "epoch": 0.56, "grad_norm": 13.679795265197754, "learning_rate": 4.765165281460077e-06, "loss": 0.2643, "step": 57325 }, { "epoch": 0.56, "grad_norm": 18.468109130859375, "learning_rate": 4.765041159005829e-06, "loss": 0.4038, "step": 57350 }, { "epoch": 0.56, "grad_norm": 9.057064056396484, "learning_rate": 4.7649170365515805e-06, "loss": 0.214, "step": 57375 }, { "epoch": 0.56, "grad_norm": 20.5722599029541, "learning_rate": 4.764792914097332e-06, "loss": 0.4323, "step": 57400 }, { "epoch": 0.56, "grad_norm": 8.391589164733887, "learning_rate": 4.764668791643084e-06, "loss": 0.2262, "step": 57425 }, { "epoch": 0.56, "grad_norm": 14.776695251464844, "learning_rate": 4.764544669188835e-06, "loss": 0.3956, "step": 57450 }, { "epoch": 0.57, "grad_norm": 9.95456600189209, "learning_rate": 4.764420546734587e-06, "loss": 0.2435, "step": 57475 }, { "epoch": 0.57, "grad_norm": 13.143842697143555, "learning_rate": 4.764296424280338e-06, "loss": 0.4185, "step": 57500 }, { "epoch": 0.57, "grad_norm": 10.528943061828613, "learning_rate": 4.76417230182609e-06, "loss": 0.2305, "step": 57525 }, { "epoch": 0.57, "grad_norm": Infinity, "learning_rate": 4.764053144270011e-06, "loss": 0.4109, "step": 57550 }, { "epoch": 0.57, "grad_norm": 9.395804405212402, "learning_rate": 4.763929021815763e-06, "loss": 0.2106, "step": 57575 }, { "epoch": 0.57, "grad_norm": 24.186260223388672, "learning_rate": 4.763804899361515e-06, "loss": 0.4214, "step": 57600 }, { "epoch": 0.57, "grad_norm": 10.689339637756348, "learning_rate": 4.763680776907267e-06, "loss": 0.2291, "step": 57625 }, { "epoch": 0.57, "grad_norm": 21.28446388244629, "learning_rate": 4.763556654453018e-06, "loss": 0.3508, "step": 57650 }, { "epoch": 0.57, "grad_norm": 6.8825907707214355, "learning_rate": 4.763432531998769e-06, "loss": 0.2251, "step": 57675 }, { "epoch": 0.57, "grad_norm": 13.486443519592285, "learning_rate": 4.763308409544521e-06, "loss": 0.4273, "step": 57700 }, { "epoch": 0.57, "grad_norm": 5.617632865905762, "learning_rate": 4.763184287090272e-06, "loss": 0.2118, "step": 57725 }, { "epoch": 0.57, "grad_norm": 10.623687744140625, "learning_rate": 4.763060164636024e-06, "loss": 0.458, "step": 57750 }, { "epoch": 0.57, "grad_norm": 10.007128715515137, "learning_rate": 4.762936042181775e-06, "loss": 0.2621, "step": 57775 }, { "epoch": 0.57, "grad_norm": 20.230331420898438, "learning_rate": 4.762811919727527e-06, "loss": 0.4427, "step": 57800 }, { "epoch": 0.57, "grad_norm": 10.022074699401855, "learning_rate": 4.762687797273278e-06, "loss": 0.1956, "step": 57825 }, { "epoch": 0.57, "grad_norm": 19.958532333374023, "learning_rate": 4.762563674819029e-06, "loss": 0.3438, "step": 57850 }, { "epoch": 0.57, "grad_norm": 6.69801664352417, "learning_rate": 4.762439552364781e-06, "loss": 0.2236, "step": 57875 }, { "epoch": 0.57, "grad_norm": 14.450757026672363, "learning_rate": 4.7623154299105326e-06, "loss": 0.3878, "step": 57900 }, { "epoch": 0.57, "grad_norm": 9.337735176086426, "learning_rate": 4.762191307456285e-06, "loss": 0.2291, "step": 57925 }, { "epoch": 0.57, "grad_norm": 18.479961395263672, "learning_rate": 4.762067185002036e-06, "loss": 0.4941, "step": 57950 }, { "epoch": 0.57, "grad_norm": 6.081964015960693, "learning_rate": 4.761943062547788e-06, "loss": 0.2364, "step": 57975 }, { "epoch": 0.57, "grad_norm": 23.750856399536133, "learning_rate": 4.761818940093539e-06, "loss": 0.4347, "step": 58000 }, { "epoch": 0.57, "grad_norm": 13.492725372314453, "learning_rate": 4.761694817639291e-06, "loss": 0.2266, "step": 58025 }, { "epoch": 0.57, "grad_norm": 17.991546630859375, "learning_rate": 4.761570695185042e-06, "loss": 0.424, "step": 58050 }, { "epoch": 0.57, "grad_norm": 9.506221771240234, "learning_rate": 4.7614465727307936e-06, "loss": 0.2561, "step": 58075 }, { "epoch": 0.57, "grad_norm": 14.080658912658691, "learning_rate": 4.761322450276546e-06, "loss": 0.4762, "step": 58100 }, { "epoch": 0.57, "grad_norm": 5.719743251800537, "learning_rate": 4.761198327822297e-06, "loss": 0.2271, "step": 58125 }, { "epoch": 0.57, "grad_norm": 25.432764053344727, "learning_rate": 4.761074205368048e-06, "loss": 0.4127, "step": 58150 }, { "epoch": 0.57, "grad_norm": 11.428235054016113, "learning_rate": 4.7609500829138e-06, "loss": 0.2411, "step": 58175 }, { "epoch": 0.57, "grad_norm": 18.575679779052734, "learning_rate": 4.760825960459551e-06, "loss": 0.3777, "step": 58200 }, { "epoch": 0.57, "grad_norm": 41.338314056396484, "learning_rate": 4.7607018380053025e-06, "loss": 0.2108, "step": 58225 }, { "epoch": 0.57, "grad_norm": 14.578934669494629, "learning_rate": 4.760577715551055e-06, "loss": 0.4146, "step": 58250 }, { "epoch": 0.57, "grad_norm": 13.561258316040039, "learning_rate": 4.760453593096806e-06, "loss": 0.2576, "step": 58275 }, { "epoch": 0.57, "grad_norm": 16.0782527923584, "learning_rate": 4.760329470642557e-06, "loss": 0.3834, "step": 58300 }, { "epoch": 0.57, "grad_norm": 14.577035903930664, "learning_rate": 4.760205348188309e-06, "loss": 0.2459, "step": 58325 }, { "epoch": 0.57, "grad_norm": 11.33894157409668, "learning_rate": 4.76008122573406e-06, "loss": 0.4254, "step": 58350 }, { "epoch": 0.57, "grad_norm": 9.455550193786621, "learning_rate": 4.759957103279812e-06, "loss": 0.1933, "step": 58375 }, { "epoch": 0.57, "grad_norm": 14.198629379272461, "learning_rate": 4.7598329808255636e-06, "loss": 0.437, "step": 58400 }, { "epoch": 0.57, "grad_norm": 10.50509262084961, "learning_rate": 4.759708858371316e-06, "loss": 0.2204, "step": 58425 }, { "epoch": 0.57, "grad_norm": 14.116068840026855, "learning_rate": 4.759584735917067e-06, "loss": 0.3441, "step": 58450 }, { "epoch": 0.57, "grad_norm": 7.4643964767456055, "learning_rate": 4.759460613462819e-06, "loss": 0.2204, "step": 58475 }, { "epoch": 0.58, "grad_norm": 17.915571212768555, "learning_rate": 4.75933649100857e-06, "loss": 0.3329, "step": 58500 }, { "epoch": 0.58, "grad_norm": 5.0501708984375, "learning_rate": 4.759212368554321e-06, "loss": 0.1835, "step": 58525 }, { "epoch": 0.58, "grad_norm": 22.188539505004883, "learning_rate": 4.759088246100073e-06, "loss": 0.4302, "step": 58550 }, { "epoch": 0.58, "grad_norm": 7.416275501251221, "learning_rate": 4.758964123645825e-06, "loss": 0.2267, "step": 58575 }, { "epoch": 0.58, "grad_norm": 18.06610107421875, "learning_rate": 4.758840001191576e-06, "loss": 0.348, "step": 58600 }, { "epoch": 0.58, "grad_norm": 7.032206058502197, "learning_rate": 4.758715878737327e-06, "loss": 0.2415, "step": 58625 }, { "epoch": 0.58, "grad_norm": 18.810218811035156, "learning_rate": 4.758591756283079e-06, "loss": 0.4381, "step": 58650 }, { "epoch": 0.58, "grad_norm": 9.98550033569336, "learning_rate": 4.75846763382883e-06, "loss": 0.2784, "step": 58675 }, { "epoch": 0.58, "grad_norm": 13.62476921081543, "learning_rate": 4.7583435113745815e-06, "loss": 0.3694, "step": 58700 }, { "epoch": 0.58, "grad_norm": 12.145069122314453, "learning_rate": 4.7582193889203335e-06, "loss": 0.2934, "step": 58725 }, { "epoch": 0.58, "grad_norm": 23.068557739257812, "learning_rate": 4.758095266466085e-06, "loss": 0.3528, "step": 58750 }, { "epoch": 0.58, "grad_norm": 7.787389755249023, "learning_rate": 4.757971144011837e-06, "loss": 0.1924, "step": 58775 }, { "epoch": 0.58, "grad_norm": 15.31666088104248, "learning_rate": 4.757847021557588e-06, "loss": 0.3775, "step": 58800 }, { "epoch": 0.58, "grad_norm": 13.573016166687012, "learning_rate": 4.75772289910334e-06, "loss": 0.2765, "step": 58825 }, { "epoch": 0.58, "grad_norm": 17.328908920288086, "learning_rate": 4.757598776649091e-06, "loss": 0.372, "step": 58850 }, { "epoch": 0.58, "grad_norm": 11.29321002960205, "learning_rate": 4.757474654194843e-06, "loss": 0.2108, "step": 58875 }, { "epoch": 0.58, "grad_norm": 23.48529052734375, "learning_rate": 4.7573505317405946e-06, "loss": 0.4305, "step": 58900 }, { "epoch": 0.58, "grad_norm": 10.186408042907715, "learning_rate": 4.757226409286346e-06, "loss": 0.1997, "step": 58925 }, { "epoch": 0.58, "grad_norm": 17.35993194580078, "learning_rate": 4.757102286832098e-06, "loss": 0.4294, "step": 58950 }, { "epoch": 0.58, "grad_norm": 10.656084060668945, "learning_rate": 4.756978164377849e-06, "loss": 0.231, "step": 58975 }, { "epoch": 0.58, "grad_norm": 21.124326705932617, "learning_rate": 4.7568540419236e-06, "loss": 0.3759, "step": 59000 }, { "epoch": 0.58, "grad_norm": 14.794957160949707, "learning_rate": 4.756729919469352e-06, "loss": 0.272, "step": 59025 }, { "epoch": 0.58, "grad_norm": 18.00164031982422, "learning_rate": 4.7566057970151035e-06, "loss": 0.3894, "step": 59050 }, { "epoch": 0.58, "grad_norm": 2.7005157470703125, "learning_rate": 4.756481674560855e-06, "loss": 0.1774, "step": 59075 }, { "epoch": 0.58, "grad_norm": 18.979305267333984, "learning_rate": 4.756357552106607e-06, "loss": 0.4245, "step": 59100 }, { "epoch": 0.58, "grad_norm": 3.666425943374634, "learning_rate": 4.756233429652358e-06, "loss": 0.2098, "step": 59125 }, { "epoch": 0.58, "grad_norm": 16.293460845947266, "learning_rate": 4.756109307198109e-06, "loss": 0.435, "step": 59150 }, { "epoch": 0.58, "grad_norm": 7.528684139251709, "learning_rate": 4.755985184743861e-06, "loss": 0.2553, "step": 59175 }, { "epoch": 0.58, "grad_norm": 5.268284320831299, "learning_rate": 4.7558610622896125e-06, "loss": 0.3868, "step": 59200 }, { "epoch": 0.58, "grad_norm": 8.206559181213379, "learning_rate": 4.7557369398353645e-06, "loss": 0.2441, "step": 59225 }, { "epoch": 0.58, "grad_norm": 15.437331199645996, "learning_rate": 4.755612817381116e-06, "loss": 0.4147, "step": 59250 }, { "epoch": 0.58, "grad_norm": 9.899516105651855, "learning_rate": 4.755488694926868e-06, "loss": 0.2429, "step": 59275 }, { "epoch": 0.58, "grad_norm": 19.58512306213379, "learning_rate": 4.755364572472619e-06, "loss": 0.4036, "step": 59300 }, { "epoch": 0.58, "grad_norm": 8.869194984436035, "learning_rate": 4.755240450018371e-06, "loss": 0.231, "step": 59325 }, { "epoch": 0.58, "grad_norm": 17.456069946289062, "learning_rate": 4.755116327564122e-06, "loss": 0.4227, "step": 59350 }, { "epoch": 0.58, "grad_norm": 13.695860862731934, "learning_rate": 4.7549922051098735e-06, "loss": 0.2432, "step": 59375 }, { "epoch": 0.58, "grad_norm": 19.87813949584961, "learning_rate": 4.7548680826556256e-06, "loss": 0.4127, "step": 59400 }, { "epoch": 0.58, "grad_norm": 9.969858169555664, "learning_rate": 4.754743960201377e-06, "loss": 0.2931, "step": 59425 }, { "epoch": 0.58, "grad_norm": 15.029800415039062, "learning_rate": 4.754619837747128e-06, "loss": 0.4031, "step": 59450 }, { "epoch": 0.58, "grad_norm": 8.365337371826172, "learning_rate": 4.754495715292879e-06, "loss": 0.2262, "step": 59475 }, { "epoch": 0.59, "grad_norm": 18.87302017211914, "learning_rate": 4.754371592838631e-06, "loss": 0.4339, "step": 59500 }, { "epoch": 0.59, "grad_norm": 15.840072631835938, "learning_rate": 4.7542474703843825e-06, "loss": 0.2515, "step": 59525 }, { "epoch": 0.59, "grad_norm": 6.565250873565674, "learning_rate": 4.754123347930134e-06, "loss": 0.4514, "step": 59550 }, { "epoch": 0.59, "grad_norm": 8.643647193908691, "learning_rate": 4.753999225475886e-06, "loss": 0.242, "step": 59575 }, { "epoch": 0.59, "grad_norm": 19.912622451782227, "learning_rate": 4.753875103021637e-06, "loss": 0.3669, "step": 59600 }, { "epoch": 0.59, "grad_norm": 6.496582508087158, "learning_rate": 4.753750980567389e-06, "loss": 0.231, "step": 59625 }, { "epoch": 0.59, "grad_norm": Infinity, "learning_rate": 4.75363182301131e-06, "loss": 0.4176, "step": 59650 }, { "epoch": 0.59, "grad_norm": 5.555845260620117, "learning_rate": 4.753507700557062e-06, "loss": 0.248, "step": 59675 }, { "epoch": 0.59, "grad_norm": 15.803118705749512, "learning_rate": 4.753383578102813e-06, "loss": 0.4166, "step": 59700 }, { "epoch": 0.59, "grad_norm": 10.99365234375, "learning_rate": 4.753259455648565e-06, "loss": 0.2199, "step": 59725 }, { "epoch": 0.59, "grad_norm": 15.329298973083496, "learning_rate": 4.753135333194317e-06, "loss": 0.4295, "step": 59750 }, { "epoch": 0.59, "grad_norm": 8.291814804077148, "learning_rate": 4.753011210740069e-06, "loss": 0.2122, "step": 59775 }, { "epoch": 0.59, "grad_norm": 16.055692672729492, "learning_rate": 4.75288708828582e-06, "loss": 0.4065, "step": 59800 }, { "epoch": 0.59, "grad_norm": 12.034446716308594, "learning_rate": 4.752762965831571e-06, "loss": 0.2539, "step": 59825 }, { "epoch": 0.59, "grad_norm": 19.528409957885742, "learning_rate": 4.752638843377323e-06, "loss": 0.4261, "step": 59850 }, { "epoch": 0.59, "grad_norm": 8.597729682922363, "learning_rate": 4.752514720923074e-06, "loss": 0.2461, "step": 59875 }, { "epoch": 0.59, "grad_norm": 16.920246124267578, "learning_rate": 4.7523905984688255e-06, "loss": 0.3473, "step": 59900 }, { "epoch": 0.59, "grad_norm": 5.5452799797058105, "learning_rate": 4.752266476014578e-06, "loss": 0.2183, "step": 59925 }, { "epoch": 0.59, "grad_norm": 13.419168472290039, "learning_rate": 4.752142353560329e-06, "loss": 0.39, "step": 59950 }, { "epoch": 0.59, "grad_norm": 7.854682445526123, "learning_rate": 4.75201823110608e-06, "loss": 0.2293, "step": 59975 }, { "epoch": 0.59, "grad_norm": 19.729812622070312, "learning_rate": 4.751894108651832e-06, "loss": 0.3853, "step": 60000 }, { "epoch": 0.59, "eval_loss": 0.5099149942398071, "eval_runtime": 6022.5684, "eval_samples_per_second": 1.572, "eval_steps_per_second": 0.197, "eval_wer": 0.14704178201248955, "step": 60000 }, { "epoch": 0.59, "grad_norm": 8.450888633728027, "learning_rate": 4.751769986197583e-06, "loss": 0.2174, "step": 60025 }, { "epoch": 0.59, "grad_norm": 16.7613468170166, "learning_rate": 4.7516458637433345e-06, "loss": 0.3568, "step": 60050 }, { "epoch": 0.59, "grad_norm": 4.2653045654296875, "learning_rate": 4.7515217412890866e-06, "loss": 0.223, "step": 60075 }, { "epoch": 0.59, "grad_norm": 19.253992080688477, "learning_rate": 4.751397618834838e-06, "loss": 0.3964, "step": 60100 }, { "epoch": 0.59, "grad_norm": 7.707286357879639, "learning_rate": 4.75127349638059e-06, "loss": 0.235, "step": 60125 }, { "epoch": 0.59, "grad_norm": 10.833660125732422, "learning_rate": 4.751149373926341e-06, "loss": 0.4548, "step": 60150 }, { "epoch": 0.59, "grad_norm": 9.409354209899902, "learning_rate": 4.751025251472093e-06, "loss": 0.2351, "step": 60175 }, { "epoch": 0.59, "grad_norm": 18.855295181274414, "learning_rate": 4.750901129017844e-06, "loss": 0.3985, "step": 60200 }, { "epoch": 0.59, "grad_norm": 11.701505661010742, "learning_rate": 4.7507770065635955e-06, "loss": 0.2398, "step": 60225 }, { "epoch": 0.59, "grad_norm": 22.699214935302734, "learning_rate": 4.750652884109348e-06, "loss": 0.4283, "step": 60250 }, { "epoch": 0.59, "grad_norm": 2.6902878284454346, "learning_rate": 4.750528761655099e-06, "loss": 0.2114, "step": 60275 }, { "epoch": 0.59, "grad_norm": 16.352291107177734, "learning_rate": 4.75040463920085e-06, "loss": 0.351, "step": 60300 }, { "epoch": 0.59, "grad_norm": 9.42310905456543, "learning_rate": 4.750280516746602e-06, "loss": 0.2558, "step": 60325 }, { "epoch": 0.59, "grad_norm": 17.833385467529297, "learning_rate": 4.750156394292353e-06, "loss": 0.4204, "step": 60350 }, { "epoch": 0.59, "grad_norm": 14.68553638458252, "learning_rate": 4.7500322718381045e-06, "loss": 0.2229, "step": 60375 }, { "epoch": 0.59, "grad_norm": 21.687761306762695, "learning_rate": 4.7499081493838565e-06, "loss": 0.5131, "step": 60400 }, { "epoch": 0.59, "grad_norm": 7.890764236450195, "learning_rate": 4.749784026929608e-06, "loss": 0.2414, "step": 60425 }, { "epoch": 0.59, "grad_norm": 14.65392017364502, "learning_rate": 4.74965990447536e-06, "loss": 0.4, "step": 60450 }, { "epoch": 0.59, "grad_norm": 18.142131805419922, "learning_rate": 4.749535782021111e-06, "loss": 0.2048, "step": 60475 }, { "epoch": 0.59, "grad_norm": 19.403596878051758, "learning_rate": 4.749411659566863e-06, "loss": 0.3169, "step": 60500 }, { "epoch": 0.6, "grad_norm": 12.017696380615234, "learning_rate": 4.749287537112614e-06, "loss": 0.2265, "step": 60525 }, { "epoch": 0.6, "grad_norm": 16.0651798248291, "learning_rate": 4.749163414658366e-06, "loss": 0.418, "step": 60550 }, { "epoch": 0.6, "grad_norm": 4.529430866241455, "learning_rate": 4.7490392922041176e-06, "loss": 0.2172, "step": 60575 }, { "epoch": 0.6, "grad_norm": 25.226594924926758, "learning_rate": 4.748915169749869e-06, "loss": 0.4285, "step": 60600 }, { "epoch": 0.6, "grad_norm": 11.142004013061523, "learning_rate": 4.748791047295621e-06, "loss": 0.2228, "step": 60625 }, { "epoch": 0.6, "grad_norm": 15.874055862426758, "learning_rate": 4.748666924841372e-06, "loss": 0.3882, "step": 60650 }, { "epoch": 0.6, "grad_norm": 10.669600486755371, "learning_rate": 4.748542802387123e-06, "loss": 0.2209, "step": 60675 }, { "epoch": 0.6, "grad_norm": 19.056116104125977, "learning_rate": 4.748418679932875e-06, "loss": 0.4557, "step": 60700 }, { "epoch": 0.6, "grad_norm": 5.024077892303467, "learning_rate": 4.7482945574786265e-06, "loss": 0.2135, "step": 60725 }, { "epoch": 0.6, "grad_norm": 18.10993766784668, "learning_rate": 4.748170435024378e-06, "loss": 0.4409, "step": 60750 }, { "epoch": 0.6, "grad_norm": 11.87138843536377, "learning_rate": 4.74804631257013e-06, "loss": 0.2289, "step": 60775 }, { "epoch": 0.6, "grad_norm": 9.538829803466797, "learning_rate": 4.747922190115881e-06, "loss": 0.4125, "step": 60800 }, { "epoch": 0.6, "grad_norm": 7.776928901672363, "learning_rate": 4.747798067661632e-06, "loss": 0.2386, "step": 60825 }, { "epoch": 0.6, "grad_norm": 21.847745895385742, "learning_rate": 4.747673945207384e-06, "loss": 0.416, "step": 60850 }, { "epoch": 0.6, "grad_norm": 10.52847671508789, "learning_rate": 4.7475498227531355e-06, "loss": 0.2111, "step": 60875 }, { "epoch": 0.6, "grad_norm": 20.442153930664062, "learning_rate": 4.7474257002988876e-06, "loss": 0.3825, "step": 60900 }, { "epoch": 0.6, "grad_norm": 13.834134101867676, "learning_rate": 4.747301577844639e-06, "loss": 0.2686, "step": 60925 }, { "epoch": 0.6, "grad_norm": 19.88157844543457, "learning_rate": 4.747177455390391e-06, "loss": 0.4373, "step": 60950 }, { "epoch": 0.6, "grad_norm": 3.358118772506714, "learning_rate": 4.747053332936142e-06, "loss": 0.2128, "step": 60975 }, { "epoch": 0.6, "grad_norm": 10.877924919128418, "learning_rate": 4.746929210481893e-06, "loss": 0.4697, "step": 61000 }, { "epoch": 0.6, "grad_norm": 4.114137172698975, "learning_rate": 4.746805088027645e-06, "loss": 0.2216, "step": 61025 }, { "epoch": 0.6, "grad_norm": 17.517654418945312, "learning_rate": 4.7466809655733965e-06, "loss": 0.3649, "step": 61050 }, { "epoch": 0.6, "grad_norm": 8.1101655960083, "learning_rate": 4.746556843119148e-06, "loss": 0.2198, "step": 61075 }, { "epoch": 0.6, "grad_norm": 14.21091079711914, "learning_rate": 4.7464327206649e-06, "loss": 0.4151, "step": 61100 }, { "epoch": 0.6, "grad_norm": 11.29476547241211, "learning_rate": 4.746308598210651e-06, "loss": 0.223, "step": 61125 }, { "epoch": 0.6, "grad_norm": 14.991753578186035, "learning_rate": 4.746184475756402e-06, "loss": 0.329, "step": 61150 }, { "epoch": 0.6, "grad_norm": 13.671906471252441, "learning_rate": 4.746060353302154e-06, "loss": 0.2407, "step": 61175 }, { "epoch": 0.6, "grad_norm": 14.63432502746582, "learning_rate": 4.7459362308479055e-06, "loss": 0.3484, "step": 61200 }, { "epoch": 0.6, "grad_norm": 9.416864395141602, "learning_rate": 4.745812108393657e-06, "loss": 0.2435, "step": 61225 }, { "epoch": 0.6, "grad_norm": 19.343738555908203, "learning_rate": 4.745687985939409e-06, "loss": 0.4321, "step": 61250 }, { "epoch": 0.6, "grad_norm": 7.9871344566345215, "learning_rate": 4.74556386348516e-06, "loss": 0.21, "step": 61275 }, { "epoch": 0.6, "grad_norm": 8.679779052734375, "learning_rate": 4.745439741030912e-06, "loss": 0.3758, "step": 61300 }, { "epoch": 0.6, "grad_norm": 6.245368003845215, "learning_rate": 4.745315618576663e-06, "loss": 0.2176, "step": 61325 }, { "epoch": 0.6, "grad_norm": 23.603906631469727, "learning_rate": 4.745191496122415e-06, "loss": 0.3522, "step": 61350 }, { "epoch": 0.6, "grad_norm": 12.17825698852539, "learning_rate": 4.7450673736681665e-06, "loss": 0.2314, "step": 61375 }, { "epoch": 0.6, "grad_norm": 14.028321266174316, "learning_rate": 4.7449432512139186e-06, "loss": 0.3913, "step": 61400 }, { "epoch": 0.6, "grad_norm": 6.760195255279541, "learning_rate": 4.74481912875967e-06, "loss": 0.2573, "step": 61425 }, { "epoch": 0.6, "grad_norm": 15.779847145080566, "learning_rate": 4.744695006305421e-06, "loss": 0.3508, "step": 61450 }, { "epoch": 0.6, "grad_norm": 13.058972358703613, "learning_rate": 4.744570883851173e-06, "loss": 0.2379, "step": 61475 }, { "epoch": 0.6, "grad_norm": 19.28507423400879, "learning_rate": 4.744446761396924e-06, "loss": 0.4468, "step": 61500 }, { "epoch": 0.6, "grad_norm": 13.975452423095703, "learning_rate": 4.7443226389426755e-06, "loss": 0.247, "step": 61525 }, { "epoch": 0.61, "grad_norm": 17.676244735717773, "learning_rate": 4.7441985164884275e-06, "loss": 0.4183, "step": 61550 }, { "epoch": 0.61, "grad_norm": 5.727464199066162, "learning_rate": 4.744074394034179e-06, "loss": 0.2524, "step": 61575 }, { "epoch": 0.61, "grad_norm": 11.734810829162598, "learning_rate": 4.74395027157993e-06, "loss": 0.4009, "step": 61600 }, { "epoch": 0.61, "grad_norm": 11.527219772338867, "learning_rate": 4.743826149125682e-06, "loss": 0.2315, "step": 61625 }, { "epoch": 0.61, "grad_norm": 8.759135246276855, "learning_rate": 4.743702026671433e-06, "loss": 0.365, "step": 61650 }, { "epoch": 0.61, "grad_norm": 12.060635566711426, "learning_rate": 4.7435779042171844e-06, "loss": 0.2044, "step": 61675 }, { "epoch": 0.61, "grad_norm": 17.987184524536133, "learning_rate": 4.743458746661106e-06, "loss": 0.4024, "step": 61700 }, { "epoch": 0.61, "grad_norm": 12.594635009765625, "learning_rate": 4.7433346242068575e-06, "loss": 0.1884, "step": 61725 }, { "epoch": 0.61, "grad_norm": 17.935165405273438, "learning_rate": 4.7432105017526096e-06, "loss": 0.4427, "step": 61750 }, { "epoch": 0.61, "grad_norm": 8.7632417678833, "learning_rate": 4.743086379298361e-06, "loss": 0.2478, "step": 61775 }, { "epoch": 0.61, "grad_norm": 25.52245330810547, "learning_rate": 4.742962256844113e-06, "loss": 0.4256, "step": 61800 }, { "epoch": 0.61, "grad_norm": 10.259181022644043, "learning_rate": 4.742838134389864e-06, "loss": 0.2015, "step": 61825 }, { "epoch": 0.61, "grad_norm": 15.034119606018066, "learning_rate": 4.742714011935616e-06, "loss": 0.4562, "step": 61850 }, { "epoch": 0.61, "grad_norm": 7.239771842956543, "learning_rate": 4.742589889481367e-06, "loss": 0.2024, "step": 61875 }, { "epoch": 0.61, "grad_norm": 10.166732788085938, "learning_rate": 4.7424657670271185e-06, "loss": 0.4068, "step": 61900 }, { "epoch": 0.61, "grad_norm": 2.5739102363586426, "learning_rate": 4.742341644572871e-06, "loss": 0.1942, "step": 61925 }, { "epoch": 0.61, "grad_norm": 20.10049819946289, "learning_rate": 4.742217522118622e-06, "loss": 0.3911, "step": 61950 }, { "epoch": 0.61, "grad_norm": 12.948963165283203, "learning_rate": 4.742093399664373e-06, "loss": 0.2084, "step": 61975 }, { "epoch": 0.61, "grad_norm": 13.82882308959961, "learning_rate": 4.741969277210125e-06, "loss": 0.3925, "step": 62000 }, { "epoch": 0.61, "grad_norm": 9.443575859069824, "learning_rate": 4.741845154755876e-06, "loss": 0.2472, "step": 62025 }, { "epoch": 0.61, "grad_norm": 19.649211883544922, "learning_rate": 4.7417210323016275e-06, "loss": 0.4253, "step": 62050 }, { "epoch": 0.61, "grad_norm": 7.988130569458008, "learning_rate": 4.7415969098473796e-06, "loss": 0.2033, "step": 62075 }, { "epoch": 0.61, "grad_norm": 20.57453155517578, "learning_rate": 4.741472787393131e-06, "loss": 0.3919, "step": 62100 }, { "epoch": 0.61, "grad_norm": 7.784149169921875, "learning_rate": 4.741348664938882e-06, "loss": 0.2138, "step": 62125 }, { "epoch": 0.61, "grad_norm": 12.746438026428223, "learning_rate": 4.741224542484634e-06, "loss": 0.3385, "step": 62150 }, { "epoch": 0.61, "grad_norm": 6.383761405944824, "learning_rate": 4.741100420030385e-06, "loss": 0.2309, "step": 62175 }, { "epoch": 0.61, "grad_norm": 31.250680923461914, "learning_rate": 4.740976297576137e-06, "loss": 0.4235, "step": 62200 }, { "epoch": 0.61, "grad_norm": 13.45019245147705, "learning_rate": 4.7408521751218885e-06, "loss": 0.2317, "step": 62225 }, { "epoch": 0.61, "grad_norm": 18.697771072387695, "learning_rate": 4.7407280526676406e-06, "loss": 0.4084, "step": 62250 }, { "epoch": 0.61, "grad_norm": 9.75770378112793, "learning_rate": 4.740603930213392e-06, "loss": 0.2577, "step": 62275 }, { "epoch": 0.61, "grad_norm": 12.956944465637207, "learning_rate": 4.740479807759144e-06, "loss": 0.4242, "step": 62300 }, { "epoch": 0.61, "grad_norm": 8.335428237915039, "learning_rate": 4.740355685304895e-06, "loss": 0.2368, "step": 62325 }, { "epoch": 0.61, "grad_norm": 18.885648727416992, "learning_rate": 4.740231562850646e-06, "loss": 0.4503, "step": 62350 }, { "epoch": 0.61, "grad_norm": 11.214428901672363, "learning_rate": 4.7401074403963975e-06, "loss": 0.1932, "step": 62375 }, { "epoch": 0.61, "grad_norm": 20.468555450439453, "learning_rate": 4.7399833179421495e-06, "loss": 0.4101, "step": 62400 }, { "epoch": 0.61, "grad_norm": 8.27176570892334, "learning_rate": 4.739859195487901e-06, "loss": 0.1859, "step": 62425 }, { "epoch": 0.61, "grad_norm": 21.655174255371094, "learning_rate": 4.739735073033652e-06, "loss": 0.3722, "step": 62450 }, { "epoch": 0.61, "grad_norm": 12.049479484558105, "learning_rate": 4.739610950579404e-06, "loss": 0.2161, "step": 62475 }, { "epoch": 0.61, "grad_norm": 12.217530250549316, "learning_rate": 4.739486828125155e-06, "loss": 0.3785, "step": 62500 }, { "epoch": 0.61, "grad_norm": 9.831656455993652, "learning_rate": 4.7393627056709064e-06, "loss": 0.2809, "step": 62525 }, { "epoch": 0.62, "grad_norm": 16.561594009399414, "learning_rate": 4.7392385832166585e-06, "loss": 0.4303, "step": 62550 }, { "epoch": 0.62, "grad_norm": 7.976644992828369, "learning_rate": 4.73911446076241e-06, "loss": 0.2233, "step": 62575 }, { "epoch": 0.62, "grad_norm": 16.563451766967773, "learning_rate": 4.738990338308162e-06, "loss": 0.4045, "step": 62600 }, { "epoch": 0.62, "grad_norm": 9.113304138183594, "learning_rate": 4.738866215853913e-06, "loss": 0.1661, "step": 62625 }, { "epoch": 0.62, "grad_norm": 12.346843719482422, "learning_rate": 4.738742093399665e-06, "loss": 0.4187, "step": 62650 }, { "epoch": 0.62, "grad_norm": 11.524795532226562, "learning_rate": 4.738617970945416e-06, "loss": 0.2391, "step": 62675 }, { "epoch": 0.62, "grad_norm": 15.427268981933594, "learning_rate": 4.738493848491168e-06, "loss": 0.4439, "step": 62700 }, { "epoch": 0.62, "grad_norm": 10.352045059204102, "learning_rate": 4.7383697260369195e-06, "loss": 0.254, "step": 62725 }, { "epoch": 0.62, "grad_norm": 19.197927474975586, "learning_rate": 4.738245603582671e-06, "loss": 0.4648, "step": 62750 }, { "epoch": 0.62, "grad_norm": 10.4898099899292, "learning_rate": 4.738121481128423e-06, "loss": 0.2484, "step": 62775 }, { "epoch": 0.62, "grad_norm": 27.94470977783203, "learning_rate": 4.737997358674174e-06, "loss": 0.4722, "step": 62800 }, { "epoch": 0.62, "grad_norm": 7.039337158203125, "learning_rate": 4.737873236219925e-06, "loss": 0.2184, "step": 62825 }, { "epoch": 0.62, "grad_norm": 16.506242752075195, "learning_rate": 4.737749113765677e-06, "loss": 0.4201, "step": 62850 }, { "epoch": 0.62, "grad_norm": 12.264220237731934, "learning_rate": 4.7376249913114285e-06, "loss": 0.2173, "step": 62875 }, { "epoch": 0.62, "grad_norm": 15.822281837463379, "learning_rate": 4.73750086885718e-06, "loss": 0.4086, "step": 62900 }, { "epoch": 0.62, "grad_norm": 12.614900588989258, "learning_rate": 4.737376746402932e-06, "loss": 0.2329, "step": 62925 }, { "epoch": 0.62, "grad_norm": 16.23208236694336, "learning_rate": 4.737252623948683e-06, "loss": 0.3694, "step": 62950 }, { "epoch": 0.62, "grad_norm": 13.226645469665527, "learning_rate": 4.737128501494434e-06, "loss": 0.2439, "step": 62975 }, { "epoch": 0.62, "grad_norm": 10.705899238586426, "learning_rate": 4.737004379040186e-06, "loss": 0.4142, "step": 63000 }, { "epoch": 0.62, "grad_norm": 10.933423042297363, "learning_rate": 4.7368802565859374e-06, "loss": 0.2538, "step": 63025 }, { "epoch": 0.62, "grad_norm": 21.15595817565918, "learning_rate": 4.7367561341316895e-06, "loss": 0.3726, "step": 63050 }, { "epoch": 0.62, "grad_norm": 17.540311813354492, "learning_rate": 4.736632011677441e-06, "loss": 0.2356, "step": 63075 }, { "epoch": 0.62, "grad_norm": 13.70141315460205, "learning_rate": 4.736507889223193e-06, "loss": 0.4145, "step": 63100 }, { "epoch": 0.62, "grad_norm": 10.210349082946777, "learning_rate": 4.736383766768944e-06, "loss": 0.2281, "step": 63125 }, { "epoch": 0.62, "grad_norm": 16.427352905273438, "learning_rate": 4.736259644314696e-06, "loss": 0.353, "step": 63150 }, { "epoch": 0.62, "grad_norm": 9.09450626373291, "learning_rate": 4.736135521860447e-06, "loss": 0.2597, "step": 63175 }, { "epoch": 0.62, "grad_norm": 14.708457946777344, "learning_rate": 4.7360113994061985e-06, "loss": 0.3803, "step": 63200 }, { "epoch": 0.62, "grad_norm": 13.87600040435791, "learning_rate": 4.73588727695195e-06, "loss": 0.2408, "step": 63225 }, { "epoch": 0.62, "grad_norm": 21.35753631591797, "learning_rate": 4.735763154497702e-06, "loss": 0.4135, "step": 63250 }, { "epoch": 0.62, "grad_norm": 13.647618293762207, "learning_rate": 4.735639032043453e-06, "loss": 0.2131, "step": 63275 }, { "epoch": 0.62, "grad_norm": 7.926036834716797, "learning_rate": 4.735514909589204e-06, "loss": 0.3717, "step": 63300 }, { "epoch": 0.62, "grad_norm": 10.950610160827637, "learning_rate": 4.735390787134956e-06, "loss": 0.2246, "step": 63325 }, { "epoch": 0.62, "grad_norm": 15.894444465637207, "learning_rate": 4.7352666646807074e-06, "loss": 0.4067, "step": 63350 }, { "epoch": 0.62, "grad_norm": 12.1261625289917, "learning_rate": 4.7351425422264595e-06, "loss": 0.2577, "step": 63375 }, { "epoch": 0.62, "grad_norm": 8.923748970031738, "learning_rate": 4.735018419772211e-06, "loss": 0.4247, "step": 63400 }, { "epoch": 0.62, "grad_norm": 5.220231056213379, "learning_rate": 4.734894297317963e-06, "loss": 0.2266, "step": 63425 }, { "epoch": 0.62, "grad_norm": 23.220006942749023, "learning_rate": 4.734770174863714e-06, "loss": 0.4023, "step": 63450 }, { "epoch": 0.62, "grad_norm": 11.006407737731934, "learning_rate": 4.734646052409466e-06, "loss": 0.2383, "step": 63475 }, { "epoch": 0.62, "grad_norm": 14.884317398071289, "learning_rate": 4.734521929955217e-06, "loss": 0.4574, "step": 63500 }, { "epoch": 0.62, "grad_norm": 9.63478946685791, "learning_rate": 4.7343978075009685e-06, "loss": 0.261, "step": 63525 }, { "epoch": 0.62, "grad_norm": 20.947895050048828, "learning_rate": 4.7342736850467205e-06, "loss": 0.3985, "step": 63550 }, { "epoch": 0.63, "grad_norm": 6.971557140350342, "learning_rate": 4.734149562592472e-06, "loss": 0.2047, "step": 63575 }, { "epoch": 0.63, "grad_norm": 27.22764015197754, "learning_rate": 4.734025440138223e-06, "loss": 0.4587, "step": 63600 }, { "epoch": 0.63, "grad_norm": 14.171181678771973, "learning_rate": 4.733901317683975e-06, "loss": 0.2283, "step": 63625 }, { "epoch": 0.63, "grad_norm": 20.837322235107422, "learning_rate": 4.733777195229726e-06, "loss": 0.3774, "step": 63650 }, { "epoch": 0.63, "grad_norm": 9.339509010314941, "learning_rate": 4.733653072775477e-06, "loss": 0.2225, "step": 63675 }, { "epoch": 0.63, "grad_norm": 21.74949073791504, "learning_rate": 4.7335289503212295e-06, "loss": 0.4711, "step": 63700 }, { "epoch": 0.63, "grad_norm": 7.530289649963379, "learning_rate": 4.733404827866981e-06, "loss": 0.2295, "step": 63725 }, { "epoch": 0.63, "grad_norm": 16.18223762512207, "learning_rate": 4.733280705412732e-06, "loss": 0.3825, "step": 63750 }, { "epoch": 0.63, "grad_norm": 12.71172046661377, "learning_rate": 4.733156582958484e-06, "loss": 0.1969, "step": 63775 }, { "epoch": 0.63, "grad_norm": 19.796409606933594, "learning_rate": 4.733032460504235e-06, "loss": 0.4211, "step": 63800 }, { "epoch": 0.63, "grad_norm": 8.786194801330566, "learning_rate": 4.732908338049987e-06, "loss": 0.174, "step": 63825 }, { "epoch": 0.63, "grad_norm": 15.524862289428711, "learning_rate": 4.7327842155957384e-06, "loss": 0.3878, "step": 63850 }, { "epoch": 0.63, "grad_norm": 6.464669227600098, "learning_rate": 4.7326600931414905e-06, "loss": 0.1914, "step": 63875 }, { "epoch": 0.63, "grad_norm": 22.01849365234375, "learning_rate": 4.732535970687242e-06, "loss": 0.4454, "step": 63900 }, { "epoch": 0.63, "grad_norm": 11.297830581665039, "learning_rate": 4.732411848232994e-06, "loss": 0.2167, "step": 63925 }, { "epoch": 0.63, "grad_norm": 14.293859481811523, "learning_rate": 4.732287725778745e-06, "loss": 0.421, "step": 63950 }, { "epoch": 0.63, "grad_norm": 9.38304328918457, "learning_rate": 4.732163603324496e-06, "loss": 0.2668, "step": 63975 }, { "epoch": 0.63, "grad_norm": 17.553539276123047, "learning_rate": 4.732044445768418e-06, "loss": 0.4084, "step": 64000 }, { "epoch": 0.63, "grad_norm": 9.084928512573242, "learning_rate": 4.731920323314169e-06, "loss": 0.2255, "step": 64025 }, { "epoch": 0.63, "grad_norm": 21.213714599609375, "learning_rate": 4.7317962008599205e-06, "loss": 0.4141, "step": 64050 }, { "epoch": 0.63, "grad_norm": 21.229230880737305, "learning_rate": 4.7316720784056725e-06, "loss": 0.2509, "step": 64075 }, { "epoch": 0.63, "grad_norm": 22.762685775756836, "learning_rate": 4.731547955951424e-06, "loss": 0.4282, "step": 64100 }, { "epoch": 0.63, "grad_norm": 13.10695743560791, "learning_rate": 4.731423833497175e-06, "loss": 0.2307, "step": 64125 }, { "epoch": 0.63, "grad_norm": 16.616918563842773, "learning_rate": 4.731299711042927e-06, "loss": 0.3716, "step": 64150 }, { "epoch": 0.63, "grad_norm": 25.303144454956055, "learning_rate": 4.731175588588678e-06, "loss": 0.2864, "step": 64175 }, { "epoch": 0.63, "grad_norm": 24.254356384277344, "learning_rate": 4.7310514661344294e-06, "loss": 0.4284, "step": 64200 }, { "epoch": 0.63, "grad_norm": 6.748021602630615, "learning_rate": 4.7309273436801815e-06, "loss": 0.2194, "step": 64225 }, { "epoch": 0.63, "grad_norm": 18.230243682861328, "learning_rate": 4.730803221225933e-06, "loss": 0.3802, "step": 64250 }, { "epoch": 0.63, "grad_norm": 11.988188743591309, "learning_rate": 4.730679098771685e-06, "loss": 0.2734, "step": 64275 }, { "epoch": 0.63, "grad_norm": 16.488643646240234, "learning_rate": 4.730554976317436e-06, "loss": 0.4027, "step": 64300 }, { "epoch": 0.63, "grad_norm": 6.672736644744873, "learning_rate": 4.730430853863188e-06, "loss": 0.2304, "step": 64325 }, { "epoch": 0.63, "grad_norm": 22.45458221435547, "learning_rate": 4.730306731408939e-06, "loss": 0.436, "step": 64350 }, { "epoch": 0.63, "grad_norm": 11.762018203735352, "learning_rate": 4.730182608954691e-06, "loss": 0.2037, "step": 64375 }, { "epoch": 0.63, "grad_norm": 13.752753257751465, "learning_rate": 4.7300584865004425e-06, "loss": 0.3551, "step": 64400 }, { "epoch": 0.63, "grad_norm": 10.254539489746094, "learning_rate": 4.729934364046194e-06, "loss": 0.2284, "step": 64425 }, { "epoch": 0.63, "grad_norm": 16.310251235961914, "learning_rate": 4.729810241591946e-06, "loss": 0.4067, "step": 64450 }, { "epoch": 0.63, "grad_norm": 3.5240683555603027, "learning_rate": 4.729686119137697e-06, "loss": 0.1934, "step": 64475 }, { "epoch": 0.63, "grad_norm": 13.362171173095703, "learning_rate": 4.729561996683448e-06, "loss": 0.328, "step": 64500 }, { "epoch": 0.63, "grad_norm": 12.663637161254883, "learning_rate": 4.7294378742292e-06, "loss": 0.2459, "step": 64525 }, { "epoch": 0.63, "grad_norm": 27.05335235595703, "learning_rate": 4.7293137517749515e-06, "loss": 0.3957, "step": 64550 }, { "epoch": 0.63, "grad_norm": 25.561025619506836, "learning_rate": 4.729189629320703e-06, "loss": 0.2479, "step": 64575 }, { "epoch": 0.64, "grad_norm": 27.310447692871094, "learning_rate": 4.729065506866455e-06, "loss": 0.4274, "step": 64600 }, { "epoch": 0.64, "grad_norm": 14.390434265136719, "learning_rate": 4.728941384412206e-06, "loss": 0.1986, "step": 64625 }, { "epoch": 0.64, "grad_norm": 12.193324089050293, "learning_rate": 4.728817261957957e-06, "loss": 0.3869, "step": 64650 }, { "epoch": 0.64, "grad_norm": 11.878798484802246, "learning_rate": 4.728693139503709e-06, "loss": 0.2038, "step": 64675 }, { "epoch": 0.64, "grad_norm": 11.487374305725098, "learning_rate": 4.7285690170494605e-06, "loss": 0.3585, "step": 64700 }, { "epoch": 0.64, "grad_norm": 9.37236499786377, "learning_rate": 4.7284448945952125e-06, "loss": 0.2265, "step": 64725 }, { "epoch": 0.64, "grad_norm": 21.982181549072266, "learning_rate": 4.728320772140964e-06, "loss": 0.4042, "step": 64750 }, { "epoch": 0.64, "grad_norm": 9.563532829284668, "learning_rate": 4.728196649686716e-06, "loss": 0.2644, "step": 64775 }, { "epoch": 0.64, "grad_norm": 19.837984085083008, "learning_rate": 4.728072527232467e-06, "loss": 0.3682, "step": 64800 }, { "epoch": 0.64, "grad_norm": 9.284906387329102, "learning_rate": 4.727948404778218e-06, "loss": 0.228, "step": 64825 }, { "epoch": 0.64, "grad_norm": 21.825336456298828, "learning_rate": 4.72782428232397e-06, "loss": 0.4176, "step": 64850 }, { "epoch": 0.64, "grad_norm": 10.911150932312012, "learning_rate": 4.7277001598697215e-06, "loss": 0.1692, "step": 64875 }, { "epoch": 0.64, "grad_norm": 15.62082290649414, "learning_rate": 4.727576037415473e-06, "loss": 0.3733, "step": 64900 }, { "epoch": 0.64, "grad_norm": 8.350494384765625, "learning_rate": 4.727451914961225e-06, "loss": 0.1997, "step": 64925 }, { "epoch": 0.64, "grad_norm": 20.083892822265625, "learning_rate": 4.727327792506976e-06, "loss": 0.3479, "step": 64950 }, { "epoch": 0.64, "grad_norm": 11.318272590637207, "learning_rate": 4.727203670052727e-06, "loss": 0.2344, "step": 64975 }, { "epoch": 0.64, "grad_norm": 20.101411819458008, "learning_rate": 4.727079547598479e-06, "loss": 0.4274, "step": 65000 }, { "epoch": 0.64, "grad_norm": 7.991676330566406, "learning_rate": 4.7269554251442304e-06, "loss": 0.1907, "step": 65025 }, { "epoch": 0.64, "grad_norm": 14.328476905822754, "learning_rate": 4.726831302689982e-06, "loss": 0.4183, "step": 65050 }, { "epoch": 0.64, "grad_norm": 10.220565795898438, "learning_rate": 4.726707180235734e-06, "loss": 0.194, "step": 65075 }, { "epoch": 0.64, "grad_norm": 22.52943992614746, "learning_rate": 4.726583057781485e-06, "loss": 0.4226, "step": 65100 }, { "epoch": 0.64, "grad_norm": 15.909747123718262, "learning_rate": 4.726458935327237e-06, "loss": 0.2384, "step": 65125 }, { "epoch": 0.64, "grad_norm": 16.52733039855957, "learning_rate": 4.726334812872988e-06, "loss": 0.4139, "step": 65150 }, { "epoch": 0.64, "grad_norm": 10.395895957946777, "learning_rate": 4.72621069041874e-06, "loss": 0.2581, "step": 65175 }, { "epoch": 0.64, "grad_norm": 15.346083641052246, "learning_rate": 4.7260865679644915e-06, "loss": 0.3464, "step": 65200 }, { "epoch": 0.64, "grad_norm": 11.076512336730957, "learning_rate": 4.7259624455102435e-06, "loss": 0.2243, "step": 65225 }, { "epoch": 0.64, "grad_norm": 19.980257034301758, "learning_rate": 4.725838323055995e-06, "loss": 0.4709, "step": 65250 }, { "epoch": 0.64, "grad_norm": 8.637491226196289, "learning_rate": 4.725714200601746e-06, "loss": 0.2405, "step": 65275 }, { "epoch": 0.64, "grad_norm": 18.183008193969727, "learning_rate": 4.725590078147498e-06, "loss": 0.3988, "step": 65300 }, { "epoch": 0.64, "grad_norm": 11.614791870117188, "learning_rate": 4.725465955693249e-06, "loss": 0.2562, "step": 65325 }, { "epoch": 0.64, "grad_norm": 20.93361473083496, "learning_rate": 4.725341833239e-06, "loss": 0.3579, "step": 65350 }, { "epoch": 0.64, "grad_norm": 9.504739761352539, "learning_rate": 4.7252177107847525e-06, "loss": 0.2431, "step": 65375 }, { "epoch": 0.64, "grad_norm": 13.03640365600586, "learning_rate": 4.725093588330504e-06, "loss": 0.3808, "step": 65400 }, { "epoch": 0.64, "grad_norm": 15.544629096984863, "learning_rate": 4.724969465876255e-06, "loss": 0.2244, "step": 65425 }, { "epoch": 0.64, "grad_norm": 14.719712257385254, "learning_rate": 4.724845343422007e-06, "loss": 0.3746, "step": 65450 }, { "epoch": 0.64, "grad_norm": 10.667710304260254, "learning_rate": 4.724721220967758e-06, "loss": 0.2051, "step": 65475 }, { "epoch": 0.64, "grad_norm": 12.918051719665527, "learning_rate": 4.724597098513509e-06, "loss": 0.374, "step": 65500 }, { "epoch": 0.64, "grad_norm": 8.34079647064209, "learning_rate": 4.7244729760592614e-06, "loss": 0.2201, "step": 65525 }, { "epoch": 0.64, "grad_norm": 15.583660125732422, "learning_rate": 4.724348853605013e-06, "loss": 0.4053, "step": 65550 }, { "epoch": 0.64, "grad_norm": 12.497974395751953, "learning_rate": 4.724224731150765e-06, "loss": 0.2122, "step": 65575 }, { "epoch": 0.64, "grad_norm": 14.763118743896484, "learning_rate": 4.724100608696516e-06, "loss": 0.3901, "step": 65600 }, { "epoch": 0.65, "grad_norm": 7.329502105712891, "learning_rate": 4.723976486242268e-06, "loss": 0.1741, "step": 65625 }, { "epoch": 0.65, "grad_norm": 18.600753784179688, "learning_rate": 4.723852363788019e-06, "loss": 0.4427, "step": 65650 }, { "epoch": 0.65, "grad_norm": 15.289934158325195, "learning_rate": 4.72372824133377e-06, "loss": 0.2337, "step": 65675 }, { "epoch": 0.65, "grad_norm": 13.137743949890137, "learning_rate": 4.7236041188795225e-06, "loss": 0.4788, "step": 65700 }, { "epoch": 0.65, "grad_norm": 3.6573710441589355, "learning_rate": 4.723479996425274e-06, "loss": 0.2105, "step": 65725 }, { "epoch": 0.65, "grad_norm": 15.215520858764648, "learning_rate": 4.723355873971025e-06, "loss": 0.3741, "step": 65750 }, { "epoch": 0.65, "grad_norm": 2.3655667304992676, "learning_rate": 4.723231751516777e-06, "loss": 0.1836, "step": 65775 }, { "epoch": 0.65, "grad_norm": 22.287811279296875, "learning_rate": 4.723107629062528e-06, "loss": 0.3919, "step": 65800 }, { "epoch": 0.65, "grad_norm": 8.480646133422852, "learning_rate": 4.722983506608279e-06, "loss": 0.2623, "step": 65825 }, { "epoch": 0.65, "grad_norm": 16.592578887939453, "learning_rate": 4.7228593841540314e-06, "loss": 0.3537, "step": 65850 }, { "epoch": 0.65, "grad_norm": 6.793530464172363, "learning_rate": 4.722735261699783e-06, "loss": 0.2323, "step": 65875 }, { "epoch": 0.65, "grad_norm": 17.232210159301758, "learning_rate": 4.722611139245534e-06, "loss": 0.3697, "step": 65900 }, { "epoch": 0.65, "grad_norm": 11.282862663269043, "learning_rate": 4.722487016791286e-06, "loss": 0.2211, "step": 65925 }, { "epoch": 0.65, "grad_norm": 19.496105194091797, "learning_rate": 4.722362894337037e-06, "loss": 0.4453, "step": 65950 }, { "epoch": 0.65, "grad_norm": 15.893335342407227, "learning_rate": 4.722238771882789e-06, "loss": 0.2466, "step": 65975 }, { "epoch": 0.65, "grad_norm": 14.15458869934082, "learning_rate": 4.72211464942854e-06, "loss": 0.3527, "step": 66000 }, { "epoch": 0.65, "grad_norm": 13.222451210021973, "learning_rate": 4.7219905269742924e-06, "loss": 0.2614, "step": 66025 }, { "epoch": 0.65, "grad_norm": Infinity, "learning_rate": 4.7218713694182135e-06, "loss": 0.4285, "step": 66050 }, { "epoch": 0.65, "grad_norm": 20.375831604003906, "learning_rate": 4.7217472469639655e-06, "loss": 0.2759, "step": 66075 }, { "epoch": 0.65, "grad_norm": 28.932144165039062, "learning_rate": 4.721623124509717e-06, "loss": 0.4955, "step": 66100 }, { "epoch": 0.65, "grad_norm": 4.3714518547058105, "learning_rate": 4.721499002055468e-06, "loss": 0.2074, "step": 66125 }, { "epoch": 0.65, "grad_norm": 10.20468807220459, "learning_rate": 4.72137487960122e-06, "loss": 0.38, "step": 66150 }, { "epoch": 0.65, "grad_norm": 10.25092887878418, "learning_rate": 4.721250757146971e-06, "loss": 0.2086, "step": 66175 }, { "epoch": 0.65, "grad_norm": 17.473779678344727, "learning_rate": 4.7211266346927224e-06, "loss": 0.4054, "step": 66200 }, { "epoch": 0.65, "grad_norm": 13.803604125976562, "learning_rate": 4.7210025122384745e-06, "loss": 0.2281, "step": 66225 }, { "epoch": 0.65, "grad_norm": 19.210966110229492, "learning_rate": 4.720878389784226e-06, "loss": 0.4481, "step": 66250 }, { "epoch": 0.65, "grad_norm": 7.180522441864014, "learning_rate": 4.720754267329977e-06, "loss": 0.2414, "step": 66275 }, { "epoch": 0.65, "grad_norm": 17.969614028930664, "learning_rate": 4.720630144875729e-06, "loss": 0.3664, "step": 66300 }, { "epoch": 0.65, "grad_norm": 6.765834331512451, "learning_rate": 4.72050602242148e-06, "loss": 0.2552, "step": 66325 }, { "epoch": 0.65, "grad_norm": 20.59902000427246, "learning_rate": 4.720381899967232e-06, "loss": 0.3715, "step": 66350 }, { "epoch": 0.65, "grad_norm": 5.817336082458496, "learning_rate": 4.7202577775129835e-06, "loss": 0.2181, "step": 66375 }, { "epoch": 0.65, "grad_norm": 19.810468673706055, "learning_rate": 4.7201336550587355e-06, "loss": 0.3617, "step": 66400 }, { "epoch": 0.65, "grad_norm": 9.822418212890625, "learning_rate": 4.720009532604487e-06, "loss": 0.2415, "step": 66425 }, { "epoch": 0.65, "grad_norm": 13.523073196411133, "learning_rate": 4.719885410150239e-06, "loss": 0.4181, "step": 66450 }, { "epoch": 0.65, "grad_norm": 14.232303619384766, "learning_rate": 4.71976128769599e-06, "loss": 0.2273, "step": 66475 }, { "epoch": 0.65, "grad_norm": 14.877684593200684, "learning_rate": 4.719637165241741e-06, "loss": 0.3736, "step": 66500 }, { "epoch": 0.65, "grad_norm": 6.144410610198975, "learning_rate": 4.719513042787493e-06, "loss": 0.2151, "step": 66525 }, { "epoch": 0.65, "grad_norm": 24.129409790039062, "learning_rate": 4.7193889203332445e-06, "loss": 0.3954, "step": 66550 }, { "epoch": 0.65, "grad_norm": 11.187809944152832, "learning_rate": 4.719264797878996e-06, "loss": 0.2625, "step": 66575 }, { "epoch": 0.65, "grad_norm": 18.523632049560547, "learning_rate": 4.719140675424748e-06, "loss": 0.4307, "step": 66600 }, { "epoch": 0.66, "grad_norm": 9.091630935668945, "learning_rate": 4.719016552970499e-06, "loss": 0.2289, "step": 66625 }, { "epoch": 0.66, "grad_norm": 17.782310485839844, "learning_rate": 4.71889243051625e-06, "loss": 0.3983, "step": 66650 }, { "epoch": 0.66, "grad_norm": 13.349950790405273, "learning_rate": 4.718768308062002e-06, "loss": 0.2528, "step": 66675 }, { "epoch": 0.66, "grad_norm": 30.470048904418945, "learning_rate": 4.7186441856077534e-06, "loss": 0.4283, "step": 66700 }, { "epoch": 0.66, "grad_norm": 16.2064266204834, "learning_rate": 4.718520063153505e-06, "loss": 0.2237, "step": 66725 }, { "epoch": 0.66, "grad_norm": 20.09068489074707, "learning_rate": 4.718395940699257e-06, "loss": 0.3881, "step": 66750 }, { "epoch": 0.66, "grad_norm": 8.97260570526123, "learning_rate": 4.718271818245008e-06, "loss": 0.1905, "step": 66775 }, { "epoch": 0.66, "grad_norm": 15.389350891113281, "learning_rate": 4.71814769579076e-06, "loss": 0.3434, "step": 66800 }, { "epoch": 0.66, "grad_norm": 7.770060062408447, "learning_rate": 4.718023573336511e-06, "loss": 0.2909, "step": 66825 }, { "epoch": 0.66, "grad_norm": 14.149503707885742, "learning_rate": 4.717899450882263e-06, "loss": 0.3789, "step": 66850 }, { "epoch": 0.66, "grad_norm": 16.625938415527344, "learning_rate": 4.7177753284280145e-06, "loss": 0.2032, "step": 66875 }, { "epoch": 0.66, "grad_norm": 20.277790069580078, "learning_rate": 4.7176512059737665e-06, "loss": 0.3593, "step": 66900 }, { "epoch": 0.66, "grad_norm": 10.685933113098145, "learning_rate": 4.717527083519518e-06, "loss": 0.2206, "step": 66925 }, { "epoch": 0.66, "grad_norm": 22.425512313842773, "learning_rate": 4.717402961065269e-06, "loss": 0.3793, "step": 66950 }, { "epoch": 0.66, "grad_norm": 10.033339500427246, "learning_rate": 4.71727883861102e-06, "loss": 0.2819, "step": 66975 }, { "epoch": 0.66, "grad_norm": 19.911314010620117, "learning_rate": 4.717154716156772e-06, "loss": 0.3569, "step": 67000 }, { "epoch": 0.66, "grad_norm": 5.968227863311768, "learning_rate": 4.7170305937025234e-06, "loss": 0.25, "step": 67025 }, { "epoch": 0.66, "grad_norm": 17.87217903137207, "learning_rate": 4.716906471248275e-06, "loss": 0.408, "step": 67050 }, { "epoch": 0.66, "grad_norm": 14.927989959716797, "learning_rate": 4.716782348794027e-06, "loss": 0.2333, "step": 67075 }, { "epoch": 0.66, "grad_norm": 23.926294326782227, "learning_rate": 4.716658226339778e-06, "loss": 0.4529, "step": 67100 }, { "epoch": 0.66, "grad_norm": 13.07618236541748, "learning_rate": 4.716534103885529e-06, "loss": 0.2329, "step": 67125 }, { "epoch": 0.66, "grad_norm": 12.683573722839355, "learning_rate": 4.716409981431281e-06, "loss": 0.3945, "step": 67150 }, { "epoch": 0.66, "grad_norm": 8.433199882507324, "learning_rate": 4.716285858977032e-06, "loss": 0.3001, "step": 67175 }, { "epoch": 0.66, "grad_norm": 25.907398223876953, "learning_rate": 4.7161617365227845e-06, "loss": 0.3864, "step": 67200 }, { "epoch": 0.66, "grad_norm": 11.67505168914795, "learning_rate": 4.716037614068536e-06, "loss": 0.2295, "step": 67225 }, { "epoch": 0.66, "grad_norm": 11.989913940429688, "learning_rate": 4.715913491614288e-06, "loss": 0.4199, "step": 67250 }, { "epoch": 0.66, "grad_norm": 9.961359024047852, "learning_rate": 4.715789369160039e-06, "loss": 0.2213, "step": 67275 }, { "epoch": 0.66, "grad_norm": 14.132173538208008, "learning_rate": 4.715665246705791e-06, "loss": 0.3193, "step": 67300 }, { "epoch": 0.66, "grad_norm": 5.171988010406494, "learning_rate": 4.715541124251542e-06, "loss": 0.2135, "step": 67325 }, { "epoch": 0.66, "grad_norm": 14.202604293823242, "learning_rate": 4.715417001797293e-06, "loss": 0.4103, "step": 67350 }, { "epoch": 0.66, "grad_norm": 7.8439202308654785, "learning_rate": 4.7152928793430455e-06, "loss": 0.2549, "step": 67375 }, { "epoch": 0.66, "grad_norm": 15.273367881774902, "learning_rate": 4.715168756888797e-06, "loss": 0.4283, "step": 67400 }, { "epoch": 0.66, "grad_norm": 14.205330848693848, "learning_rate": 4.715044634434548e-06, "loss": 0.2311, "step": 67425 }, { "epoch": 0.66, "grad_norm": 11.15294075012207, "learning_rate": 4.7149205119803e-06, "loss": 0.3327, "step": 67450 }, { "epoch": 0.66, "grad_norm": 11.5526704788208, "learning_rate": 4.714796389526051e-06, "loss": 0.2043, "step": 67475 }, { "epoch": 0.66, "grad_norm": 16.330995559692383, "learning_rate": 4.714672267071802e-06, "loss": 0.4176, "step": 67500 }, { "epoch": 0.66, "grad_norm": 9.515365600585938, "learning_rate": 4.7145481446175544e-06, "loss": 0.2251, "step": 67525 }, { "epoch": 0.66, "grad_norm": 16.557653427124023, "learning_rate": 4.714424022163306e-06, "loss": 0.3914, "step": 67550 }, { "epoch": 0.66, "grad_norm": 5.276176929473877, "learning_rate": 4.714299899709057e-06, "loss": 0.196, "step": 67575 }, { "epoch": 0.66, "grad_norm": 16.219823837280273, "learning_rate": 4.714175777254809e-06, "loss": 0.4178, "step": 67600 }, { "epoch": 0.66, "grad_norm": 8.286964416503906, "learning_rate": 4.71405165480056e-06, "loss": 0.2391, "step": 67625 }, { "epoch": 0.67, "grad_norm": 18.656814575195312, "learning_rate": 4.713927532346312e-06, "loss": 0.3706, "step": 67650 }, { "epoch": 0.67, "grad_norm": 13.105464935302734, "learning_rate": 4.713803409892063e-06, "loss": 0.2043, "step": 67675 }, { "epoch": 0.67, "grad_norm": 19.911874771118164, "learning_rate": 4.7136792874378155e-06, "loss": 0.451, "step": 67700 }, { "epoch": 0.67, "grad_norm": 12.39262866973877, "learning_rate": 4.713555164983567e-06, "loss": 0.2503, "step": 67725 }, { "epoch": 0.67, "grad_norm": 16.68259620666504, "learning_rate": 4.713431042529319e-06, "loss": 0.4842, "step": 67750 }, { "epoch": 0.67, "grad_norm": 11.40378189086914, "learning_rate": 4.71330692007507e-06, "loss": 0.2359, "step": 67775 }, { "epoch": 0.67, "grad_norm": 11.029264450073242, "learning_rate": 4.713182797620821e-06, "loss": 0.3724, "step": 67800 }, { "epoch": 0.67, "grad_norm": 13.420011520385742, "learning_rate": 4.713058675166572e-06, "loss": 0.2499, "step": 67825 }, { "epoch": 0.67, "grad_norm": 21.487913131713867, "learning_rate": 4.712934552712324e-06, "loss": 0.4273, "step": 67850 }, { "epoch": 0.67, "grad_norm": 9.270196914672852, "learning_rate": 4.712810430258076e-06, "loss": 0.2251, "step": 67875 }, { "epoch": 0.67, "grad_norm": 18.473806381225586, "learning_rate": 4.712686307803827e-06, "loss": 0.4018, "step": 67900 }, { "epoch": 0.67, "grad_norm": 6.906260967254639, "learning_rate": 4.712562185349579e-06, "loss": 0.2112, "step": 67925 }, { "epoch": 0.67, "grad_norm": 26.895177841186523, "learning_rate": 4.71243806289533e-06, "loss": 0.4033, "step": 67950 }, { "epoch": 0.67, "grad_norm": 15.042343139648438, "learning_rate": 4.712313940441081e-06, "loss": 0.2408, "step": 67975 }, { "epoch": 0.67, "grad_norm": 15.59880256652832, "learning_rate": 4.712189817986833e-06, "loss": 0.417, "step": 68000 }, { "epoch": 0.67, "grad_norm": 10.478867530822754, "learning_rate": 4.712065695532585e-06, "loss": 0.2524, "step": 68025 }, { "epoch": 0.67, "grad_norm": 19.06991195678711, "learning_rate": 4.711941573078337e-06, "loss": 0.4059, "step": 68050 }, { "epoch": 0.67, "grad_norm": 6.359004974365234, "learning_rate": 4.711817450624088e-06, "loss": 0.2516, "step": 68075 }, { "epoch": 0.67, "grad_norm": 16.003250122070312, "learning_rate": 4.71169332816984e-06, "loss": 0.3625, "step": 68100 }, { "epoch": 0.67, "grad_norm": 15.410901069641113, "learning_rate": 4.711569205715591e-06, "loss": 0.2172, "step": 68125 }, { "epoch": 0.67, "grad_norm": 14.179645538330078, "learning_rate": 4.711445083261343e-06, "loss": 0.3944, "step": 68150 }, { "epoch": 0.67, "grad_norm": 10.246788024902344, "learning_rate": 4.711320960807094e-06, "loss": 0.2814, "step": 68175 }, { "epoch": 0.67, "grad_norm": Infinity, "learning_rate": 4.711201803251016e-06, "loss": 0.4278, "step": 68200 }, { "epoch": 0.67, "grad_norm": 9.390657424926758, "learning_rate": 4.7110776807967675e-06, "loss": 0.2358, "step": 68225 }, { "epoch": 0.67, "grad_norm": 13.802861213684082, "learning_rate": 4.710953558342519e-06, "loss": 0.394, "step": 68250 }, { "epoch": 0.67, "grad_norm": 14.086795806884766, "learning_rate": 4.710829435888271e-06, "loss": 0.1966, "step": 68275 }, { "epoch": 0.67, "grad_norm": 20.868101119995117, "learning_rate": 4.710705313434022e-06, "loss": 0.4023, "step": 68300 }, { "epoch": 0.67, "grad_norm": 13.667937278747559, "learning_rate": 4.710581190979773e-06, "loss": 0.29, "step": 68325 }, { "epoch": 0.67, "grad_norm": 16.23984146118164, "learning_rate": 4.710457068525525e-06, "loss": 0.4227, "step": 68350 }, { "epoch": 0.67, "grad_norm": 6.811930179595947, "learning_rate": 4.7103329460712765e-06, "loss": 0.2188, "step": 68375 }, { "epoch": 0.67, "grad_norm": 12.443225860595703, "learning_rate": 4.710208823617028e-06, "loss": 0.4151, "step": 68400 }, { "epoch": 0.67, "grad_norm": 17.490703582763672, "learning_rate": 4.710084701162779e-06, "loss": 0.2155, "step": 68425 }, { "epoch": 0.67, "grad_norm": 18.64995002746582, "learning_rate": 4.709960578708531e-06, "loss": 0.3928, "step": 68450 }, { "epoch": 0.67, "grad_norm": 11.666152954101562, "learning_rate": 4.709836456254282e-06, "loss": 0.2374, "step": 68475 }, { "epoch": 0.67, "grad_norm": 16.497398376464844, "learning_rate": 4.709712333800034e-06, "loss": 0.4154, "step": 68500 }, { "epoch": 0.67, "grad_norm": 9.02646541595459, "learning_rate": 4.709588211345785e-06, "loss": 0.2182, "step": 68525 }, { "epoch": 0.67, "grad_norm": 13.561775207519531, "learning_rate": 4.7094640888915375e-06, "loss": 0.3854, "step": 68550 }, { "epoch": 0.67, "grad_norm": 6.734775066375732, "learning_rate": 4.709339966437289e-06, "loss": 0.2191, "step": 68575 }, { "epoch": 0.67, "grad_norm": 15.78093147277832, "learning_rate": 4.709215843983041e-06, "loss": 0.3835, "step": 68600 }, { "epoch": 0.67, "grad_norm": 100.79389953613281, "learning_rate": 4.709091721528792e-06, "loss": 0.2099, "step": 68625 }, { "epoch": 0.67, "grad_norm": 9.876262664794922, "learning_rate": 4.708967599074543e-06, "loss": 0.3937, "step": 68650 }, { "epoch": 0.68, "grad_norm": 8.924511909484863, "learning_rate": 4.708843476620295e-06, "loss": 0.2004, "step": 68675 }, { "epoch": 0.68, "grad_norm": 24.146989822387695, "learning_rate": 4.7087193541660464e-06, "loss": 0.4158, "step": 68700 }, { "epoch": 0.68, "grad_norm": 6.365175247192383, "learning_rate": 4.708595231711798e-06, "loss": 0.2243, "step": 68725 }, { "epoch": 0.68, "grad_norm": 27.92540740966797, "learning_rate": 4.70847110925755e-06, "loss": 0.4393, "step": 68750 }, { "epoch": 0.68, "grad_norm": 10.727814674377441, "learning_rate": 4.708346986803301e-06, "loss": 0.2918, "step": 68775 }, { "epoch": 0.68, "grad_norm": 18.538827896118164, "learning_rate": 4.708222864349052e-06, "loss": 0.3842, "step": 68800 }, { "epoch": 0.68, "grad_norm": 9.835701942443848, "learning_rate": 4.708098741894804e-06, "loss": 0.2359, "step": 68825 }, { "epoch": 0.68, "grad_norm": 14.42872428894043, "learning_rate": 4.707974619440555e-06, "loss": 0.4048, "step": 68850 }, { "epoch": 0.68, "grad_norm": 13.099203109741211, "learning_rate": 4.707850496986307e-06, "loss": 0.2093, "step": 68875 }, { "epoch": 0.68, "grad_norm": 17.55560302734375, "learning_rate": 4.707726374532059e-06, "loss": 0.4058, "step": 68900 }, { "epoch": 0.68, "grad_norm": 10.874061584472656, "learning_rate": 4.70760225207781e-06, "loss": 0.2214, "step": 68925 }, { "epoch": 0.68, "grad_norm": 19.40294075012207, "learning_rate": 4.707478129623562e-06, "loss": 0.3874, "step": 68950 }, { "epoch": 0.68, "grad_norm": 16.3079776763916, "learning_rate": 4.707354007169313e-06, "loss": 0.2327, "step": 68975 }, { "epoch": 0.68, "grad_norm": 18.9360408782959, "learning_rate": 4.707229884715065e-06, "loss": 0.443, "step": 69000 }, { "epoch": 0.68, "grad_norm": 11.727006912231445, "learning_rate": 4.707105762260816e-06, "loss": 0.2398, "step": 69025 }, { "epoch": 0.68, "grad_norm": 19.082805633544922, "learning_rate": 4.7069816398065685e-06, "loss": 0.4602, "step": 69050 }, { "epoch": 0.68, "grad_norm": 11.125679016113281, "learning_rate": 4.70685751735232e-06, "loss": 0.2277, "step": 69075 }, { "epoch": 0.68, "grad_norm": 12.452330589294434, "learning_rate": 4.706733394898071e-06, "loss": 0.3985, "step": 69100 }, { "epoch": 0.68, "grad_norm": 8.05204963684082, "learning_rate": 4.706609272443823e-06, "loss": 0.221, "step": 69125 }, { "epoch": 0.68, "grad_norm": 18.652143478393555, "learning_rate": 4.706485149989574e-06, "loss": 0.3916, "step": 69150 }, { "epoch": 0.68, "grad_norm": 10.983165740966797, "learning_rate": 4.706361027535325e-06, "loss": 0.2468, "step": 69175 }, { "epoch": 0.68, "grad_norm": 15.711796760559082, "learning_rate": 4.7062369050810774e-06, "loss": 0.4092, "step": 69200 }, { "epoch": 0.68, "grad_norm": 9.701367378234863, "learning_rate": 4.706112782626829e-06, "loss": 0.2344, "step": 69225 }, { "epoch": 0.68, "grad_norm": 14.095653533935547, "learning_rate": 4.70598866017258e-06, "loss": 0.4193, "step": 69250 }, { "epoch": 0.68, "grad_norm": 12.973346710205078, "learning_rate": 4.705864537718332e-06, "loss": 0.2556, "step": 69275 }, { "epoch": 0.68, "grad_norm": 11.359575271606445, "learning_rate": 4.705740415264083e-06, "loss": 0.3773, "step": 69300 }, { "epoch": 0.68, "grad_norm": 6.233964920043945, "learning_rate": 4.705616292809835e-06, "loss": 0.2285, "step": 69325 }, { "epoch": 0.68, "grad_norm": 19.080095291137695, "learning_rate": 4.705492170355586e-06, "loss": 0.3866, "step": 69350 }, { "epoch": 0.68, "grad_norm": 12.678186416625977, "learning_rate": 4.7053680479013385e-06, "loss": 0.1877, "step": 69375 }, { "epoch": 0.68, "grad_norm": 12.548133850097656, "learning_rate": 4.70524392544709e-06, "loss": 0.3856, "step": 69400 }, { "epoch": 0.68, "grad_norm": 6.132906913757324, "learning_rate": 4.705119802992841e-06, "loss": 0.178, "step": 69425 }, { "epoch": 0.68, "grad_norm": 15.493829727172852, "learning_rate": 4.704995680538593e-06, "loss": 0.463, "step": 69450 }, { "epoch": 0.68, "grad_norm": 3.9654464721679688, "learning_rate": 4.704871558084344e-06, "loss": 0.2502, "step": 69475 }, { "epoch": 0.68, "grad_norm": 10.673737525939941, "learning_rate": 4.704747435630095e-06, "loss": 0.348, "step": 69500 }, { "epoch": 0.68, "grad_norm": 9.651604652404785, "learning_rate": 4.7046233131758474e-06, "loss": 0.1968, "step": 69525 }, { "epoch": 0.68, "grad_norm": 19.78862762451172, "learning_rate": 4.704499190721599e-06, "loss": 0.4461, "step": 69550 }, { "epoch": 0.68, "grad_norm": 6.4505791664123535, "learning_rate": 4.70437506826735e-06, "loss": 0.2333, "step": 69575 }, { "epoch": 0.68, "grad_norm": 19.435997009277344, "learning_rate": 4.704250945813102e-06, "loss": 0.402, "step": 69600 }, { "epoch": 0.68, "grad_norm": 8.997078895568848, "learning_rate": 4.704126823358853e-06, "loss": 0.2124, "step": 69625 }, { "epoch": 0.68, "grad_norm": 18.089611053466797, "learning_rate": 4.704002700904604e-06, "loss": 0.4361, "step": 69650 }, { "epoch": 0.69, "grad_norm": 9.480117797851562, "learning_rate": 4.703878578450356e-06, "loss": 0.1997, "step": 69675 }, { "epoch": 0.69, "grad_norm": 16.86829376220703, "learning_rate": 4.703754455996108e-06, "loss": 0.3966, "step": 69700 }, { "epoch": 0.69, "grad_norm": 5.495416164398193, "learning_rate": 4.70363033354186e-06, "loss": 0.196, "step": 69725 }, { "epoch": 0.69, "grad_norm": 13.280668258666992, "learning_rate": 4.703506211087611e-06, "loss": 0.4321, "step": 69750 }, { "epoch": 0.69, "grad_norm": 5.374019622802734, "learning_rate": 4.703382088633363e-06, "loss": 0.2007, "step": 69775 }, { "epoch": 0.69, "grad_norm": 15.150514602661133, "learning_rate": 4.703257966179114e-06, "loss": 0.3618, "step": 69800 }, { "epoch": 0.69, "grad_norm": 14.49991226196289, "learning_rate": 4.703133843724866e-06, "loss": 0.2523, "step": 69825 }, { "epoch": 0.69, "grad_norm": 13.068975448608398, "learning_rate": 4.703009721270617e-06, "loss": 0.3892, "step": 69850 }, { "epoch": 0.69, "grad_norm": 10.777725219726562, "learning_rate": 4.702885598816369e-06, "loss": 0.2499, "step": 69875 }, { "epoch": 0.69, "grad_norm": 18.145559310913086, "learning_rate": 4.702761476362121e-06, "loss": 0.419, "step": 69900 }, { "epoch": 0.69, "grad_norm": 4.6879119873046875, "learning_rate": 4.702637353907872e-06, "loss": 0.1901, "step": 69925 }, { "epoch": 0.69, "grad_norm": 17.587993621826172, "learning_rate": 4.702513231453623e-06, "loss": 0.4022, "step": 69950 }, { "epoch": 0.69, "grad_norm": 7.410604000091553, "learning_rate": 4.702389108999375e-06, "loss": 0.2518, "step": 69975 }, { "epoch": 0.69, "grad_norm": 15.55375862121582, "learning_rate": 4.702264986545126e-06, "loss": 0.3517, "step": 70000 }, { "epoch": 0.69, "grad_norm": 10.615499496459961, "learning_rate": 4.702140864090878e-06, "loss": 0.1708, "step": 70025 }, { "epoch": 0.69, "grad_norm": 11.818323135375977, "learning_rate": 4.70201674163663e-06, "loss": 0.4723, "step": 70050 }, { "epoch": 0.69, "grad_norm": 17.563718795776367, "learning_rate": 4.701892619182381e-06, "loss": 0.2391, "step": 70075 }, { "epoch": 0.69, "grad_norm": 14.941967010498047, "learning_rate": 4.701768496728132e-06, "loss": 0.4313, "step": 70100 }, { "epoch": 0.69, "grad_norm": 12.572134971618652, "learning_rate": 4.701644374273884e-06, "loss": 0.2033, "step": 70125 }, { "epoch": 0.69, "grad_norm": 12.100427627563477, "learning_rate": 4.701520251819635e-06, "loss": 0.4311, "step": 70150 }, { "epoch": 0.69, "grad_norm": 6.221564769744873, "learning_rate": 4.701396129365387e-06, "loss": 0.2441, "step": 70175 }, { "epoch": 0.69, "grad_norm": 18.621749877929688, "learning_rate": 4.701272006911139e-06, "loss": 0.4481, "step": 70200 }, { "epoch": 0.69, "grad_norm": 6.698835372924805, "learning_rate": 4.701147884456891e-06, "loss": 0.2201, "step": 70225 }, { "epoch": 0.69, "grad_norm": 18.3475341796875, "learning_rate": 4.701023762002642e-06, "loss": 0.3998, "step": 70250 }, { "epoch": 0.69, "grad_norm": 7.474917411804199, "learning_rate": 4.700899639548393e-06, "loss": 0.2271, "step": 70275 }, { "epoch": 0.69, "grad_norm": 11.777044296264648, "learning_rate": 4.700775517094145e-06, "loss": 0.4278, "step": 70300 }, { "epoch": 0.69, "grad_norm": 0.8287597298622131, "learning_rate": 4.700651394639896e-06, "loss": 0.2271, "step": 70325 }, { "epoch": 0.69, "grad_norm": 9.596585273742676, "learning_rate": 4.7005272721856476e-06, "loss": 0.4027, "step": 70350 }, { "epoch": 0.69, "grad_norm": 10.41602897644043, "learning_rate": 4.7004031497314e-06, "loss": 0.2234, "step": 70375 }, { "epoch": 0.69, "grad_norm": 18.122529983520508, "learning_rate": 4.700279027277151e-06, "loss": 0.3984, "step": 70400 }, { "epoch": 0.69, "grad_norm": 4.093145847320557, "learning_rate": 4.700154904822902e-06, "loss": 0.1961, "step": 70425 }, { "epoch": 0.69, "grad_norm": 12.22458267211914, "learning_rate": 4.700030782368654e-06, "loss": 0.414, "step": 70450 }, { "epoch": 0.69, "grad_norm": 13.60293197631836, "learning_rate": 4.699906659914405e-06, "loss": 0.2423, "step": 70475 }, { "epoch": 0.69, "grad_norm": Infinity, "learning_rate": 4.699787502358327e-06, "loss": 0.3866, "step": 70500 }, { "epoch": 0.69, "grad_norm": 9.929112434387207, "learning_rate": 4.699663379904078e-06, "loss": 0.2163, "step": 70525 }, { "epoch": 0.69, "grad_norm": 16.94806480407715, "learning_rate": 4.69953925744983e-06, "loss": 0.4434, "step": 70550 }, { "epoch": 0.69, "grad_norm": 10.000528335571289, "learning_rate": 4.699415134995582e-06, "loss": 0.2245, "step": 70575 }, { "epoch": 0.69, "grad_norm": 13.506429672241211, "learning_rate": 4.699291012541333e-06, "loss": 0.4305, "step": 70600 }, { "epoch": 0.69, "grad_norm": 6.946115016937256, "learning_rate": 4.699166890087085e-06, "loss": 0.1981, "step": 70625 }, { "epoch": 0.69, "grad_norm": 18.988176345825195, "learning_rate": 4.699042767632836e-06, "loss": 0.4439, "step": 70650 }, { "epoch": 0.69, "grad_norm": 9.278717041015625, "learning_rate": 4.698918645178588e-06, "loss": 0.1928, "step": 70675 }, { "epoch": 0.7, "grad_norm": 13.793976783752441, "learning_rate": 4.6987945227243394e-06, "loss": 0.4063, "step": 70700 }, { "epoch": 0.7, "grad_norm": 11.492413520812988, "learning_rate": 4.698670400270091e-06, "loss": 0.226, "step": 70725 }, { "epoch": 0.7, "grad_norm": 10.745796203613281, "learning_rate": 4.698546277815843e-06, "loss": 0.4253, "step": 70750 }, { "epoch": 0.7, "grad_norm": 5.485611438751221, "learning_rate": 4.698422155361594e-06, "loss": 0.2234, "step": 70775 }, { "epoch": 0.7, "grad_norm": 11.294568061828613, "learning_rate": 4.698298032907345e-06, "loss": 0.3729, "step": 70800 }, { "epoch": 0.7, "grad_norm": 10.819905281066895, "learning_rate": 4.698173910453097e-06, "loss": 0.2255, "step": 70825 }, { "epoch": 0.7, "grad_norm": 23.14632797241211, "learning_rate": 4.698049787998848e-06, "loss": 0.436, "step": 70850 }, { "epoch": 0.7, "grad_norm": 10.184884071350098, "learning_rate": 4.6979256655446e-06, "loss": 0.2338, "step": 70875 }, { "epoch": 0.7, "grad_norm": 14.125219345092773, "learning_rate": 4.697801543090352e-06, "loss": 0.353, "step": 70900 }, { "epoch": 0.7, "grad_norm": 11.398466110229492, "learning_rate": 4.697677420636103e-06, "loss": 0.1931, "step": 70925 }, { "epoch": 0.7, "grad_norm": 12.139673233032227, "learning_rate": 4.697553298181854e-06, "loss": 0.368, "step": 70950 }, { "epoch": 0.7, "grad_norm": 12.634056091308594, "learning_rate": 4.697429175727606e-06, "loss": 0.2075, "step": 70975 }, { "epoch": 0.7, "grad_norm": 8.636940002441406, "learning_rate": 4.697305053273357e-06, "loss": 0.3811, "step": 71000 }, { "epoch": 0.7, "grad_norm": 9.278968811035156, "learning_rate": 4.697180930819109e-06, "loss": 0.2534, "step": 71025 }, { "epoch": 0.7, "grad_norm": 13.708889961242676, "learning_rate": 4.697056808364861e-06, "loss": 0.4504, "step": 71050 }, { "epoch": 0.7, "grad_norm": 10.697529792785645, "learning_rate": 4.696932685910613e-06, "loss": 0.226, "step": 71075 }, { "epoch": 0.7, "grad_norm": 28.26177978515625, "learning_rate": 4.696808563456364e-06, "loss": 0.39, "step": 71100 }, { "epoch": 0.7, "grad_norm": 13.944071769714355, "learning_rate": 4.696684441002116e-06, "loss": 0.2506, "step": 71125 }, { "epoch": 0.7, "grad_norm": 22.43869972229004, "learning_rate": 4.696560318547867e-06, "loss": 0.4354, "step": 71150 }, { "epoch": 0.7, "grad_norm": 7.885863780975342, "learning_rate": 4.696436196093618e-06, "loss": 0.2034, "step": 71175 }, { "epoch": 0.7, "grad_norm": 12.427787780761719, "learning_rate": 4.6963120736393704e-06, "loss": 0.3404, "step": 71200 }, { "epoch": 0.7, "grad_norm": 7.611198902130127, "learning_rate": 4.696187951185122e-06, "loss": 0.28, "step": 71225 }, { "epoch": 0.7, "grad_norm": 15.320954322814941, "learning_rate": 4.696063828730873e-06, "loss": 0.3864, "step": 71250 }, { "epoch": 0.7, "grad_norm": 10.328238487243652, "learning_rate": 4.695939706276625e-06, "loss": 0.206, "step": 71275 }, { "epoch": 0.7, "grad_norm": 15.83838176727295, "learning_rate": 4.695815583822376e-06, "loss": 0.3942, "step": 71300 }, { "epoch": 0.7, "grad_norm": 15.56348705291748, "learning_rate": 4.695691461368127e-06, "loss": 0.2195, "step": 71325 }, { "epoch": 0.7, "grad_norm": 15.569214820861816, "learning_rate": 4.695567338913879e-06, "loss": 0.3974, "step": 71350 }, { "epoch": 0.7, "grad_norm": 6.922055244445801, "learning_rate": 4.695443216459631e-06, "loss": 0.2494, "step": 71375 }, { "epoch": 0.7, "grad_norm": 20.44749641418457, "learning_rate": 4.695319094005382e-06, "loss": 0.4069, "step": 71400 }, { "epoch": 0.7, "grad_norm": 5.559786319732666, "learning_rate": 4.695194971551134e-06, "loss": 0.2665, "step": 71425 }, { "epoch": 0.7, "grad_norm": 19.40376853942871, "learning_rate": 4.695070849096885e-06, "loss": 0.381, "step": 71450 }, { "epoch": 0.7, "grad_norm": 8.927835464477539, "learning_rate": 4.694946726642637e-06, "loss": 0.2207, "step": 71475 }, { "epoch": 0.7, "grad_norm": 18.6732120513916, "learning_rate": 4.694822604188388e-06, "loss": 0.3926, "step": 71500 }, { "epoch": 0.7, "grad_norm": 6.474820137023926, "learning_rate": 4.69469848173414e-06, "loss": 0.2135, "step": 71525 }, { "epoch": 0.7, "grad_norm": 24.78487777709961, "learning_rate": 4.694574359279892e-06, "loss": 0.3873, "step": 71550 }, { "epoch": 0.7, "grad_norm": 8.34357738494873, "learning_rate": 4.694450236825643e-06, "loss": 0.2233, "step": 71575 }, { "epoch": 0.7, "grad_norm": 15.312056541442871, "learning_rate": 4.694326114371395e-06, "loss": 0.4648, "step": 71600 }, { "epoch": 0.7, "grad_norm": 7.1391987800598145, "learning_rate": 4.694201991917146e-06, "loss": 0.2311, "step": 71625 }, { "epoch": 0.7, "grad_norm": 15.550582885742188, "learning_rate": 4.694077869462897e-06, "loss": 0.4192, "step": 71650 }, { "epoch": 0.7, "grad_norm": 10.010726928710938, "learning_rate": 4.693953747008649e-06, "loss": 0.1965, "step": 71675 }, { "epoch": 0.7, "grad_norm": 14.235206604003906, "learning_rate": 4.693829624554401e-06, "loss": 0.4455, "step": 71700 }, { "epoch": 0.71, "grad_norm": 7.355916500091553, "learning_rate": 4.693705502100152e-06, "loss": 0.2098, "step": 71725 }, { "epoch": 0.71, "grad_norm": 18.086030960083008, "learning_rate": 4.693581379645904e-06, "loss": 0.4136, "step": 71750 }, { "epoch": 0.71, "grad_norm": 11.359256744384766, "learning_rate": 4.693457257191655e-06, "loss": 0.2253, "step": 71775 }, { "epoch": 0.71, "grad_norm": 12.756499290466309, "learning_rate": 4.693333134737406e-06, "loss": 0.4321, "step": 71800 }, { "epoch": 0.71, "grad_norm": 6.275032043457031, "learning_rate": 4.693209012283158e-06, "loss": 0.2472, "step": 71825 }, { "epoch": 0.71, "grad_norm": 22.303895950317383, "learning_rate": 4.6930848898289096e-06, "loss": 0.5013, "step": 71850 }, { "epoch": 0.71, "grad_norm": 12.547204971313477, "learning_rate": 4.692960767374662e-06, "loss": 0.2538, "step": 71875 }, { "epoch": 0.71, "grad_norm": 11.802750587463379, "learning_rate": 4.692836644920413e-06, "loss": 0.3702, "step": 71900 }, { "epoch": 0.71, "grad_norm": 8.183605194091797, "learning_rate": 4.692712522466165e-06, "loss": 0.2088, "step": 71925 }, { "epoch": 0.71, "grad_norm": 14.948104858398438, "learning_rate": 4.692588400011916e-06, "loss": 0.3221, "step": 71950 }, { "epoch": 0.71, "grad_norm": 3.1007931232452393, "learning_rate": 4.692464277557668e-06, "loss": 0.2179, "step": 71975 }, { "epoch": 0.71, "grad_norm": 19.10137176513672, "learning_rate": 4.692340155103419e-06, "loss": 0.3989, "step": 72000 }, { "epoch": 0.71, "grad_norm": 12.412896156311035, "learning_rate": 4.6922160326491706e-06, "loss": 0.1957, "step": 72025 }, { "epoch": 0.71, "grad_norm": 16.41411781311035, "learning_rate": 4.692091910194923e-06, "loss": 0.3859, "step": 72050 }, { "epoch": 0.71, "grad_norm": 15.023660659790039, "learning_rate": 4.691967787740674e-06, "loss": 0.2147, "step": 72075 }, { "epoch": 0.71, "grad_norm": 23.186092376708984, "learning_rate": 4.691843665286425e-06, "loss": 0.4563, "step": 72100 }, { "epoch": 0.71, "grad_norm": 5.025132656097412, "learning_rate": 4.691719542832177e-06, "loss": 0.2009, "step": 72125 }, { "epoch": 0.71, "grad_norm": 6.933465957641602, "learning_rate": 4.691595420377928e-06, "loss": 0.4097, "step": 72150 }, { "epoch": 0.71, "grad_norm": 7.641866207122803, "learning_rate": 4.6914712979236795e-06, "loss": 0.2271, "step": 72175 }, { "epoch": 0.71, "grad_norm": 18.530147552490234, "learning_rate": 4.691347175469432e-06, "loss": 0.4168, "step": 72200 }, { "epoch": 0.71, "grad_norm": 10.660435676574707, "learning_rate": 4.691223053015183e-06, "loss": 0.2255, "step": 72225 }, { "epoch": 0.71, "grad_norm": 15.517437934875488, "learning_rate": 4.691098930560935e-06, "loss": 0.3953, "step": 72250 }, { "epoch": 0.71, "grad_norm": 9.813968658447266, "learning_rate": 4.690974808106686e-06, "loss": 0.2359, "step": 72275 }, { "epoch": 0.71, "grad_norm": 14.103626251220703, "learning_rate": 4.690850685652438e-06, "loss": 0.4143, "step": 72300 }, { "epoch": 0.71, "grad_norm": 9.672545433044434, "learning_rate": 4.690726563198189e-06, "loss": 0.1959, "step": 72325 }, { "epoch": 0.71, "grad_norm": 15.441216468811035, "learning_rate": 4.690602440743941e-06, "loss": 0.3626, "step": 72350 }, { "epoch": 0.71, "grad_norm": 9.802533149719238, "learning_rate": 4.690478318289693e-06, "loss": 0.2238, "step": 72375 }, { "epoch": 0.71, "grad_norm": 13.968785285949707, "learning_rate": 4.690354195835444e-06, "loss": 0.388, "step": 72400 }, { "epoch": 0.71, "grad_norm": 8.35837459564209, "learning_rate": 4.690230073381195e-06, "loss": 0.2397, "step": 72425 }, { "epoch": 0.71, "grad_norm": 17.80441665649414, "learning_rate": 4.690105950926947e-06, "loss": 0.3708, "step": 72450 }, { "epoch": 0.71, "grad_norm": 11.710013389587402, "learning_rate": 4.689981828472698e-06, "loss": 0.2721, "step": 72475 }, { "epoch": 0.71, "grad_norm": 12.21774959564209, "learning_rate": 4.6898577060184495e-06, "loss": 0.4349, "step": 72500 }, { "epoch": 0.71, "grad_norm": 7.465461254119873, "learning_rate": 4.689733583564202e-06, "loss": 0.2259, "step": 72525 }, { "epoch": 0.71, "grad_norm": 19.25328826904297, "learning_rate": 4.689614426008123e-06, "loss": 0.3713, "step": 72550 }, { "epoch": 0.71, "grad_norm": 10.995989799499512, "learning_rate": 4.689490303553875e-06, "loss": 0.2118, "step": 72575 }, { "epoch": 0.71, "grad_norm": 11.468853950500488, "learning_rate": 4.689366181099626e-06, "loss": 0.3867, "step": 72600 }, { "epoch": 0.71, "grad_norm": 8.509323120117188, "learning_rate": 4.689242058645377e-06, "loss": 0.1884, "step": 72625 }, { "epoch": 0.71, "grad_norm": 12.153197288513184, "learning_rate": 4.689117936191129e-06, "loss": 0.3688, "step": 72650 }, { "epoch": 0.71, "grad_norm": 9.147745132446289, "learning_rate": 4.68899381373688e-06, "loss": 0.2032, "step": 72675 }, { "epoch": 0.71, "grad_norm": 12.20984935760498, "learning_rate": 4.688869691282632e-06, "loss": 0.297, "step": 72700 }, { "epoch": 0.72, "grad_norm": 8.803332328796387, "learning_rate": 4.688745568828384e-06, "loss": 0.1516, "step": 72725 }, { "epoch": 0.72, "grad_norm": 20.964200973510742, "learning_rate": 4.688621446374136e-06, "loss": 0.3941, "step": 72750 }, { "epoch": 0.72, "grad_norm": 10.277998924255371, "learning_rate": 4.688497323919887e-06, "loss": 0.2135, "step": 72775 }, { "epoch": 0.72, "grad_norm": 13.095494270324707, "learning_rate": 4.688373201465639e-06, "loss": 0.4708, "step": 72800 }, { "epoch": 0.72, "grad_norm": 14.179704666137695, "learning_rate": 4.68824907901139e-06, "loss": 0.2226, "step": 72825 }, { "epoch": 0.72, "grad_norm": 11.577832221984863, "learning_rate": 4.688124956557141e-06, "loss": 0.419, "step": 72850 }, { "epoch": 0.72, "grad_norm": 9.214800834655762, "learning_rate": 4.6880008341028934e-06, "loss": 0.2265, "step": 72875 }, { "epoch": 0.72, "grad_norm": 19.237232208251953, "learning_rate": 4.687876711648645e-06, "loss": 0.3417, "step": 72900 }, { "epoch": 0.72, "grad_norm": 9.023896217346191, "learning_rate": 4.687752589194396e-06, "loss": 0.2193, "step": 72925 }, { "epoch": 0.72, "grad_norm": 15.24543571472168, "learning_rate": 4.687628466740148e-06, "loss": 0.4023, "step": 72950 }, { "epoch": 0.72, "grad_norm": 7.803258419036865, "learning_rate": 4.687504344285899e-06, "loss": 0.2, "step": 72975 }, { "epoch": 0.72, "grad_norm": 16.23072624206543, "learning_rate": 4.68738022183165e-06, "loss": 0.3467, "step": 73000 }, { "epoch": 0.72, "grad_norm": 10.045574188232422, "learning_rate": 4.6872560993774016e-06, "loss": 0.2289, "step": 73025 }, { "epoch": 0.72, "grad_norm": 16.13597297668457, "learning_rate": 4.687131976923154e-06, "loss": 0.4271, "step": 73050 }, { "epoch": 0.72, "grad_norm": 11.85981273651123, "learning_rate": 4.687007854468905e-06, "loss": 0.2318, "step": 73075 }, { "epoch": 0.72, "grad_norm": 16.16217613220215, "learning_rate": 4.686883732014657e-06, "loss": 0.4061, "step": 73100 }, { "epoch": 0.72, "grad_norm": 2.7025535106658936, "learning_rate": 4.686759609560408e-06, "loss": 0.1993, "step": 73125 }, { "epoch": 0.72, "grad_norm": 22.503131866455078, "learning_rate": 4.68663548710616e-06, "loss": 0.4298, "step": 73150 }, { "epoch": 0.72, "grad_norm": 8.457064628601074, "learning_rate": 4.686511364651911e-06, "loss": 0.2009, "step": 73175 }, { "epoch": 0.72, "grad_norm": 19.623287200927734, "learning_rate": 4.686387242197663e-06, "loss": 0.3888, "step": 73200 }, { "epoch": 0.72, "grad_norm": 8.676495552062988, "learning_rate": 4.686263119743415e-06, "loss": 0.2501, "step": 73225 }, { "epoch": 0.72, "grad_norm": 7.60063362121582, "learning_rate": 4.686138997289166e-06, "loss": 0.3807, "step": 73250 }, { "epoch": 0.72, "grad_norm": 14.51423454284668, "learning_rate": 4.686014874834918e-06, "loss": 0.24, "step": 73275 }, { "epoch": 0.72, "grad_norm": 5.450240135192871, "learning_rate": 4.685890752380669e-06, "loss": 0.4018, "step": 73300 }, { "epoch": 0.72, "grad_norm": 5.060211658477783, "learning_rate": 4.68576662992642e-06, "loss": 0.1769, "step": 73325 }, { "epoch": 0.72, "grad_norm": 20.936250686645508, "learning_rate": 4.685642507472172e-06, "loss": 0.3273, "step": 73350 }, { "epoch": 0.72, "grad_norm": 10.653822898864746, "learning_rate": 4.685518385017924e-06, "loss": 0.2369, "step": 73375 }, { "epoch": 0.72, "grad_norm": 11.609609603881836, "learning_rate": 4.685394262563675e-06, "loss": 0.3567, "step": 73400 }, { "epoch": 0.72, "grad_norm": 5.118774890899658, "learning_rate": 4.685270140109427e-06, "loss": 0.2065, "step": 73425 }, { "epoch": 0.72, "grad_norm": 15.398567199707031, "learning_rate": 4.685146017655178e-06, "loss": 0.3809, "step": 73450 }, { "epoch": 0.72, "grad_norm": 10.889557838439941, "learning_rate": 4.685021895200929e-06, "loss": 0.254, "step": 73475 }, { "epoch": 0.72, "grad_norm": 9.945869445800781, "learning_rate": 4.684897772746681e-06, "loss": 0.4225, "step": 73500 }, { "epoch": 0.72, "grad_norm": 10.784631729125977, "learning_rate": 4.6847736502924326e-06, "loss": 0.2381, "step": 73525 }, { "epoch": 0.72, "grad_norm": 12.93691635131836, "learning_rate": 4.684649527838185e-06, "loss": 0.372, "step": 73550 }, { "epoch": 0.72, "grad_norm": 2.9721388816833496, "learning_rate": 4.684525405383936e-06, "loss": 0.2262, "step": 73575 }, { "epoch": 0.72, "grad_norm": 13.635061264038086, "learning_rate": 4.684401282929688e-06, "loss": 0.4226, "step": 73600 }, { "epoch": 0.72, "grad_norm": 15.299574851989746, "learning_rate": 4.684277160475439e-06, "loss": 0.2023, "step": 73625 }, { "epoch": 0.72, "grad_norm": 15.79573917388916, "learning_rate": 4.684153038021191e-06, "loss": 0.4031, "step": 73650 }, { "epoch": 0.72, "grad_norm": 12.721120834350586, "learning_rate": 4.684028915566942e-06, "loss": 0.2223, "step": 73675 }, { "epoch": 0.72, "grad_norm": 3.2842416763305664, "learning_rate": 4.683904793112694e-06, "loss": 0.3968, "step": 73700 }, { "epoch": 0.72, "grad_norm": 10.52178955078125, "learning_rate": 4.683780670658446e-06, "loss": 0.2176, "step": 73725 }, { "epoch": 0.73, "grad_norm": 24.778141021728516, "learning_rate": 4.683656548204197e-06, "loss": 0.4807, "step": 73750 }, { "epoch": 0.73, "grad_norm": 11.333699226379395, "learning_rate": 4.683532425749948e-06, "loss": 0.2501, "step": 73775 }, { "epoch": 0.73, "grad_norm": 18.02165985107422, "learning_rate": 4.6834083032957e-06, "loss": 0.3473, "step": 73800 }, { "epoch": 0.73, "grad_norm": 10.769251823425293, "learning_rate": 4.683284180841451e-06, "loss": 0.2043, "step": 73825 }, { "epoch": 0.73, "grad_norm": 15.288529396057129, "learning_rate": 4.6831600583872025e-06, "loss": 0.3532, "step": 73850 }, { "epoch": 0.73, "grad_norm": 6.1409783363342285, "learning_rate": 4.683035935932954e-06, "loss": 0.1614, "step": 73875 }, { "epoch": 0.73, "grad_norm": 10.00924301147461, "learning_rate": 4.682911813478706e-06, "loss": 0.3888, "step": 73900 }, { "epoch": 0.73, "grad_norm": 8.497122764587402, "learning_rate": 4.682787691024457e-06, "loss": 0.2149, "step": 73925 }, { "epoch": 0.73, "grad_norm": 19.902023315429688, "learning_rate": 4.682663568570209e-06, "loss": 0.399, "step": 73950 }, { "epoch": 0.73, "grad_norm": 7.4475321769714355, "learning_rate": 4.68253944611596e-06, "loss": 0.2059, "step": 73975 }, { "epoch": 0.73, "grad_norm": 15.714587211608887, "learning_rate": 4.682415323661712e-06, "loss": 0.3609, "step": 74000 }, { "epoch": 0.73, "grad_norm": 13.664435386657715, "learning_rate": 4.6822912012074636e-06, "loss": 0.2103, "step": 74025 }, { "epoch": 0.73, "grad_norm": 16.038766860961914, "learning_rate": 4.682167078753216e-06, "loss": 0.4113, "step": 74050 }, { "epoch": 0.73, "grad_norm": 10.416951179504395, "learning_rate": 4.682042956298967e-06, "loss": 0.1734, "step": 74075 }, { "epoch": 0.73, "grad_norm": 15.427781105041504, "learning_rate": 4.681918833844718e-06, "loss": 0.3699, "step": 74100 }, { "epoch": 0.73, "grad_norm": 7.087953090667725, "learning_rate": 4.68179471139047e-06, "loss": 0.2173, "step": 74125 }, { "epoch": 0.73, "grad_norm": 15.678176879882812, "learning_rate": 4.681670588936221e-06, "loss": 0.4176, "step": 74150 }, { "epoch": 0.73, "grad_norm": 8.610845565795898, "learning_rate": 4.6815464664819725e-06, "loss": 0.2351, "step": 74175 }, { "epoch": 0.73, "grad_norm": 15.753218650817871, "learning_rate": 4.681422344027725e-06, "loss": 0.4126, "step": 74200 }, { "epoch": 0.73, "grad_norm": 8.533904075622559, "learning_rate": 4.681298221573476e-06, "loss": 0.1925, "step": 74225 }, { "epoch": 0.73, "grad_norm": 19.679645538330078, "learning_rate": 4.681174099119227e-06, "loss": 0.4163, "step": 74250 }, { "epoch": 0.73, "grad_norm": 6.498769283294678, "learning_rate": 4.681049976664979e-06, "loss": 0.2216, "step": 74275 }, { "epoch": 0.73, "grad_norm": 45.791603088378906, "learning_rate": 4.68092585421073e-06, "loss": 0.463, "step": 74300 }, { "epoch": 0.73, "grad_norm": 10.658093452453613, "learning_rate": 4.6808017317564815e-06, "loss": 0.225, "step": 74325 }, { "epoch": 0.73, "grad_norm": 18.826457977294922, "learning_rate": 4.6806776093022335e-06, "loss": 0.3988, "step": 74350 }, { "epoch": 0.73, "grad_norm": 7.3859171867370605, "learning_rate": 4.680553486847985e-06, "loss": 0.1776, "step": 74375 }, { "epoch": 0.73, "grad_norm": 12.906993865966797, "learning_rate": 4.680429364393737e-06, "loss": 0.3941, "step": 74400 }, { "epoch": 0.73, "grad_norm": 3.567929983139038, "learning_rate": 4.680305241939488e-06, "loss": 0.1718, "step": 74425 }, { "epoch": 0.73, "grad_norm": 19.43429183959961, "learning_rate": 4.68018111948524e-06, "loss": 0.3807, "step": 74450 }, { "epoch": 0.73, "grad_norm": 5.052545547485352, "learning_rate": 4.680056997030991e-06, "loss": 0.2032, "step": 74475 }, { "epoch": 0.73, "grad_norm": 10.042625427246094, "learning_rate": 4.679932874576743e-06, "loss": 0.3634, "step": 74500 }, { "epoch": 0.73, "grad_norm": 8.527515411376953, "learning_rate": 4.6798087521224946e-06, "loss": 0.19, "step": 74525 }, { "epoch": 0.73, "grad_norm": 15.301850318908691, "learning_rate": 4.679684629668246e-06, "loss": 0.4264, "step": 74550 }, { "epoch": 0.73, "grad_norm": 7.8541669845581055, "learning_rate": 4.679560507213998e-06, "loss": 0.1792, "step": 74575 }, { "epoch": 0.73, "grad_norm": 18.00905418395996, "learning_rate": 4.679436384759749e-06, "loss": 0.5181, "step": 74600 }, { "epoch": 0.73, "grad_norm": 9.055086135864258, "learning_rate": 4.6793122623055e-06, "loss": 0.2297, "step": 74625 }, { "epoch": 0.73, "grad_norm": Infinity, "learning_rate": 4.679193104749422e-06, "loss": 0.3631, "step": 74650 }, { "epoch": 0.73, "grad_norm": 6.733613014221191, "learning_rate": 4.679068982295173e-06, "loss": 0.2366, "step": 74675 }, { "epoch": 0.73, "grad_norm": 24.072141647338867, "learning_rate": 4.6789448598409246e-06, "loss": 0.3981, "step": 74700 }, { "epoch": 0.73, "grad_norm": 5.238262176513672, "learning_rate": 4.678820737386677e-06, "loss": 0.2174, "step": 74725 }, { "epoch": 0.73, "grad_norm": 16.014049530029297, "learning_rate": 4.678696614932428e-06, "loss": 0.3865, "step": 74750 }, { "epoch": 0.74, "grad_norm": 5.074497699737549, "learning_rate": 4.678572492478179e-06, "loss": 0.2153, "step": 74775 }, { "epoch": 0.74, "grad_norm": 17.4664306640625, "learning_rate": 4.678448370023931e-06, "loss": 0.4322, "step": 74800 }, { "epoch": 0.74, "grad_norm": 5.86081600189209, "learning_rate": 4.678324247569682e-06, "loss": 0.2742, "step": 74825 }, { "epoch": 0.74, "grad_norm": 10.490516662597656, "learning_rate": 4.678200125115434e-06, "loss": 0.4346, "step": 74850 }, { "epoch": 0.74, "grad_norm": 6.834997177124023, "learning_rate": 4.678076002661186e-06, "loss": 0.2274, "step": 74875 }, { "epoch": 0.74, "grad_norm": 15.513404846191406, "learning_rate": 4.677951880206938e-06, "loss": 0.3695, "step": 74900 }, { "epoch": 0.74, "grad_norm": 9.411602973937988, "learning_rate": 4.677827757752689e-06, "loss": 0.1868, "step": 74925 }, { "epoch": 0.74, "grad_norm": 20.00225257873535, "learning_rate": 4.677703635298441e-06, "loss": 0.4039, "step": 74950 }, { "epoch": 0.74, "grad_norm": 8.527923583984375, "learning_rate": 4.677579512844192e-06, "loss": 0.2671, "step": 74975 }, { "epoch": 0.74, "grad_norm": 21.733348846435547, "learning_rate": 4.677455390389943e-06, "loss": 0.3634, "step": 75000 }, { "epoch": 0.74, "grad_norm": 5.784280300140381, "learning_rate": 4.677331267935695e-06, "loss": 0.2298, "step": 75025 }, { "epoch": 0.74, "grad_norm": 17.4490966796875, "learning_rate": 4.677207145481447e-06, "loss": 0.3967, "step": 75050 }, { "epoch": 0.74, "grad_norm": 9.101051330566406, "learning_rate": 4.677083023027198e-06, "loss": 0.1817, "step": 75075 }, { "epoch": 0.74, "grad_norm": 23.471982955932617, "learning_rate": 4.67695890057295e-06, "loss": 0.3765, "step": 75100 }, { "epoch": 0.74, "grad_norm": 11.825438499450684, "learning_rate": 4.676834778118701e-06, "loss": 0.1974, "step": 75125 }, { "epoch": 0.74, "grad_norm": 12.605852127075195, "learning_rate": 4.676710655664452e-06, "loss": 0.4051, "step": 75150 }, { "epoch": 0.74, "grad_norm": 6.026358604431152, "learning_rate": 4.676586533210204e-06, "loss": 0.2143, "step": 75175 }, { "epoch": 0.74, "grad_norm": 19.881507873535156, "learning_rate": 4.6764624107559556e-06, "loss": 0.3421, "step": 75200 }, { "epoch": 0.74, "grad_norm": 9.487032890319824, "learning_rate": 4.676338288301708e-06, "loss": 0.1771, "step": 75225 }, { "epoch": 0.74, "grad_norm": 19.71018409729004, "learning_rate": 4.676214165847459e-06, "loss": 0.4149, "step": 75250 }, { "epoch": 0.74, "grad_norm": 8.643694877624512, "learning_rate": 4.676090043393211e-06, "loss": 0.2503, "step": 75275 }, { "epoch": 0.74, "grad_norm": 18.479219436645508, "learning_rate": 4.675965920938962e-06, "loss": 0.4249, "step": 75300 }, { "epoch": 0.74, "grad_norm": 7.499507427215576, "learning_rate": 4.675841798484713e-06, "loss": 0.2291, "step": 75325 }, { "epoch": 0.74, "grad_norm": 12.15707778930664, "learning_rate": 4.675717676030465e-06, "loss": 0.3295, "step": 75350 }, { "epoch": 0.74, "grad_norm": 8.667253494262695, "learning_rate": 4.675593553576217e-06, "loss": 0.2684, "step": 75375 }, { "epoch": 0.74, "grad_norm": 26.49690055847168, "learning_rate": 4.675469431121968e-06, "loss": 0.4703, "step": 75400 }, { "epoch": 0.74, "grad_norm": 9.50851821899414, "learning_rate": 4.67534530866772e-06, "loss": 0.2334, "step": 75425 }, { "epoch": 0.74, "grad_norm": 23.745065689086914, "learning_rate": 4.675221186213471e-06, "loss": 0.417, "step": 75450 }, { "epoch": 0.74, "grad_norm": 7.3897294998168945, "learning_rate": 4.675097063759222e-06, "loss": 0.2197, "step": 75475 }, { "epoch": 0.74, "grad_norm": 21.330854415893555, "learning_rate": 4.674972941304974e-06, "loss": 0.3765, "step": 75500 }, { "epoch": 0.74, "grad_norm": 5.821763515472412, "learning_rate": 4.6748488188507256e-06, "loss": 0.2315, "step": 75525 }, { "epoch": 0.74, "grad_norm": 15.248900413513184, "learning_rate": 4.674724696396477e-06, "loss": 0.4647, "step": 75550 }, { "epoch": 0.74, "grad_norm": 10.230478286743164, "learning_rate": 4.674600573942229e-06, "loss": 0.2205, "step": 75575 }, { "epoch": 0.74, "grad_norm": 16.921884536743164, "learning_rate": 4.67447645148798e-06, "loss": 0.3385, "step": 75600 }, { "epoch": 0.74, "grad_norm": 11.15357494354248, "learning_rate": 4.674352329033732e-06, "loss": 0.1945, "step": 75625 }, { "epoch": 0.74, "grad_norm": 15.208297729492188, "learning_rate": 4.674228206579483e-06, "loss": 0.4342, "step": 75650 }, { "epoch": 0.74, "grad_norm": 5.160855293273926, "learning_rate": 4.674104084125235e-06, "loss": 0.2173, "step": 75675 }, { "epoch": 0.74, "grad_norm": 20.752853393554688, "learning_rate": 4.6739799616709866e-06, "loss": 0.3443, "step": 75700 }, { "epoch": 0.74, "grad_norm": 15.23300838470459, "learning_rate": 4.673855839216739e-06, "loss": 0.2701, "step": 75725 }, { "epoch": 0.74, "grad_norm": 20.140560150146484, "learning_rate": 4.67373171676249e-06, "loss": 0.3797, "step": 75750 }, { "epoch": 0.75, "grad_norm": 9.607276916503906, "learning_rate": 4.673607594308241e-06, "loss": 0.2347, "step": 75775 }, { "epoch": 0.75, "grad_norm": 17.53881072998047, "learning_rate": 4.673483471853993e-06, "loss": 0.3822, "step": 75800 }, { "epoch": 0.75, "grad_norm": 18.5737247467041, "learning_rate": 4.673359349399744e-06, "loss": 0.2057, "step": 75825 }, { "epoch": 0.75, "grad_norm": 14.013337135314941, "learning_rate": 4.6732352269454955e-06, "loss": 0.4406, "step": 75850 }, { "epoch": 0.75, "grad_norm": 17.36216926574707, "learning_rate": 4.673111104491248e-06, "loss": 0.255, "step": 75875 }, { "epoch": 0.75, "grad_norm": 13.35816764831543, "learning_rate": 4.672986982036999e-06, "loss": 0.3465, "step": 75900 }, { "epoch": 0.75, "grad_norm": 7.013616561889648, "learning_rate": 4.67286285958275e-06, "loss": 0.2203, "step": 75925 }, { "epoch": 0.75, "grad_norm": 17.691431045532227, "learning_rate": 4.672738737128502e-06, "loss": 0.4035, "step": 75950 }, { "epoch": 0.75, "grad_norm": 17.94013786315918, "learning_rate": 4.672614614674253e-06, "loss": 0.2232, "step": 75975 }, { "epoch": 0.75, "grad_norm": 19.90534782409668, "learning_rate": 4.6724904922200045e-06, "loss": 0.3918, "step": 76000 }, { "epoch": 0.75, "grad_norm": 6.960578441619873, "learning_rate": 4.6723663697657566e-06, "loss": 0.2344, "step": 76025 }, { "epoch": 0.75, "grad_norm": 20.541500091552734, "learning_rate": 4.672242247311508e-06, "loss": 0.3484, "step": 76050 }, { "epoch": 0.75, "grad_norm": 14.231547355651855, "learning_rate": 4.67211812485726e-06, "loss": 0.2271, "step": 76075 }, { "epoch": 0.75, "grad_norm": 25.21469497680664, "learning_rate": 4.671994002403011e-06, "loss": 0.3913, "step": 76100 }, { "epoch": 0.75, "grad_norm": 9.863459587097168, "learning_rate": 4.671869879948763e-06, "loss": 0.2031, "step": 76125 }, { "epoch": 0.75, "grad_norm": 18.67494010925293, "learning_rate": 4.671745757494514e-06, "loss": 0.4251, "step": 76150 }, { "epoch": 0.75, "grad_norm": 5.982175350189209, "learning_rate": 4.6716216350402655e-06, "loss": 0.2818, "step": 76175 }, { "epoch": 0.75, "grad_norm": 16.55320167541504, "learning_rate": 4.671497512586018e-06, "loss": 0.4332, "step": 76200 }, { "epoch": 0.75, "grad_norm": 13.606358528137207, "learning_rate": 4.671373390131769e-06, "loss": 0.2418, "step": 76225 }, { "epoch": 0.75, "grad_norm": 22.05845069885254, "learning_rate": 4.67124926767752e-06, "loss": 0.3517, "step": 76250 }, { "epoch": 0.75, "grad_norm": 10.75830078125, "learning_rate": 4.671125145223272e-06, "loss": 0.2161, "step": 76275 }, { "epoch": 0.75, "grad_norm": 18.725757598876953, "learning_rate": 4.671001022769023e-06, "loss": 0.383, "step": 76300 }, { "epoch": 0.75, "grad_norm": 9.72372817993164, "learning_rate": 4.6708769003147745e-06, "loss": 0.2069, "step": 76325 }, { "epoch": 0.75, "grad_norm": 19.64694595336914, "learning_rate": 4.6707527778605265e-06, "loss": 0.3617, "step": 76350 }, { "epoch": 0.75, "grad_norm": 9.830676078796387, "learning_rate": 4.670628655406278e-06, "loss": 0.2445, "step": 76375 }, { "epoch": 0.75, "grad_norm": 17.67898178100586, "learning_rate": 4.670504532952029e-06, "loss": 0.4137, "step": 76400 }, { "epoch": 0.75, "grad_norm": 18.189037322998047, "learning_rate": 4.670380410497781e-06, "loss": 0.2579, "step": 76425 }, { "epoch": 0.75, "grad_norm": 21.09868621826172, "learning_rate": 4.670256288043532e-06, "loss": 0.4069, "step": 76450 }, { "epoch": 0.75, "grad_norm": 10.31492805480957, "learning_rate": 4.670132165589284e-06, "loss": 0.2615, "step": 76475 }, { "epoch": 0.75, "grad_norm": 25.386709213256836, "learning_rate": 4.6700080431350355e-06, "loss": 0.3965, "step": 76500 }, { "epoch": 0.75, "grad_norm": 8.84200382232666, "learning_rate": 4.6698839206807876e-06, "loss": 0.2212, "step": 76525 }, { "epoch": 0.75, "grad_norm": 12.038192749023438, "learning_rate": 4.669759798226539e-06, "loss": 0.37, "step": 76550 }, { "epoch": 0.75, "grad_norm": 9.391754150390625, "learning_rate": 4.669635675772291e-06, "loss": 0.199, "step": 76575 }, { "epoch": 0.75, "grad_norm": 17.186620712280273, "learning_rate": 4.669511553318042e-06, "loss": 0.3987, "step": 76600 }, { "epoch": 0.75, "grad_norm": 11.689257621765137, "learning_rate": 4.669387430863793e-06, "loss": 0.2781, "step": 76625 }, { "epoch": 0.75, "grad_norm": 16.503358840942383, "learning_rate": 4.669263308409545e-06, "loss": 0.383, "step": 76650 }, { "epoch": 0.75, "grad_norm": 15.982669830322266, "learning_rate": 4.6691391859552965e-06, "loss": 0.2657, "step": 76675 }, { "epoch": 0.75, "grad_norm": 19.55646324157715, "learning_rate": 4.669015063501048e-06, "loss": 0.3682, "step": 76700 }, { "epoch": 0.75, "grad_norm": 10.838432312011719, "learning_rate": 4.6688909410468e-06, "loss": 0.2401, "step": 76725 }, { "epoch": 0.75, "grad_norm": 17.776037216186523, "learning_rate": 4.668766818592551e-06, "loss": 0.3425, "step": 76750 }, { "epoch": 0.75, "grad_norm": 8.21491527557373, "learning_rate": 4.668642696138302e-06, "loss": 0.2341, "step": 76775 }, { "epoch": 0.76, "grad_norm": 16.66028594970703, "learning_rate": 4.668518573684054e-06, "loss": 0.3775, "step": 76800 }, { "epoch": 0.76, "grad_norm": 7.271406650543213, "learning_rate": 4.6683944512298055e-06, "loss": 0.2113, "step": 76825 }, { "epoch": 0.76, "grad_norm": 11.377153396606445, "learning_rate": 4.668270328775557e-06, "loss": 0.4023, "step": 76850 }, { "epoch": 0.76, "grad_norm": 10.387580871582031, "learning_rate": 4.668146206321309e-06, "loss": 0.214, "step": 76875 }, { "epoch": 0.76, "grad_norm": 19.471906661987305, "learning_rate": 4.66802704876523e-06, "loss": 0.4746, "step": 76900 }, { "epoch": 0.76, "grad_norm": 11.365224838256836, "learning_rate": 4.667902926310982e-06, "loss": 0.2404, "step": 76925 }, { "epoch": 0.76, "grad_norm": 12.376609802246094, "learning_rate": 4.667778803856733e-06, "loss": 0.3903, "step": 76950 }, { "epoch": 0.76, "grad_norm": 7.334659576416016, "learning_rate": 4.667654681402485e-06, "loss": 0.2121, "step": 76975 }, { "epoch": 0.76, "grad_norm": 17.443628311157227, "learning_rate": 4.667530558948236e-06, "loss": 0.4407, "step": 77000 }, { "epoch": 0.76, "grad_norm": 9.1051664352417, "learning_rate": 4.667406436493988e-06, "loss": 0.2679, "step": 77025 }, { "epoch": 0.76, "grad_norm": 10.806927680969238, "learning_rate": 4.66728231403974e-06, "loss": 0.4249, "step": 77050 }, { "epoch": 0.76, "grad_norm": 5.221894264221191, "learning_rate": 4.667158191585491e-06, "loss": 0.1917, "step": 77075 }, { "epoch": 0.76, "grad_norm": 15.95266342163086, "learning_rate": 4.667034069131243e-06, "loss": 0.3952, "step": 77100 }, { "epoch": 0.76, "grad_norm": 19.34324836730957, "learning_rate": 4.666909946676994e-06, "loss": 0.2156, "step": 77125 }, { "epoch": 0.76, "grad_norm": 18.087263107299805, "learning_rate": 4.666785824222745e-06, "loss": 0.3761, "step": 77150 }, { "epoch": 0.76, "grad_norm": 8.864401817321777, "learning_rate": 4.666661701768497e-06, "loss": 0.2272, "step": 77175 }, { "epoch": 0.76, "grad_norm": 20.100051879882812, "learning_rate": 4.6665375793142486e-06, "loss": 0.3793, "step": 77200 }, { "epoch": 0.76, "grad_norm": 10.712589263916016, "learning_rate": 4.66641345686e-06, "loss": 0.2069, "step": 77225 }, { "epoch": 0.76, "grad_norm": 13.30968189239502, "learning_rate": 4.666289334405752e-06, "loss": 0.3908, "step": 77250 }, { "epoch": 0.76, "grad_norm": 12.242740631103516, "learning_rate": 4.666165211951503e-06, "loss": 0.253, "step": 77275 }, { "epoch": 0.76, "grad_norm": 16.323368072509766, "learning_rate": 4.666041089497254e-06, "loss": 0.3575, "step": 77300 }, { "epoch": 0.76, "grad_norm": 13.55346965789795, "learning_rate": 4.665916967043006e-06, "loss": 0.2476, "step": 77325 }, { "epoch": 0.76, "grad_norm": 18.970462799072266, "learning_rate": 4.6657928445887575e-06, "loss": 0.4511, "step": 77350 }, { "epoch": 0.76, "grad_norm": 8.23715591430664, "learning_rate": 4.66566872213451e-06, "loss": 0.2161, "step": 77375 }, { "epoch": 0.76, "grad_norm": 13.51924991607666, "learning_rate": 4.665544599680261e-06, "loss": 0.333, "step": 77400 }, { "epoch": 0.76, "grad_norm": 17.722179412841797, "learning_rate": 4.665420477226013e-06, "loss": 0.2883, "step": 77425 }, { "epoch": 0.76, "grad_norm": 18.516849517822266, "learning_rate": 4.665296354771764e-06, "loss": 0.4012, "step": 77450 }, { "epoch": 0.76, "grad_norm": 11.721158981323242, "learning_rate": 4.665172232317516e-06, "loss": 0.2134, "step": 77475 }, { "epoch": 0.76, "grad_norm": 23.354076385498047, "learning_rate": 4.665048109863267e-06, "loss": 0.4371, "step": 77500 }, { "epoch": 0.76, "grad_norm": 7.4776434898376465, "learning_rate": 4.6649239874090185e-06, "loss": 0.1747, "step": 77525 }, { "epoch": 0.76, "grad_norm": 19.31818389892578, "learning_rate": 4.66479986495477e-06, "loss": 0.4557, "step": 77550 }, { "epoch": 0.76, "grad_norm": 5.97924280166626, "learning_rate": 4.664675742500522e-06, "loss": 0.2223, "step": 77575 }, { "epoch": 0.76, "grad_norm": 20.2830753326416, "learning_rate": 4.664551620046273e-06, "loss": 0.3923, "step": 77600 }, { "epoch": 0.76, "grad_norm": 15.665878295898438, "learning_rate": 4.664427497592024e-06, "loss": 0.2116, "step": 77625 }, { "epoch": 0.76, "grad_norm": 16.404020309448242, "learning_rate": 4.664303375137776e-06, "loss": 0.4244, "step": 77650 }, { "epoch": 0.76, "grad_norm": 6.107117176055908, "learning_rate": 4.6641792526835275e-06, "loss": 0.2163, "step": 77675 }, { "epoch": 0.76, "grad_norm": 10.994444847106934, "learning_rate": 4.664055130229279e-06, "loss": 0.3511, "step": 77700 }, { "epoch": 0.76, "grad_norm": 15.932321548461914, "learning_rate": 4.663931007775031e-06, "loss": 0.2503, "step": 77725 }, { "epoch": 0.76, "grad_norm": 15.189091682434082, "learning_rate": 4.663806885320782e-06, "loss": 0.3784, "step": 77750 }, { "epoch": 0.76, "grad_norm": 9.782669067382812, "learning_rate": 4.663682762866534e-06, "loss": 0.2114, "step": 77775 }, { "epoch": 0.76, "grad_norm": 20.14048194885254, "learning_rate": 4.663558640412285e-06, "loss": 0.3626, "step": 77800 }, { "epoch": 0.77, "grad_norm": 10.450572967529297, "learning_rate": 4.663434517958037e-06, "loss": 0.2046, "step": 77825 }, { "epoch": 0.77, "grad_norm": 31.4129695892334, "learning_rate": 4.6633103955037885e-06, "loss": 0.446, "step": 77850 }, { "epoch": 0.77, "grad_norm": 10.990160942077637, "learning_rate": 4.663186273049541e-06, "loss": 0.2053, "step": 77875 }, { "epoch": 0.77, "grad_norm": 14.02619457244873, "learning_rate": 4.663062150595292e-06, "loss": 0.3863, "step": 77900 }, { "epoch": 0.77, "grad_norm": 11.669012069702148, "learning_rate": 4.662938028141043e-06, "loss": 0.2239, "step": 77925 }, { "epoch": 0.77, "grad_norm": 13.697635650634766, "learning_rate": 4.662813905686795e-06, "loss": 0.3633, "step": 77950 }, { "epoch": 0.77, "grad_norm": 14.638333320617676, "learning_rate": 4.662689783232546e-06, "loss": 0.2771, "step": 77975 }, { "epoch": 0.77, "grad_norm": 16.968734741210938, "learning_rate": 4.6625656607782975e-06, "loss": 0.3285, "step": 78000 }, { "epoch": 0.77, "grad_norm": 7.804004669189453, "learning_rate": 4.6624415383240495e-06, "loss": 0.2682, "step": 78025 }, { "epoch": 0.77, "grad_norm": 17.583602905273438, "learning_rate": 4.662317415869801e-06, "loss": 0.396, "step": 78050 }, { "epoch": 0.77, "grad_norm": 13.810503959655762, "learning_rate": 4.662193293415552e-06, "loss": 0.202, "step": 78075 }, { "epoch": 0.77, "grad_norm": 23.805858612060547, "learning_rate": 4.662069170961304e-06, "loss": 0.349, "step": 78100 }, { "epoch": 0.77, "grad_norm": 10.43929386138916, "learning_rate": 4.661945048507055e-06, "loss": 0.2379, "step": 78125 }, { "epoch": 0.77, "grad_norm": 19.863590240478516, "learning_rate": 4.661820926052807e-06, "loss": 0.3899, "step": 78150 }, { "epoch": 0.77, "grad_norm": 10.279121398925781, "learning_rate": 4.6616968035985585e-06, "loss": 0.225, "step": 78175 }, { "epoch": 0.77, "grad_norm": 15.81837272644043, "learning_rate": 4.6615726811443106e-06, "loss": 0.4064, "step": 78200 }, { "epoch": 0.77, "grad_norm": 8.234503746032715, "learning_rate": 4.661448558690062e-06, "loss": 0.2557, "step": 78225 }, { "epoch": 0.77, "grad_norm": 16.021997451782227, "learning_rate": 4.661324436235814e-06, "loss": 0.4029, "step": 78250 }, { "epoch": 0.77, "grad_norm": 3.9491381645202637, "learning_rate": 4.661200313781565e-06, "loss": 0.1952, "step": 78275 }, { "epoch": 0.77, "grad_norm": 8.206734657287598, "learning_rate": 4.661076191327316e-06, "loss": 0.4179, "step": 78300 }, { "epoch": 0.77, "grad_norm": 12.63196849822998, "learning_rate": 4.660952068873068e-06, "loss": 0.2345, "step": 78325 }, { "epoch": 0.77, "grad_norm": 18.07394790649414, "learning_rate": 4.6608279464188195e-06, "loss": 0.4549, "step": 78350 }, { "epoch": 0.77, "grad_norm": 8.905708312988281, "learning_rate": 4.660703823964571e-06, "loss": 0.2174, "step": 78375 }, { "epoch": 0.77, "grad_norm": 22.618440628051758, "learning_rate": 4.660579701510322e-06, "loss": 0.2994, "step": 78400 }, { "epoch": 0.77, "grad_norm": 1.280554175376892, "learning_rate": 4.660455579056074e-06, "loss": 0.2003, "step": 78425 }, { "epoch": 0.77, "grad_norm": 15.915440559387207, "learning_rate": 4.660331456601825e-06, "loss": 0.4286, "step": 78450 }, { "epoch": 0.77, "grad_norm": 12.179112434387207, "learning_rate": 4.6602073341475764e-06, "loss": 0.2434, "step": 78475 }, { "epoch": 0.77, "grad_norm": 11.6765775680542, "learning_rate": 4.6600832116933285e-06, "loss": 0.3987, "step": 78500 }, { "epoch": 0.77, "grad_norm": 9.120492935180664, "learning_rate": 4.65995908923908e-06, "loss": 0.1837, "step": 78525 }, { "epoch": 0.77, "grad_norm": 27.42160987854004, "learning_rate": 4.659834966784832e-06, "loss": 0.3937, "step": 78550 }, { "epoch": 0.77, "grad_norm": 14.037278175354004, "learning_rate": 4.659710844330583e-06, "loss": 0.2717, "step": 78575 }, { "epoch": 0.77, "grad_norm": 17.58612060546875, "learning_rate": 4.659586721876335e-06, "loss": 0.4243, "step": 78600 }, { "epoch": 0.77, "grad_norm": 6.200094223022461, "learning_rate": 4.659462599422086e-06, "loss": 0.2287, "step": 78625 }, { "epoch": 0.77, "grad_norm": 23.34228515625, "learning_rate": 4.659338476967838e-06, "loss": 0.392, "step": 78650 }, { "epoch": 0.77, "grad_norm": 12.032188415527344, "learning_rate": 4.6592143545135895e-06, "loss": 0.2162, "step": 78675 }, { "epoch": 0.77, "grad_norm": 9.777369499206543, "learning_rate": 4.659090232059341e-06, "loss": 0.33, "step": 78700 }, { "epoch": 0.77, "grad_norm": 11.410871505737305, "learning_rate": 4.658966109605093e-06, "loss": 0.2113, "step": 78725 }, { "epoch": 0.77, "grad_norm": 8.72661018371582, "learning_rate": 4.658841987150844e-06, "loss": 0.4102, "step": 78750 }, { "epoch": 0.77, "grad_norm": 10.68313980102539, "learning_rate": 4.658717864696595e-06, "loss": 0.2567, "step": 78775 }, { "epoch": 0.77, "grad_norm": 14.581280708312988, "learning_rate": 4.658593742242347e-06, "loss": 0.4555, "step": 78800 }, { "epoch": 0.78, "grad_norm": 9.382633209228516, "learning_rate": 4.6584696197880985e-06, "loss": 0.2342, "step": 78825 }, { "epoch": 0.78, "grad_norm": 13.024113655090332, "learning_rate": 4.65834549733385e-06, "loss": 0.3587, "step": 78850 }, { "epoch": 0.78, "grad_norm": 8.656449317932129, "learning_rate": 4.658221374879602e-06, "loss": 0.1985, "step": 78875 }, { "epoch": 0.78, "grad_norm": 18.22564125061035, "learning_rate": 4.658097252425353e-06, "loss": 0.4111, "step": 78900 }, { "epoch": 0.78, "grad_norm": 31.481922149658203, "learning_rate": 4.657973129971104e-06, "loss": 0.2255, "step": 78925 }, { "epoch": 0.78, "grad_norm": 22.685199737548828, "learning_rate": 4.657849007516856e-06, "loss": 0.349, "step": 78950 }, { "epoch": 0.78, "grad_norm": 14.600767135620117, "learning_rate": 4.6577248850626074e-06, "loss": 0.2417, "step": 78975 }, { "epoch": 0.78, "grad_norm": 7.786931037902832, "learning_rate": 4.6576007626083595e-06, "loss": 0.3917, "step": 79000 }, { "epoch": 0.78, "grad_norm": 8.518046379089355, "learning_rate": 4.657476640154111e-06, "loss": 0.2128, "step": 79025 }, { "epoch": 0.78, "grad_norm": 21.065107345581055, "learning_rate": 4.657352517699863e-06, "loss": 0.365, "step": 79050 }, { "epoch": 0.78, "grad_norm": 8.543041229248047, "learning_rate": 4.657228395245614e-06, "loss": 0.1811, "step": 79075 }, { "epoch": 0.78, "grad_norm": 10.622678756713867, "learning_rate": 4.657104272791366e-06, "loss": 0.4308, "step": 79100 }, { "epoch": 0.78, "grad_norm": 12.692373275756836, "learning_rate": 4.656980150337117e-06, "loss": 0.265, "step": 79125 }, { "epoch": 0.78, "grad_norm": 17.857988357543945, "learning_rate": 4.6568560278828685e-06, "loss": 0.3552, "step": 79150 }, { "epoch": 0.78, "grad_norm": 8.243208885192871, "learning_rate": 4.6567319054286205e-06, "loss": 0.2327, "step": 79175 }, { "epoch": 0.78, "grad_norm": 6.754174709320068, "learning_rate": 4.656607782974372e-06, "loss": 0.3943, "step": 79200 }, { "epoch": 0.78, "grad_norm": 9.345386505126953, "learning_rate": 4.656483660520123e-06, "loss": 0.2352, "step": 79225 }, { "epoch": 0.78, "grad_norm": 28.99691390991211, "learning_rate": 4.656359538065874e-06, "loss": 0.4333, "step": 79250 }, { "epoch": 0.78, "grad_norm": 7.385318756103516, "learning_rate": 4.656235415611626e-06, "loss": 0.1919, "step": 79275 }, { "epoch": 0.78, "grad_norm": 17.149431228637695, "learning_rate": 4.6561112931573774e-06, "loss": 0.3785, "step": 79300 }, { "epoch": 0.78, "grad_norm": 10.526477813720703, "learning_rate": 4.655987170703129e-06, "loss": 0.2439, "step": 79325 }, { "epoch": 0.78, "grad_norm": 19.131277084350586, "learning_rate": 4.655863048248881e-06, "loss": 0.3743, "step": 79350 }, { "epoch": 0.78, "grad_norm": 4.808711051940918, "learning_rate": 4.655738925794632e-06, "loss": 0.2061, "step": 79375 }, { "epoch": 0.78, "grad_norm": 12.423016548156738, "learning_rate": 4.655614803340384e-06, "loss": 0.4218, "step": 79400 }, { "epoch": 0.78, "grad_norm": 8.08968448638916, "learning_rate": 4.655490680886135e-06, "loss": 0.2239, "step": 79425 }, { "epoch": 0.78, "grad_norm": 17.92495346069336, "learning_rate": 4.655366558431887e-06, "loss": 0.3565, "step": 79450 }, { "epoch": 0.78, "grad_norm": 9.598228454589844, "learning_rate": 4.6552424359776384e-06, "loss": 0.2377, "step": 79475 }, { "epoch": 0.78, "grad_norm": 16.1563777923584, "learning_rate": 4.65512327842156e-06, "loss": 0.4513, "step": 79500 }, { "epoch": 0.78, "grad_norm": 13.649802207946777, "learning_rate": 4.6549991559673115e-06, "loss": 0.2344, "step": 79525 }, { "epoch": 0.78, "grad_norm": 10.382918357849121, "learning_rate": 4.654875033513064e-06, "loss": 0.3518, "step": 79550 }, { "epoch": 0.78, "grad_norm": 10.973952293395996, "learning_rate": 4.654750911058815e-06, "loss": 0.2506, "step": 79575 }, { "epoch": 0.78, "grad_norm": 8.510842323303223, "learning_rate": 4.654626788604566e-06, "loss": 0.3991, "step": 79600 }, { "epoch": 0.78, "grad_norm": 5.040127754211426, "learning_rate": 4.654502666150318e-06, "loss": 0.2536, "step": 79625 }, { "epoch": 0.78, "grad_norm": 16.150501251220703, "learning_rate": 4.654378543696069e-06, "loss": 0.3309, "step": 79650 }, { "epoch": 0.78, "grad_norm": 12.208230018615723, "learning_rate": 4.6542544212418205e-06, "loss": 0.2137, "step": 79675 }, { "epoch": 0.78, "grad_norm": 7.620943546295166, "learning_rate": 4.6541302987875726e-06, "loss": 0.3948, "step": 79700 }, { "epoch": 0.78, "grad_norm": 8.815511703491211, "learning_rate": 4.654006176333324e-06, "loss": 0.2591, "step": 79725 }, { "epoch": 0.78, "grad_norm": 11.682554244995117, "learning_rate": 4.653882053879075e-06, "loss": 0.3352, "step": 79750 }, { "epoch": 0.78, "grad_norm": 6.791160583496094, "learning_rate": 4.653757931424827e-06, "loss": 0.2214, "step": 79775 }, { "epoch": 0.78, "grad_norm": 13.565519332885742, "learning_rate": 4.653633808970578e-06, "loss": 0.3286, "step": 79800 }, { "epoch": 0.78, "grad_norm": 8.649249076843262, "learning_rate": 4.6535096865163295e-06, "loss": 0.1812, "step": 79825 }, { "epoch": 0.79, "grad_norm": 16.59381103515625, "learning_rate": 4.6533855640620815e-06, "loss": 0.3506, "step": 79850 }, { "epoch": 0.79, "grad_norm": 8.605033874511719, "learning_rate": 4.653261441607833e-06, "loss": 0.2398, "step": 79875 }, { "epoch": 0.79, "grad_norm": 12.983023643493652, "learning_rate": 4.653137319153585e-06, "loss": 0.4139, "step": 79900 }, { "epoch": 0.79, "grad_norm": 17.312362670898438, "learning_rate": 4.653013196699336e-06, "loss": 0.2831, "step": 79925 }, { "epoch": 0.79, "grad_norm": 16.45040512084961, "learning_rate": 4.652889074245088e-06, "loss": 0.3671, "step": 79950 }, { "epoch": 0.79, "grad_norm": 7.79976749420166, "learning_rate": 4.652764951790839e-06, "loss": 0.2154, "step": 79975 }, { "epoch": 0.79, "grad_norm": 17.195541381835938, "learning_rate": 4.6526408293365905e-06, "loss": 0.37, "step": 80000 }, { "epoch": 0.79, "eval_loss": 0.4752821624279022, "eval_runtime": 6055.2343, "eval_samples_per_second": 1.563, "eval_steps_per_second": 0.196, "eval_wer": 0.14391939741196164, "step": 80000 }, { "epoch": 0.79, "grad_norm": 16.284563064575195, "learning_rate": 4.6525167068823425e-06, "loss": 0.2072, "step": 80025 }, { "epoch": 0.79, "grad_norm": 13.42373275756836, "learning_rate": 4.652392584428094e-06, "loss": 0.3576, "step": 80050 }, { "epoch": 0.79, "grad_norm": 8.335911750793457, "learning_rate": 4.652268461973845e-06, "loss": 0.2624, "step": 80075 }, { "epoch": 0.79, "grad_norm": 21.19757652282715, "learning_rate": 4.652144339519597e-06, "loss": 0.3957, "step": 80100 }, { "epoch": 0.79, "grad_norm": 1.3273006677627563, "learning_rate": 4.652020217065348e-06, "loss": 0.226, "step": 80125 }, { "epoch": 0.79, "grad_norm": 16.103363037109375, "learning_rate": 4.6518960946110994e-06, "loss": 0.3538, "step": 80150 }, { "epoch": 0.79, "grad_norm": 13.24730110168457, "learning_rate": 4.6517719721568515e-06, "loss": 0.2155, "step": 80175 }, { "epoch": 0.79, "grad_norm": 13.516129493713379, "learning_rate": 4.651647849702603e-06, "loss": 0.3988, "step": 80200 }, { "epoch": 0.79, "grad_norm": 10.459228515625, "learning_rate": 4.651523727248354e-06, "loss": 0.2163, "step": 80225 }, { "epoch": 0.79, "grad_norm": 17.384851455688477, "learning_rate": 4.651399604794106e-06, "loss": 0.3928, "step": 80250 }, { "epoch": 0.79, "grad_norm": 8.996210098266602, "learning_rate": 4.651275482339857e-06, "loss": 0.211, "step": 80275 }, { "epoch": 0.79, "grad_norm": 20.13312339782715, "learning_rate": 4.651151359885609e-06, "loss": 0.4272, "step": 80300 }, { "epoch": 0.79, "grad_norm": 11.77880859375, "learning_rate": 4.6510272374313605e-06, "loss": 0.2152, "step": 80325 }, { "epoch": 0.79, "grad_norm": 20.383834838867188, "learning_rate": 4.6509031149771125e-06, "loss": 0.4086, "step": 80350 }, { "epoch": 0.79, "grad_norm": 9.369466781616211, "learning_rate": 4.650778992522864e-06, "loss": 0.2187, "step": 80375 }, { "epoch": 0.79, "grad_norm": 16.312341690063477, "learning_rate": 4.650654870068616e-06, "loss": 0.4112, "step": 80400 }, { "epoch": 0.79, "grad_norm": 6.203465461730957, "learning_rate": 4.650530747614367e-06, "loss": 0.1967, "step": 80425 }, { "epoch": 0.79, "grad_norm": 14.092530250549316, "learning_rate": 4.650406625160118e-06, "loss": 0.4292, "step": 80450 }, { "epoch": 0.79, "grad_norm": 9.407604217529297, "learning_rate": 4.65028250270587e-06, "loss": 0.2297, "step": 80475 }, { "epoch": 0.79, "grad_norm": 18.492345809936523, "learning_rate": 4.6501583802516215e-06, "loss": 0.3837, "step": 80500 }, { "epoch": 0.79, "grad_norm": 9.354296684265137, "learning_rate": 4.650034257797373e-06, "loss": 0.2226, "step": 80525 }, { "epoch": 0.79, "grad_norm": 19.32183837890625, "learning_rate": 4.649910135343125e-06, "loss": 0.3811, "step": 80550 }, { "epoch": 0.79, "grad_norm": 6.645812034606934, "learning_rate": 4.649786012888876e-06, "loss": 0.1925, "step": 80575 }, { "epoch": 0.79, "grad_norm": 15.091409683227539, "learning_rate": 4.649661890434627e-06, "loss": 0.3821, "step": 80600 }, { "epoch": 0.79, "grad_norm": 11.207202911376953, "learning_rate": 4.649537767980379e-06, "loss": 0.216, "step": 80625 }, { "epoch": 0.79, "grad_norm": 19.42162322998047, "learning_rate": 4.6494136455261304e-06, "loss": 0.39, "step": 80650 }, { "epoch": 0.79, "grad_norm": 6.9813079833984375, "learning_rate": 4.6492895230718825e-06, "loss": 0.2243, "step": 80675 }, { "epoch": 0.79, "grad_norm": 18.240127563476562, "learning_rate": 4.649165400617634e-06, "loss": 0.3369, "step": 80700 }, { "epoch": 0.79, "grad_norm": 8.794167518615723, "learning_rate": 4.649041278163386e-06, "loss": 0.2176, "step": 80725 }, { "epoch": 0.79, "grad_norm": 18.764141082763672, "learning_rate": 4.648917155709137e-06, "loss": 0.4849, "step": 80750 }, { "epoch": 0.79, "grad_norm": 6.512016773223877, "learning_rate": 4.648793033254888e-06, "loss": 0.2519, "step": 80775 }, { "epoch": 0.79, "grad_norm": 31.69902801513672, "learning_rate": 4.64866891080064e-06, "loss": 0.4003, "step": 80800 }, { "epoch": 0.79, "grad_norm": 5.695474147796631, "learning_rate": 4.6485447883463915e-06, "loss": 0.2174, "step": 80825 }, { "epoch": 0.79, "grad_norm": 13.987292289733887, "learning_rate": 4.648420665892143e-06, "loss": 0.3442, "step": 80850 }, { "epoch": 0.8, "grad_norm": 15.724960327148438, "learning_rate": 4.648296543437895e-06, "loss": 0.2374, "step": 80875 }, { "epoch": 0.8, "grad_norm": 21.184961318969727, "learning_rate": 4.648172420983646e-06, "loss": 0.4004, "step": 80900 }, { "epoch": 0.8, "grad_norm": 12.989569664001465, "learning_rate": 4.648048298529397e-06, "loss": 0.2711, "step": 80925 }, { "epoch": 0.8, "grad_norm": 18.750009536743164, "learning_rate": 4.647924176075149e-06, "loss": 0.3455, "step": 80950 }, { "epoch": 0.8, "grad_norm": 9.554710388183594, "learning_rate": 4.6478000536209004e-06, "loss": 0.1852, "step": 80975 }, { "epoch": 0.8, "grad_norm": 13.958714485168457, "learning_rate": 4.647675931166652e-06, "loss": 0.412, "step": 81000 }, { "epoch": 0.8, "grad_norm": 10.925291061401367, "learning_rate": 4.647551808712404e-06, "loss": 0.2056, "step": 81025 }, { "epoch": 0.8, "grad_norm": 17.660091400146484, "learning_rate": 4.647427686258155e-06, "loss": 0.4161, "step": 81050 }, { "epoch": 0.8, "grad_norm": 10.172846794128418, "learning_rate": 4.647303563803907e-06, "loss": 0.2266, "step": 81075 }, { "epoch": 0.8, "grad_norm": 12.246139526367188, "learning_rate": 4.647179441349658e-06, "loss": 0.4074, "step": 81100 }, { "epoch": 0.8, "grad_norm": 11.076301574707031, "learning_rate": 4.64705531889541e-06, "loss": 0.2011, "step": 81125 }, { "epoch": 0.8, "grad_norm": 15.26587200164795, "learning_rate": 4.6469311964411615e-06, "loss": 0.3708, "step": 81150 }, { "epoch": 0.8, "grad_norm": 10.33590316772461, "learning_rate": 4.6468070739869135e-06, "loss": 0.2077, "step": 81175 }, { "epoch": 0.8, "grad_norm": 13.54151439666748, "learning_rate": 4.646682951532665e-06, "loss": 0.3677, "step": 81200 }, { "epoch": 0.8, "grad_norm": 15.729162216186523, "learning_rate": 4.646558829078416e-06, "loss": 0.2384, "step": 81225 }, { "epoch": 0.8, "grad_norm": 11.409503936767578, "learning_rate": 4.646434706624168e-06, "loss": 0.3611, "step": 81250 }, { "epoch": 0.8, "grad_norm": 13.115421295166016, "learning_rate": 4.646310584169919e-06, "loss": 0.2718, "step": 81275 }, { "epoch": 0.8, "grad_norm": 16.79172706604004, "learning_rate": 4.64618646171567e-06, "loss": 0.3516, "step": 81300 }, { "epoch": 0.8, "grad_norm": 8.908818244934082, "learning_rate": 4.6460623392614225e-06, "loss": 0.23, "step": 81325 }, { "epoch": 0.8, "grad_norm": 20.180696487426758, "learning_rate": 4.645938216807174e-06, "loss": 0.4585, "step": 81350 }, { "epoch": 0.8, "grad_norm": 17.05826759338379, "learning_rate": 4.645814094352925e-06, "loss": 0.232, "step": 81375 }, { "epoch": 0.8, "grad_norm": 11.354398727416992, "learning_rate": 4.645689971898677e-06, "loss": 0.3571, "step": 81400 }, { "epoch": 0.8, "grad_norm": 6.607508659362793, "learning_rate": 4.645565849444428e-06, "loss": 0.2346, "step": 81425 }, { "epoch": 0.8, "grad_norm": 11.414576530456543, "learning_rate": 4.645441726990179e-06, "loss": 0.4104, "step": 81450 }, { "epoch": 0.8, "grad_norm": 9.03747844696045, "learning_rate": 4.6453176045359314e-06, "loss": 0.1692, "step": 81475 }, { "epoch": 0.8, "grad_norm": 8.608806610107422, "learning_rate": 4.645193482081683e-06, "loss": 0.4208, "step": 81500 }, { "epoch": 0.8, "grad_norm": 14.34041976928711, "learning_rate": 4.645069359627435e-06, "loss": 0.2405, "step": 81525 }, { "epoch": 0.8, "grad_norm": 8.595706939697266, "learning_rate": 4.644945237173186e-06, "loss": 0.3522, "step": 81550 }, { "epoch": 0.8, "grad_norm": 11.066884994506836, "learning_rate": 4.644821114718938e-06, "loss": 0.2467, "step": 81575 }, { "epoch": 0.8, "grad_norm": 11.496023178100586, "learning_rate": 4.644696992264689e-06, "loss": 0.4172, "step": 81600 }, { "epoch": 0.8, "grad_norm": 7.824522018432617, "learning_rate": 4.64457286981044e-06, "loss": 0.2265, "step": 81625 }, { "epoch": 0.8, "grad_norm": 11.349656105041504, "learning_rate": 4.6444487473561925e-06, "loss": 0.4167, "step": 81650 }, { "epoch": 0.8, "grad_norm": 3.924262285232544, "learning_rate": 4.644324624901944e-06, "loss": 0.2025, "step": 81675 }, { "epoch": 0.8, "grad_norm": 12.102376937866211, "learning_rate": 4.644200502447695e-06, "loss": 0.3801, "step": 81700 }, { "epoch": 0.8, "grad_norm": 17.246910095214844, "learning_rate": 4.644076379993447e-06, "loss": 0.2328, "step": 81725 }, { "epoch": 0.8, "grad_norm": 13.04458236694336, "learning_rate": 4.643952257539198e-06, "loss": 0.377, "step": 81750 }, { "epoch": 0.8, "grad_norm": 9.603846549987793, "learning_rate": 4.643828135084949e-06, "loss": 0.2142, "step": 81775 }, { "epoch": 0.8, "grad_norm": 21.29812240600586, "learning_rate": 4.643708977528871e-06, "loss": 0.3543, "step": 81800 }, { "epoch": 0.8, "grad_norm": 7.398759365081787, "learning_rate": 4.6435848550746224e-06, "loss": 0.1909, "step": 81825 }, { "epoch": 0.8, "grad_norm": 14.032997131347656, "learning_rate": 4.6434607326203745e-06, "loss": 0.3943, "step": 81850 }, { "epoch": 0.81, "grad_norm": 12.419564247131348, "learning_rate": 4.643336610166126e-06, "loss": 0.226, "step": 81875 }, { "epoch": 0.81, "grad_norm": 16.1326961517334, "learning_rate": 4.643212487711877e-06, "loss": 0.4406, "step": 81900 }, { "epoch": 0.81, "grad_norm": 8.193105697631836, "learning_rate": 4.643088365257629e-06, "loss": 0.2169, "step": 81925 }, { "epoch": 0.81, "grad_norm": 8.686888694763184, "learning_rate": 4.64296424280338e-06, "loss": 0.4001, "step": 81950 }, { "epoch": 0.81, "grad_norm": 10.084080696105957, "learning_rate": 4.642840120349132e-06, "loss": 0.1998, "step": 81975 }, { "epoch": 0.81, "grad_norm": 19.276378631591797, "learning_rate": 4.6427159978948835e-06, "loss": 0.416, "step": 82000 }, { "epoch": 0.81, "grad_norm": 6.900076389312744, "learning_rate": 4.6425918754406355e-06, "loss": 0.1807, "step": 82025 }, { "epoch": 0.81, "grad_norm": 16.52909278869629, "learning_rate": 4.642467752986387e-06, "loss": 0.434, "step": 82050 }, { "epoch": 0.81, "grad_norm": 6.870248794555664, "learning_rate": 4.642343630532139e-06, "loss": 0.2212, "step": 82075 }, { "epoch": 0.81, "grad_norm": 15.780892372131348, "learning_rate": 4.64221950807789e-06, "loss": 0.3902, "step": 82100 }, { "epoch": 0.81, "grad_norm": 16.44130516052246, "learning_rate": 4.642095385623641e-06, "loss": 0.2511, "step": 82125 }, { "epoch": 0.81, "grad_norm": 12.856880187988281, "learning_rate": 4.6419712631693924e-06, "loss": 0.3924, "step": 82150 }, { "epoch": 0.81, "grad_norm": 5.566736221313477, "learning_rate": 4.6418471407151445e-06, "loss": 0.203, "step": 82175 }, { "epoch": 0.81, "grad_norm": 13.279658317565918, "learning_rate": 4.641723018260896e-06, "loss": 0.4093, "step": 82200 }, { "epoch": 0.81, "grad_norm": 9.946560859680176, "learning_rate": 4.641598895806647e-06, "loss": 0.2479, "step": 82225 }, { "epoch": 0.81, "grad_norm": 18.483606338500977, "learning_rate": 4.641474773352399e-06, "loss": 0.4282, "step": 82250 }, { "epoch": 0.81, "grad_norm": 8.467192649841309, "learning_rate": 4.64135065089815e-06, "loss": 0.2131, "step": 82275 }, { "epoch": 0.81, "grad_norm": 16.48850440979004, "learning_rate": 4.641226528443901e-06, "loss": 0.4199, "step": 82300 }, { "epoch": 0.81, "grad_norm": 10.90915298461914, "learning_rate": 4.6411024059896535e-06, "loss": 0.1797, "step": 82325 }, { "epoch": 0.81, "grad_norm": 20.7631778717041, "learning_rate": 4.640978283535405e-06, "loss": 0.3974, "step": 82350 }, { "epoch": 0.81, "grad_norm": 5.464991569519043, "learning_rate": 4.640854161081157e-06, "loss": 0.1845, "step": 82375 }, { "epoch": 0.81, "grad_norm": 14.889494895935059, "learning_rate": 4.640730038626908e-06, "loss": 0.4192, "step": 82400 }, { "epoch": 0.81, "grad_norm": 12.964189529418945, "learning_rate": 4.64060591617266e-06, "loss": 0.233, "step": 82425 }, { "epoch": 0.81, "grad_norm": 11.920080184936523, "learning_rate": 4.640481793718411e-06, "loss": 0.387, "step": 82450 }, { "epoch": 0.81, "grad_norm": 9.789579391479492, "learning_rate": 4.640357671264163e-06, "loss": 0.208, "step": 82475 }, { "epoch": 0.81, "grad_norm": 18.679821014404297, "learning_rate": 4.6402335488099145e-06, "loss": 0.3796, "step": 82500 }, { "epoch": 0.81, "grad_norm": 8.778326988220215, "learning_rate": 4.640109426355666e-06, "loss": 0.2442, "step": 82525 }, { "epoch": 0.81, "grad_norm": 13.816136360168457, "learning_rate": 4.639985303901418e-06, "loss": 0.3168, "step": 82550 }, { "epoch": 0.81, "grad_norm": 11.231898307800293, "learning_rate": 4.639861181447169e-06, "loss": 0.1985, "step": 82575 }, { "epoch": 0.81, "grad_norm": 10.559547424316406, "learning_rate": 4.63973705899292e-06, "loss": 0.3989, "step": 82600 }, { "epoch": 0.81, "grad_norm": 7.493391990661621, "learning_rate": 4.639612936538672e-06, "loss": 0.219, "step": 82625 }, { "epoch": 0.81, "grad_norm": 16.546701431274414, "learning_rate": 4.6394888140844234e-06, "loss": 0.3974, "step": 82650 }, { "epoch": 0.81, "grad_norm": 12.071348190307617, "learning_rate": 4.639364691630175e-06, "loss": 0.2247, "step": 82675 }, { "epoch": 0.81, "grad_norm": 11.322566986083984, "learning_rate": 4.639240569175927e-06, "loss": 0.3675, "step": 82700 }, { "epoch": 0.81, "grad_norm": 5.669017314910889, "learning_rate": 4.639116446721678e-06, "loss": 0.2334, "step": 82725 }, { "epoch": 0.81, "grad_norm": 11.320479393005371, "learning_rate": 4.638992324267429e-06, "loss": 0.3697, "step": 82750 }, { "epoch": 0.81, "grad_norm": 8.14482593536377, "learning_rate": 4.638868201813181e-06, "loss": 0.186, "step": 82775 }, { "epoch": 0.81, "grad_norm": 17.182233810424805, "learning_rate": 4.638744079358932e-06, "loss": 0.4202, "step": 82800 }, { "epoch": 0.81, "grad_norm": 5.603175163269043, "learning_rate": 4.6386199569046845e-06, "loss": 0.2087, "step": 82825 }, { "epoch": 0.81, "grad_norm": 20.63267707824707, "learning_rate": 4.638495834450436e-06, "loss": 0.4155, "step": 82850 }, { "epoch": 0.81, "grad_norm": 8.562771797180176, "learning_rate": 4.638371711996188e-06, "loss": 0.2357, "step": 82875 }, { "epoch": 0.82, "grad_norm": 14.250029563903809, "learning_rate": 4.638247589541939e-06, "loss": 0.3341, "step": 82900 }, { "epoch": 0.82, "grad_norm": 8.003043174743652, "learning_rate": 4.638123467087691e-06, "loss": 0.1959, "step": 82925 }, { "epoch": 0.82, "grad_norm": 15.047026634216309, "learning_rate": 4.637999344633442e-06, "loss": 0.371, "step": 82950 }, { "epoch": 0.82, "grad_norm": 8.101025581359863, "learning_rate": 4.637875222179193e-06, "loss": 0.2573, "step": 82975 }, { "epoch": 0.82, "grad_norm": 16.018129348754883, "learning_rate": 4.637751099724945e-06, "loss": 0.373, "step": 83000 }, { "epoch": 0.82, "grad_norm": 18.72789192199707, "learning_rate": 4.637626977270697e-06, "loss": 0.2236, "step": 83025 }, { "epoch": 0.82, "grad_norm": 13.531192779541016, "learning_rate": 4.637502854816448e-06, "loss": 0.4221, "step": 83050 }, { "epoch": 0.82, "grad_norm": 7.386510848999023, "learning_rate": 4.637378732362199e-06, "loss": 0.1973, "step": 83075 }, { "epoch": 0.82, "grad_norm": 16.314287185668945, "learning_rate": 4.637254609907951e-06, "loss": 0.3439, "step": 83100 }, { "epoch": 0.82, "grad_norm": 8.664833068847656, "learning_rate": 4.637130487453702e-06, "loss": 0.2461, "step": 83125 }, { "epoch": 0.82, "grad_norm": 21.530771255493164, "learning_rate": 4.637006364999454e-06, "loss": 0.4267, "step": 83150 }, { "epoch": 0.82, "grad_norm": 6.7997870445251465, "learning_rate": 4.636882242545206e-06, "loss": 0.246, "step": 83175 }, { "epoch": 0.82, "grad_norm": 16.119760513305664, "learning_rate": 4.636758120090957e-06, "loss": 0.425, "step": 83200 }, { "epoch": 0.82, "grad_norm": 7.580116271972656, "learning_rate": 4.636633997636709e-06, "loss": 0.1986, "step": 83225 }, { "epoch": 0.82, "grad_norm": 17.27694320678711, "learning_rate": 4.63650987518246e-06, "loss": 0.4335, "step": 83250 }, { "epoch": 0.82, "grad_norm": 6.8748250007629395, "learning_rate": 4.636385752728212e-06, "loss": 0.2425, "step": 83275 }, { "epoch": 0.82, "grad_norm": 16.238262176513672, "learning_rate": 4.636261630273963e-06, "loss": 0.38, "step": 83300 }, { "epoch": 0.82, "grad_norm": 7.896474838256836, "learning_rate": 4.6361375078197155e-06, "loss": 0.224, "step": 83325 }, { "epoch": 0.82, "grad_norm": 22.16035270690918, "learning_rate": 4.636013385365467e-06, "loss": 0.3743, "step": 83350 }, { "epoch": 0.82, "grad_norm": 9.12401294708252, "learning_rate": 4.635889262911218e-06, "loss": 0.1876, "step": 83375 }, { "epoch": 0.82, "grad_norm": 18.137788772583008, "learning_rate": 4.63576514045697e-06, "loss": 0.4138, "step": 83400 }, { "epoch": 0.82, "grad_norm": 7.298060417175293, "learning_rate": 4.635641018002721e-06, "loss": 0.2333, "step": 83425 }, { "epoch": 0.82, "grad_norm": 14.230142593383789, "learning_rate": 4.635516895548472e-06, "loss": 0.4187, "step": 83450 }, { "epoch": 0.82, "grad_norm": 7.797863006591797, "learning_rate": 4.6353927730942244e-06, "loss": 0.2135, "step": 83475 }, { "epoch": 0.82, "grad_norm": 8.509295463562012, "learning_rate": 4.635268650639976e-06, "loss": 0.3946, "step": 83500 }, { "epoch": 0.82, "grad_norm": 7.3865132331848145, "learning_rate": 4.635144528185727e-06, "loss": 0.1978, "step": 83525 }, { "epoch": 0.82, "grad_norm": 13.463114738464355, "learning_rate": 4.635020405731479e-06, "loss": 0.3905, "step": 83550 }, { "epoch": 0.82, "grad_norm": 5.528219699859619, "learning_rate": 4.63489628327723e-06, "loss": 0.165, "step": 83575 }, { "epoch": 0.82, "grad_norm": 15.420339584350586, "learning_rate": 4.634772160822982e-06, "loss": 0.3572, "step": 83600 }, { "epoch": 0.82, "grad_norm": 7.756485939025879, "learning_rate": 4.634648038368733e-06, "loss": 0.2212, "step": 83625 }, { "epoch": 0.82, "grad_norm": 23.074134826660156, "learning_rate": 4.6345239159144854e-06, "loss": 0.3395, "step": 83650 }, { "epoch": 0.82, "grad_norm": 6.82558012008667, "learning_rate": 4.634399793460237e-06, "loss": 0.2162, "step": 83675 }, { "epoch": 0.82, "grad_norm": 16.058320999145508, "learning_rate": 4.634275671005989e-06, "loss": 0.3971, "step": 83700 }, { "epoch": 0.82, "grad_norm": 10.014626502990723, "learning_rate": 4.63415154855174e-06, "loss": 0.2063, "step": 83725 }, { "epoch": 0.82, "grad_norm": 13.092534065246582, "learning_rate": 4.634027426097491e-06, "loss": 0.3851, "step": 83750 }, { "epoch": 0.82, "grad_norm": 11.216641426086426, "learning_rate": 4.633903303643243e-06, "loss": 0.2139, "step": 83775 }, { "epoch": 0.82, "grad_norm": 24.748624801635742, "learning_rate": 4.633779181188994e-06, "loss": 0.3923, "step": 83800 }, { "epoch": 0.82, "grad_norm": 18.364892959594727, "learning_rate": 4.633655058734746e-06, "loss": 0.2379, "step": 83825 }, { "epoch": 0.82, "grad_norm": 8.612194061279297, "learning_rate": 4.633530936280497e-06, "loss": 0.3505, "step": 83850 }, { "epoch": 0.82, "grad_norm": 8.029293060302734, "learning_rate": 4.633406813826249e-06, "loss": 0.2646, "step": 83875 }, { "epoch": 0.82, "grad_norm": 10.922780990600586, "learning_rate": 4.63328765627017e-06, "loss": 0.4271, "step": 83900 }, { "epoch": 0.83, "grad_norm": 9.011139869689941, "learning_rate": 4.633163533815922e-06, "loss": 0.2031, "step": 83925 }, { "epoch": 0.83, "grad_norm": 18.159992218017578, "learning_rate": 4.633039411361673e-06, "loss": 0.3916, "step": 83950 }, { "epoch": 0.83, "grad_norm": 7.612317085266113, "learning_rate": 4.632915288907424e-06, "loss": 0.2174, "step": 83975 }, { "epoch": 0.83, "grad_norm": 16.707164764404297, "learning_rate": 4.6327911664531765e-06, "loss": 0.3622, "step": 84000 }, { "epoch": 0.83, "grad_norm": 9.625896453857422, "learning_rate": 4.632667043998928e-06, "loss": 0.1686, "step": 84025 }, { "epoch": 0.83, "grad_norm": 19.020597457885742, "learning_rate": 4.63254292154468e-06, "loss": 0.4253, "step": 84050 }, { "epoch": 0.83, "grad_norm": 8.923417091369629, "learning_rate": 4.632418799090431e-06, "loss": 0.2635, "step": 84075 }, { "epoch": 0.83, "grad_norm": 20.577102661132812, "learning_rate": 4.632294676636183e-06, "loss": 0.3998, "step": 84100 }, { "epoch": 0.83, "grad_norm": 11.575098991394043, "learning_rate": 4.632170554181934e-06, "loss": 0.2234, "step": 84125 }, { "epoch": 0.83, "grad_norm": 19.7717227935791, "learning_rate": 4.632046431727686e-06, "loss": 0.3696, "step": 84150 }, { "epoch": 0.83, "grad_norm": 8.521339416503906, "learning_rate": 4.6319223092734375e-06, "loss": 0.1921, "step": 84175 }, { "epoch": 0.83, "grad_norm": 15.92007064819336, "learning_rate": 4.631798186819189e-06, "loss": 0.3769, "step": 84200 }, { "epoch": 0.83, "grad_norm": 4.072328567504883, "learning_rate": 4.631674064364941e-06, "loss": 0.1723, "step": 84225 }, { "epoch": 0.83, "grad_norm": 16.42098045349121, "learning_rate": 4.631549941910692e-06, "loss": 0.4336, "step": 84250 }, { "epoch": 0.83, "grad_norm": 7.848473072052002, "learning_rate": 4.631425819456443e-06, "loss": 0.2105, "step": 84275 }, { "epoch": 0.83, "grad_norm": 17.410367965698242, "learning_rate": 4.631301697002195e-06, "loss": 0.394, "step": 84300 }, { "epoch": 0.83, "grad_norm": 6.347203731536865, "learning_rate": 4.6311775745479464e-06, "loss": 0.2523, "step": 84325 }, { "epoch": 0.83, "grad_norm": 19.373626708984375, "learning_rate": 4.631053452093698e-06, "loss": 0.4072, "step": 84350 }, { "epoch": 0.83, "grad_norm": 8.792842864990234, "learning_rate": 4.63092932963945e-06, "loss": 0.1914, "step": 84375 }, { "epoch": 0.83, "grad_norm": 15.856327056884766, "learning_rate": 4.630805207185201e-06, "loss": 0.4124, "step": 84400 }, { "epoch": 0.83, "grad_norm": 7.97528600692749, "learning_rate": 4.630681084730952e-06, "loss": 0.1982, "step": 84425 }, { "epoch": 0.83, "grad_norm": 15.4802885055542, "learning_rate": 4.630556962276704e-06, "loss": 0.42, "step": 84450 }, { "epoch": 0.83, "grad_norm": 11.581535339355469, "learning_rate": 4.630432839822455e-06, "loss": 0.2753, "step": 84475 }, { "epoch": 0.83, "grad_norm": 17.790130615234375, "learning_rate": 4.6303087173682075e-06, "loss": 0.4289, "step": 84500 }, { "epoch": 0.83, "grad_norm": 6.582238674163818, "learning_rate": 4.630184594913959e-06, "loss": 0.2093, "step": 84525 }, { "epoch": 0.83, "grad_norm": 12.294099807739258, "learning_rate": 4.630060472459711e-06, "loss": 0.3556, "step": 84550 }, { "epoch": 0.83, "grad_norm": 5.9979376792907715, "learning_rate": 4.629936350005462e-06, "loss": 0.2495, "step": 84575 }, { "epoch": 0.83, "grad_norm": 12.137763023376465, "learning_rate": 4.629812227551213e-06, "loss": 0.3602, "step": 84600 }, { "epoch": 0.83, "grad_norm": 8.068641662597656, "learning_rate": 4.629688105096965e-06, "loss": 0.1935, "step": 84625 }, { "epoch": 0.83, "grad_norm": 17.497644424438477, "learning_rate": 4.6295639826427164e-06, "loss": 0.3744, "step": 84650 }, { "epoch": 0.83, "grad_norm": 9.289983749389648, "learning_rate": 4.629439860188468e-06, "loss": 0.2083, "step": 84675 }, { "epoch": 0.83, "grad_norm": 15.805365562438965, "learning_rate": 4.62931573773422e-06, "loss": 0.3348, "step": 84700 }, { "epoch": 0.83, "grad_norm": 11.327028274536133, "learning_rate": 4.629191615279971e-06, "loss": 0.2728, "step": 84725 }, { "epoch": 0.83, "grad_norm": 8.79069709777832, "learning_rate": 4.629067492825722e-06, "loss": 0.4095, "step": 84750 }, { "epoch": 0.83, "grad_norm": 7.2413411140441895, "learning_rate": 4.628943370371474e-06, "loss": 0.1982, "step": 84775 }, { "epoch": 0.83, "grad_norm": 17.027746200561523, "learning_rate": 4.628819247917225e-06, "loss": 0.3335, "step": 84800 }, { "epoch": 0.83, "grad_norm": 9.976123809814453, "learning_rate": 4.628695125462977e-06, "loss": 0.2744, "step": 84825 }, { "epoch": 0.83, "grad_norm": 9.354757308959961, "learning_rate": 4.628571003008729e-06, "loss": 0.3618, "step": 84850 }, { "epoch": 0.83, "grad_norm": 8.634700775146484, "learning_rate": 4.62844688055448e-06, "loss": 0.2157, "step": 84875 }, { "epoch": 0.83, "grad_norm": 12.951868057250977, "learning_rate": 4.628322758100232e-06, "loss": 0.3161, "step": 84900 }, { "epoch": 0.83, "grad_norm": 6.916961669921875, "learning_rate": 4.628198635645983e-06, "loss": 0.1985, "step": 84925 }, { "epoch": 0.84, "grad_norm": 19.948198318481445, "learning_rate": 4.628074513191735e-06, "loss": 0.3518, "step": 84950 }, { "epoch": 0.84, "grad_norm": 6.986914157867432, "learning_rate": 4.627950390737486e-06, "loss": 0.2285, "step": 84975 }, { "epoch": 0.84, "grad_norm": 13.546826362609863, "learning_rate": 4.6278262682832385e-06, "loss": 0.3957, "step": 85000 }, { "epoch": 0.84, "grad_norm": 8.458174705505371, "learning_rate": 4.62770214582899e-06, "loss": 0.237, "step": 85025 }, { "epoch": 0.84, "grad_norm": 13.408402442932129, "learning_rate": 4.627578023374741e-06, "loss": 0.3653, "step": 85050 }, { "epoch": 0.84, "grad_norm": 9.952954292297363, "learning_rate": 4.627453900920493e-06, "loss": 0.2408, "step": 85075 }, { "epoch": 0.84, "grad_norm": 11.988868713378906, "learning_rate": 4.627329778466244e-06, "loss": 0.3807, "step": 85100 }, { "epoch": 0.84, "grad_norm": 10.57896614074707, "learning_rate": 4.627205656011995e-06, "loss": 0.2188, "step": 85125 }, { "epoch": 0.84, "grad_norm": 15.0853910446167, "learning_rate": 4.6270815335577474e-06, "loss": 0.3665, "step": 85150 }, { "epoch": 0.84, "grad_norm": 15.3672456741333, "learning_rate": 4.626957411103499e-06, "loss": 0.2425, "step": 85175 }, { "epoch": 0.84, "grad_norm": 20.373023986816406, "learning_rate": 4.62683328864925e-06, "loss": 0.3742, "step": 85200 }, { "epoch": 0.84, "grad_norm": 12.262311935424805, "learning_rate": 4.626709166195002e-06, "loss": 0.2212, "step": 85225 }, { "epoch": 0.84, "grad_norm": 16.55143928527832, "learning_rate": 4.626585043740753e-06, "loss": 0.336, "step": 85250 }, { "epoch": 0.84, "grad_norm": 7.186399459838867, "learning_rate": 4.626460921286504e-06, "loss": 0.1827, "step": 85275 }, { "epoch": 0.84, "grad_norm": 14.623678207397461, "learning_rate": 4.626336798832256e-06, "loss": 0.352, "step": 85300 }, { "epoch": 0.84, "grad_norm": 7.3516716957092285, "learning_rate": 4.626212676378008e-06, "loss": 0.217, "step": 85325 }, { "epoch": 0.84, "grad_norm": 17.752796173095703, "learning_rate": 4.62608855392376e-06, "loss": 0.389, "step": 85350 }, { "epoch": 0.84, "grad_norm": 4.94877815246582, "learning_rate": 4.625964431469511e-06, "loss": 0.1874, "step": 85375 }, { "epoch": 0.84, "grad_norm": 9.284451484680176, "learning_rate": 4.625840309015263e-06, "loss": 0.344, "step": 85400 }, { "epoch": 0.84, "grad_norm": 8.913058280944824, "learning_rate": 4.625716186561014e-06, "loss": 0.2214, "step": 85425 }, { "epoch": 0.84, "grad_norm": 20.617115020751953, "learning_rate": 4.625592064106765e-06, "loss": 0.3759, "step": 85450 }, { "epoch": 0.84, "grad_norm": 14.631721496582031, "learning_rate": 4.625467941652517e-06, "loss": 0.2425, "step": 85475 }, { "epoch": 0.84, "grad_norm": 21.344539642333984, "learning_rate": 4.625343819198269e-06, "loss": 0.3513, "step": 85500 }, { "epoch": 0.84, "grad_norm": 9.11252212524414, "learning_rate": 4.62521969674402e-06, "loss": 0.1912, "step": 85525 }, { "epoch": 0.84, "grad_norm": 20.946163177490234, "learning_rate": 4.625095574289772e-06, "loss": 0.3859, "step": 85550 }, { "epoch": 0.84, "grad_norm": 10.39757251739502, "learning_rate": 4.624971451835523e-06, "loss": 0.2329, "step": 85575 }, { "epoch": 0.84, "grad_norm": 16.23317527770996, "learning_rate": 4.624847329381274e-06, "loss": 0.4726, "step": 85600 }, { "epoch": 0.84, "grad_norm": 8.05774211883545, "learning_rate": 4.624723206927026e-06, "loss": 0.2505, "step": 85625 }, { "epoch": 0.84, "grad_norm": 24.807662963867188, "learning_rate": 4.624599084472778e-06, "loss": 0.3524, "step": 85650 }, { "epoch": 0.84, "grad_norm": 3.214437961578369, "learning_rate": 4.624474962018529e-06, "loss": 0.2479, "step": 85675 }, { "epoch": 0.84, "grad_norm": 14.994596481323242, "learning_rate": 4.624350839564281e-06, "loss": 0.4048, "step": 85700 }, { "epoch": 0.84, "grad_norm": 11.603293418884277, "learning_rate": 4.624226717110032e-06, "loss": 0.1977, "step": 85725 }, { "epoch": 0.84, "grad_norm": 12.062350273132324, "learning_rate": 4.624102594655784e-06, "loss": 0.2986, "step": 85750 }, { "epoch": 0.84, "grad_norm": 10.766397476196289, "learning_rate": 4.623978472201535e-06, "loss": 0.2319, "step": 85775 }, { "epoch": 0.84, "grad_norm": 16.18118667602539, "learning_rate": 4.623854349747287e-06, "loss": 0.3697, "step": 85800 }, { "epoch": 0.84, "grad_norm": 6.9633941650390625, "learning_rate": 4.623730227293039e-06, "loss": 0.2314, "step": 85825 }, { "epoch": 0.84, "grad_norm": 23.516767501831055, "learning_rate": 4.623606104838791e-06, "loss": 0.3921, "step": 85850 }, { "epoch": 0.84, "grad_norm": 4.674759864807129, "learning_rate": 4.623481982384542e-06, "loss": 0.1921, "step": 85875 }, { "epoch": 0.84, "grad_norm": 9.698713302612305, "learning_rate": 4.623362824828463e-06, "loss": 0.3752, "step": 85900 }, { "epoch": 0.84, "grad_norm": 8.681654930114746, "learning_rate": 4.623238702374215e-06, "loss": 0.2341, "step": 85925 }, { "epoch": 0.85, "grad_norm": 22.91862678527832, "learning_rate": 4.623114579919966e-06, "loss": 0.3938, "step": 85950 }, { "epoch": 0.85, "grad_norm": 14.883262634277344, "learning_rate": 4.622990457465717e-06, "loss": 0.2297, "step": 85975 }, { "epoch": 0.85, "grad_norm": 15.375422477722168, "learning_rate": 4.6228663350114695e-06, "loss": 0.407, "step": 86000 }, { "epoch": 0.85, "grad_norm": 3.7468693256378174, "learning_rate": 4.622742212557221e-06, "loss": 0.2288, "step": 86025 }, { "epoch": 0.85, "grad_norm": 12.80898666381836, "learning_rate": 4.622618090102972e-06, "loss": 0.3326, "step": 86050 }, { "epoch": 0.85, "grad_norm": 7.370795726776123, "learning_rate": 4.622493967648724e-06, "loss": 0.2045, "step": 86075 }, { "epoch": 0.85, "grad_norm": 23.461854934692383, "learning_rate": 4.622369845194475e-06, "loss": 0.4952, "step": 86100 }, { "epoch": 0.85, "grad_norm": 8.454436302185059, "learning_rate": 4.622245722740226e-06, "loss": 0.2208, "step": 86125 }, { "epoch": 0.85, "grad_norm": 13.885692596435547, "learning_rate": 4.622121600285978e-06, "loss": 0.4155, "step": 86150 }, { "epoch": 0.85, "grad_norm": 11.326643943786621, "learning_rate": 4.62199747783173e-06, "loss": 0.2039, "step": 86175 }, { "epoch": 0.85, "grad_norm": 14.17658805847168, "learning_rate": 4.621873355377482e-06, "loss": 0.3744, "step": 86200 }, { "epoch": 0.85, "grad_norm": 5.603442668914795, "learning_rate": 4.621749232923233e-06, "loss": 0.2029, "step": 86225 }, { "epoch": 0.85, "grad_norm": 17.518932342529297, "learning_rate": 4.621625110468985e-06, "loss": 0.4562, "step": 86250 }, { "epoch": 0.85, "grad_norm": 13.263798713684082, "learning_rate": 4.621500988014736e-06, "loss": 0.1948, "step": 86275 }, { "epoch": 0.85, "grad_norm": 12.174098014831543, "learning_rate": 4.621376865560488e-06, "loss": 0.3812, "step": 86300 }, { "epoch": 0.85, "grad_norm": 8.638114929199219, "learning_rate": 4.6212527431062394e-06, "loss": 0.2219, "step": 86325 }, { "epoch": 0.85, "grad_norm": 22.65821075439453, "learning_rate": 4.621128620651991e-06, "loss": 0.4229, "step": 86350 }, { "epoch": 0.85, "grad_norm": 9.861823081970215, "learning_rate": 4.621004498197743e-06, "loss": 0.2141, "step": 86375 }, { "epoch": 0.85, "grad_norm": 18.081296920776367, "learning_rate": 4.620880375743494e-06, "loss": 0.3988, "step": 86400 }, { "epoch": 0.85, "grad_norm": 10.091575622558594, "learning_rate": 4.620756253289245e-06, "loss": 0.2236, "step": 86425 }, { "epoch": 0.85, "grad_norm": 24.19538116455078, "learning_rate": 4.620632130834997e-06, "loss": 0.3801, "step": 86450 }, { "epoch": 0.85, "grad_norm": 10.554863929748535, "learning_rate": 4.620508008380748e-06, "loss": 0.1979, "step": 86475 }, { "epoch": 0.85, "grad_norm": 22.37647819519043, "learning_rate": 4.6203838859265e-06, "loss": 0.3664, "step": 86500 }, { "epoch": 0.85, "grad_norm": 7.374233245849609, "learning_rate": 4.620259763472252e-06, "loss": 0.2159, "step": 86525 }, { "epoch": 0.85, "grad_norm": 16.605480194091797, "learning_rate": 4.620135641018003e-06, "loss": 0.391, "step": 86550 }, { "epoch": 0.85, "grad_norm": 10.186971664428711, "learning_rate": 4.620011518563755e-06, "loss": 0.2256, "step": 86575 }, { "epoch": 0.85, "grad_norm": 18.23634147644043, "learning_rate": 4.619887396109506e-06, "loss": 0.371, "step": 86600 }, { "epoch": 0.85, "grad_norm": 5.17247200012207, "learning_rate": 4.619763273655258e-06, "loss": 0.2017, "step": 86625 }, { "epoch": 0.85, "grad_norm": 14.963730812072754, "learning_rate": 4.619639151201009e-06, "loss": 0.3781, "step": 86650 }, { "epoch": 0.85, "grad_norm": 10.070900917053223, "learning_rate": 4.6195150287467615e-06, "loss": 0.2665, "step": 86675 }, { "epoch": 0.85, "grad_norm": 10.526152610778809, "learning_rate": 4.619390906292513e-06, "loss": 0.3569, "step": 86700 }, { "epoch": 0.85, "grad_norm": 13.150423049926758, "learning_rate": 4.619266783838264e-06, "loss": 0.2259, "step": 86725 }, { "epoch": 0.85, "grad_norm": 16.772178649902344, "learning_rate": 4.619142661384015e-06, "loss": 0.4281, "step": 86750 }, { "epoch": 0.85, "grad_norm": 5.234545707702637, "learning_rate": 4.619018538929767e-06, "loss": 0.2438, "step": 86775 }, { "epoch": 0.85, "grad_norm": 21.270465850830078, "learning_rate": 4.618894416475518e-06, "loss": 0.4141, "step": 86800 }, { "epoch": 0.85, "grad_norm": 7.867035388946533, "learning_rate": 4.61877029402127e-06, "loss": 0.2487, "step": 86825 }, { "epoch": 0.85, "grad_norm": 19.554636001586914, "learning_rate": 4.618646171567022e-06, "loss": 0.3489, "step": 86850 }, { "epoch": 0.85, "grad_norm": 8.646679878234863, "learning_rate": 4.618522049112773e-06, "loss": 0.2353, "step": 86875 }, { "epoch": 0.85, "grad_norm": 16.117908477783203, "learning_rate": 4.618397926658524e-06, "loss": 0.3755, "step": 86900 }, { "epoch": 0.85, "grad_norm": 6.161108493804932, "learning_rate": 4.618273804204276e-06, "loss": 0.2116, "step": 86925 }, { "epoch": 0.85, "grad_norm": 22.460308074951172, "learning_rate": 4.618149681750027e-06, "loss": 0.3871, "step": 86950 }, { "epoch": 0.86, "grad_norm": 11.249564170837402, "learning_rate": 4.618025559295779e-06, "loss": 0.2686, "step": 86975 }, { "epoch": 0.86, "grad_norm": 14.159438133239746, "learning_rate": 4.617901436841531e-06, "loss": 0.4474, "step": 87000 }, { "epoch": 0.86, "grad_norm": 4.236968517303467, "learning_rate": 4.617777314387283e-06, "loss": 0.1635, "step": 87025 }, { "epoch": 0.86, "grad_norm": 14.641486167907715, "learning_rate": 4.617653191933034e-06, "loss": 0.3299, "step": 87050 }, { "epoch": 0.86, "grad_norm": 11.318389892578125, "learning_rate": 4.617529069478786e-06, "loss": 0.2255, "step": 87075 }, { "epoch": 0.86, "grad_norm": 14.027242660522461, "learning_rate": 4.617404947024537e-06, "loss": 0.3667, "step": 87100 }, { "epoch": 0.86, "grad_norm": 7.910984039306641, "learning_rate": 4.617280824570288e-06, "loss": 0.2368, "step": 87125 }, { "epoch": 0.86, "grad_norm": 18.65199089050293, "learning_rate": 4.6171567021160404e-06, "loss": 0.3922, "step": 87150 }, { "epoch": 0.86, "grad_norm": 7.037265777587891, "learning_rate": 4.617032579661792e-06, "loss": 0.1761, "step": 87175 }, { "epoch": 0.86, "grad_norm": 20.507291793823242, "learning_rate": 4.616908457207543e-06, "loss": 0.3946, "step": 87200 }, { "epoch": 0.86, "grad_norm": 14.112531661987305, "learning_rate": 4.616784334753295e-06, "loss": 0.2352, "step": 87225 }, { "epoch": 0.86, "grad_norm": 20.099943161010742, "learning_rate": 4.616660212299046e-06, "loss": 0.4426, "step": 87250 }, { "epoch": 0.86, "grad_norm": 9.811150550842285, "learning_rate": 4.616536089844797e-06, "loss": 0.2442, "step": 87275 }, { "epoch": 0.86, "grad_norm": 22.015710830688477, "learning_rate": 4.616411967390549e-06, "loss": 0.4156, "step": 87300 }, { "epoch": 0.86, "grad_norm": 8.934776306152344, "learning_rate": 4.616287844936301e-06, "loss": 0.2195, "step": 87325 }, { "epoch": 0.86, "grad_norm": 10.931427955627441, "learning_rate": 4.616163722482052e-06, "loss": 0.4256, "step": 87350 }, { "epoch": 0.86, "grad_norm": 7.6739115715026855, "learning_rate": 4.616039600027804e-06, "loss": 0.1976, "step": 87375 }, { "epoch": 0.86, "grad_norm": 15.805526733398438, "learning_rate": 4.615915477573555e-06, "loss": 0.3961, "step": 87400 }, { "epoch": 0.86, "grad_norm": 5.284278392791748, "learning_rate": 4.615791355119307e-06, "loss": 0.1964, "step": 87425 }, { "epoch": 0.86, "grad_norm": 16.357995986938477, "learning_rate": 4.615667232665058e-06, "loss": 0.4485, "step": 87450 }, { "epoch": 0.86, "grad_norm": 10.045332908630371, "learning_rate": 4.61554311021081e-06, "loss": 0.1978, "step": 87475 }, { "epoch": 0.86, "grad_norm": 25.89838981628418, "learning_rate": 4.615418987756562e-06, "loss": 0.3009, "step": 87500 }, { "epoch": 0.86, "grad_norm": 7.097484588623047, "learning_rate": 4.615294865302314e-06, "loss": 0.2004, "step": 87525 }, { "epoch": 0.86, "grad_norm": 12.965370178222656, "learning_rate": 4.615170742848065e-06, "loss": 0.3819, "step": 87550 }, { "epoch": 0.86, "grad_norm": 11.756599426269531, "learning_rate": 4.615046620393816e-06, "loss": 0.2669, "step": 87575 }, { "epoch": 0.86, "grad_norm": 18.132831573486328, "learning_rate": 4.614922497939567e-06, "loss": 0.3752, "step": 87600 }, { "epoch": 0.86, "grad_norm": 5.37237548828125, "learning_rate": 4.614798375485319e-06, "loss": 0.19, "step": 87625 }, { "epoch": 0.86, "grad_norm": 11.500112533569336, "learning_rate": 4.614674253031071e-06, "loss": 0.4161, "step": 87650 }, { "epoch": 0.86, "grad_norm": 9.870452880859375, "learning_rate": 4.614550130576822e-06, "loss": 0.2322, "step": 87675 }, { "epoch": 0.86, "grad_norm": 10.916119575500488, "learning_rate": 4.614426008122574e-06, "loss": 0.3756, "step": 87700 }, { "epoch": 0.86, "grad_norm": 9.457164764404297, "learning_rate": 4.614301885668325e-06, "loss": 0.2017, "step": 87725 }, { "epoch": 0.86, "grad_norm": 17.05466651916504, "learning_rate": 4.614177763214076e-06, "loss": 0.3043, "step": 87750 }, { "epoch": 0.86, "grad_norm": 6.885769367218018, "learning_rate": 4.614053640759828e-06, "loss": 0.2704, "step": 87775 }, { "epoch": 0.86, "grad_norm": 19.571252822875977, "learning_rate": 4.6139295183055795e-06, "loss": 0.3679, "step": 87800 }, { "epoch": 0.86, "grad_norm": 9.24781608581543, "learning_rate": 4.613805395851332e-06, "loss": 0.2444, "step": 87825 }, { "epoch": 0.86, "grad_norm": 18.176952362060547, "learning_rate": 4.613681273397083e-06, "loss": 0.4125, "step": 87850 }, { "epoch": 0.86, "grad_norm": 7.055245399475098, "learning_rate": 4.613557150942835e-06, "loss": 0.1877, "step": 87875 }, { "epoch": 0.86, "grad_norm": 18.769256591796875, "learning_rate": 4.613433028488586e-06, "loss": 0.4262, "step": 87900 }, { "epoch": 0.86, "grad_norm": 12.931010246276855, "learning_rate": 4.613308906034338e-06, "loss": 0.2227, "step": 87925 }, { "epoch": 0.86, "grad_norm": 22.21595001220703, "learning_rate": 4.613189748478259e-06, "loss": 0.4846, "step": 87950 }, { "epoch": 0.86, "grad_norm": 4.909755229949951, "learning_rate": 4.613065626024011e-06, "loss": 0.2036, "step": 87975 }, { "epoch": 0.87, "grad_norm": 15.597441673278809, "learning_rate": 4.6129415035697624e-06, "loss": 0.3742, "step": 88000 }, { "epoch": 0.87, "grad_norm": 7.422199726104736, "learning_rate": 4.612817381115514e-06, "loss": 0.2108, "step": 88025 }, { "epoch": 0.87, "grad_norm": 18.314617156982422, "learning_rate": 4.612693258661266e-06, "loss": 0.3568, "step": 88050 }, { "epoch": 0.87, "grad_norm": 4.041662216186523, "learning_rate": 4.612569136207017e-06, "loss": 0.2225, "step": 88075 }, { "epoch": 0.87, "grad_norm": 20.8563232421875, "learning_rate": 4.612445013752768e-06, "loss": 0.4401, "step": 88100 }, { "epoch": 0.87, "grad_norm": 2.9252212047576904, "learning_rate": 4.61232089129852e-06, "loss": 0.2169, "step": 88125 }, { "epoch": 0.87, "grad_norm": 12.446148872375488, "learning_rate": 4.612196768844271e-06, "loss": 0.3706, "step": 88150 }, { "epoch": 0.87, "grad_norm": 15.91344928741455, "learning_rate": 4.612072646390023e-06, "loss": 0.2639, "step": 88175 }, { "epoch": 0.87, "grad_norm": 17.597808837890625, "learning_rate": 4.611948523935774e-06, "loss": 0.4313, "step": 88200 }, { "epoch": 0.87, "grad_norm": 7.119018077850342, "learning_rate": 4.611824401481526e-06, "loss": 0.1935, "step": 88225 }, { "epoch": 0.87, "grad_norm": 15.859703063964844, "learning_rate": 4.611700279027277e-06, "loss": 0.3639, "step": 88250 }, { "epoch": 0.87, "grad_norm": 8.419442176818848, "learning_rate": 4.611576156573029e-06, "loss": 0.2236, "step": 88275 }, { "epoch": 0.87, "grad_norm": 16.383150100708008, "learning_rate": 4.61145203411878e-06, "loss": 0.4282, "step": 88300 }, { "epoch": 0.87, "grad_norm": 9.417588233947754, "learning_rate": 4.6113279116645324e-06, "loss": 0.1993, "step": 88325 }, { "epoch": 0.87, "grad_norm": 7.099625110626221, "learning_rate": 4.611203789210284e-06, "loss": 0.3436, "step": 88350 }, { "epoch": 0.87, "grad_norm": 8.11490535736084, "learning_rate": 4.611079666756036e-06, "loss": 0.2073, "step": 88375 }, { "epoch": 0.87, "grad_norm": 12.104679107666016, "learning_rate": 4.610955544301787e-06, "loss": 0.3531, "step": 88400 }, { "epoch": 0.87, "grad_norm": 10.612656593322754, "learning_rate": 4.610831421847538e-06, "loss": 0.1962, "step": 88425 }, { "epoch": 0.87, "grad_norm": 20.09461212158203, "learning_rate": 4.61070729939329e-06, "loss": 0.393, "step": 88450 }, { "epoch": 0.87, "grad_norm": 10.503173828125, "learning_rate": 4.610583176939041e-06, "loss": 0.2057, "step": 88475 }, { "epoch": 0.87, "grad_norm": 19.857576370239258, "learning_rate": 4.610459054484793e-06, "loss": 0.3725, "step": 88500 }, { "epoch": 0.87, "grad_norm": 12.427562713623047, "learning_rate": 4.610334932030545e-06, "loss": 0.2119, "step": 88525 }, { "epoch": 0.87, "grad_norm": 18.766124725341797, "learning_rate": 4.610210809576296e-06, "loss": 0.3373, "step": 88550 }, { "epoch": 0.87, "grad_norm": 10.209671974182129, "learning_rate": 4.610086687122047e-06, "loss": 0.2296, "step": 88575 }, { "epoch": 0.87, "grad_norm": 22.87015724182129, "learning_rate": 4.609962564667799e-06, "loss": 0.465, "step": 88600 }, { "epoch": 0.87, "grad_norm": 9.25946044921875, "learning_rate": 4.60983844221355e-06, "loss": 0.2025, "step": 88625 }, { "epoch": 0.87, "grad_norm": 15.772133827209473, "learning_rate": 4.6097143197593016e-06, "loss": 0.3346, "step": 88650 }, { "epoch": 0.87, "grad_norm": 5.653573989868164, "learning_rate": 4.609590197305054e-06, "loss": 0.2312, "step": 88675 }, { "epoch": 0.87, "grad_norm": 7.648819923400879, "learning_rate": 4.609466074850805e-06, "loss": 0.3349, "step": 88700 }, { "epoch": 0.87, "grad_norm": 6.221137046813965, "learning_rate": 4.609341952396557e-06, "loss": 0.2259, "step": 88725 }, { "epoch": 0.87, "grad_norm": 17.068613052368164, "learning_rate": 4.609217829942308e-06, "loss": 0.368, "step": 88750 }, { "epoch": 0.87, "grad_norm": 9.891647338867188, "learning_rate": 4.60909370748806e-06, "loss": 0.246, "step": 88775 }, { "epoch": 0.87, "grad_norm": 20.80292510986328, "learning_rate": 4.608969585033811e-06, "loss": 0.3859, "step": 88800 }, { "epoch": 0.87, "grad_norm": 8.70503044128418, "learning_rate": 4.6088454625795634e-06, "loss": 0.2296, "step": 88825 }, { "epoch": 0.87, "grad_norm": 13.700130462646484, "learning_rate": 4.608721340125315e-06, "loss": 0.3571, "step": 88850 }, { "epoch": 0.87, "grad_norm": 8.860248565673828, "learning_rate": 4.608597217671066e-06, "loss": 0.21, "step": 88875 }, { "epoch": 0.87, "grad_norm": 21.394577026367188, "learning_rate": 4.608473095216818e-06, "loss": 0.3755, "step": 88900 }, { "epoch": 0.87, "grad_norm": 9.104937553405762, "learning_rate": 4.608348972762569e-06, "loss": 0.233, "step": 88925 }, { "epoch": 0.87, "grad_norm": 14.995684623718262, "learning_rate": 4.60822485030832e-06, "loss": 0.3669, "step": 88950 }, { "epoch": 0.87, "grad_norm": 6.07371711730957, "learning_rate": 4.608100727854072e-06, "loss": 0.2727, "step": 88975 }, { "epoch": 0.88, "grad_norm": 18.35125160217285, "learning_rate": 4.607976605399824e-06, "loss": 0.4064, "step": 89000 }, { "epoch": 0.88, "grad_norm": 13.600873947143555, "learning_rate": 4.607852482945575e-06, "loss": 0.1933, "step": 89025 }, { "epoch": 0.88, "grad_norm": 21.872413635253906, "learning_rate": 4.607728360491326e-06, "loss": 0.3464, "step": 89050 }, { "epoch": 0.88, "grad_norm": 17.6520938873291, "learning_rate": 4.607604238037078e-06, "loss": 0.2251, "step": 89075 }, { "epoch": 0.88, "grad_norm": 18.871713638305664, "learning_rate": 4.607480115582829e-06, "loss": 0.3881, "step": 89100 }, { "epoch": 0.88, "grad_norm": 11.617207527160645, "learning_rate": 4.607355993128581e-06, "loss": 0.2018, "step": 89125 }, { "epoch": 0.88, "grad_norm": 16.324392318725586, "learning_rate": 4.6072318706743326e-06, "loss": 0.3646, "step": 89150 }, { "epoch": 0.88, "grad_norm": 7.202541351318359, "learning_rate": 4.607107748220085e-06, "loss": 0.2107, "step": 89175 }, { "epoch": 0.88, "grad_norm": 12.927664756774902, "learning_rate": 4.606983625765836e-06, "loss": 0.3581, "step": 89200 }, { "epoch": 0.88, "grad_norm": 10.8849458694458, "learning_rate": 4.606859503311588e-06, "loss": 0.2198, "step": 89225 }, { "epoch": 0.88, "grad_norm": 15.4334135055542, "learning_rate": 4.606735380857339e-06, "loss": 0.3791, "step": 89250 }, { "epoch": 0.88, "grad_norm": 10.102041244506836, "learning_rate": 4.60661125840309e-06, "loss": 0.2022, "step": 89275 }, { "epoch": 0.88, "grad_norm": 15.717251777648926, "learning_rate": 4.606487135948842e-06, "loss": 0.3572, "step": 89300 }, { "epoch": 0.88, "grad_norm": 8.752777099609375, "learning_rate": 4.606363013494594e-06, "loss": 0.2144, "step": 89325 }, { "epoch": 0.88, "grad_norm": 16.49439811706543, "learning_rate": 4.606238891040345e-06, "loss": 0.3904, "step": 89350 }, { "epoch": 0.88, "grad_norm": 14.979409217834473, "learning_rate": 4.606114768586097e-06, "loss": 0.2009, "step": 89375 }, { "epoch": 0.88, "grad_norm": 9.218070030212402, "learning_rate": 4.605990646131848e-06, "loss": 0.3721, "step": 89400 }, { "epoch": 0.88, "grad_norm": 5.860188961029053, "learning_rate": 4.605866523677599e-06, "loss": 0.1913, "step": 89425 }, { "epoch": 0.88, "grad_norm": 12.482789039611816, "learning_rate": 4.605742401223351e-06, "loss": 0.4234, "step": 89450 }, { "epoch": 0.88, "grad_norm": 6.52739953994751, "learning_rate": 4.6056182787691026e-06, "loss": 0.1948, "step": 89475 }, { "epoch": 0.88, "grad_norm": 14.525322914123535, "learning_rate": 4.605494156314855e-06, "loss": 0.3859, "step": 89500 }, { "epoch": 0.88, "grad_norm": 8.369850158691406, "learning_rate": 4.605370033860606e-06, "loss": 0.2246, "step": 89525 }, { "epoch": 0.88, "grad_norm": 10.591818809509277, "learning_rate": 4.605245911406358e-06, "loss": 0.4369, "step": 89550 }, { "epoch": 0.88, "grad_norm": 11.126191139221191, "learning_rate": 4.605121788952109e-06, "loss": 0.1958, "step": 89575 }, { "epoch": 0.88, "grad_norm": 17.505796432495117, "learning_rate": 4.604997666497861e-06, "loss": 0.3222, "step": 89600 }, { "epoch": 0.88, "grad_norm": 7.265776634216309, "learning_rate": 4.604873544043612e-06, "loss": 0.2022, "step": 89625 }, { "epoch": 0.88, "grad_norm": 20.591197967529297, "learning_rate": 4.6047494215893636e-06, "loss": 0.371, "step": 89650 }, { "epoch": 0.88, "grad_norm": 7.5207295417785645, "learning_rate": 4.604625299135116e-06, "loss": 0.2294, "step": 89675 }, { "epoch": 0.88, "grad_norm": 7.476589202880859, "learning_rate": 4.604501176680867e-06, "loss": 0.3961, "step": 89700 }, { "epoch": 0.88, "grad_norm": 9.389328956604004, "learning_rate": 4.604377054226618e-06, "loss": 0.2921, "step": 89725 }, { "epoch": 0.88, "grad_norm": 22.003372192382812, "learning_rate": 4.60425293177237e-06, "loss": 0.3418, "step": 89750 }, { "epoch": 0.88, "grad_norm": 8.490463256835938, "learning_rate": 4.604128809318121e-06, "loss": 0.2571, "step": 89775 }, { "epoch": 0.88, "grad_norm": 12.444743156433105, "learning_rate": 4.6040046868638725e-06, "loss": 0.3672, "step": 89800 }, { "epoch": 0.88, "grad_norm": 10.544661521911621, "learning_rate": 4.603880564409625e-06, "loss": 0.2704, "step": 89825 }, { "epoch": 0.88, "grad_norm": 16.88554573059082, "learning_rate": 4.603756441955376e-06, "loss": 0.4467, "step": 89850 }, { "epoch": 0.88, "grad_norm": 14.54924201965332, "learning_rate": 4.603632319501127e-06, "loss": 0.2288, "step": 89875 }, { "epoch": 0.88, "grad_norm": 15.898250579833984, "learning_rate": 4.603508197046879e-06, "loss": 0.4588, "step": 89900 }, { "epoch": 0.88, "grad_norm": 9.390778541564941, "learning_rate": 4.60338407459263e-06, "loss": 0.223, "step": 89925 }, { "epoch": 0.88, "grad_norm": 18.150415420532227, "learning_rate": 4.603264917036552e-06, "loss": 0.3757, "step": 89950 }, { "epoch": 0.88, "grad_norm": 12.366211891174316, "learning_rate": 4.603140794582303e-06, "loss": 0.1962, "step": 89975 }, { "epoch": 0.88, "grad_norm": 17.651098251342773, "learning_rate": 4.6030166721280554e-06, "loss": 0.3544, "step": 90000 }, { "epoch": 0.89, "grad_norm": 8.96045207977295, "learning_rate": 4.602892549673807e-06, "loss": 0.1991, "step": 90025 }, { "epoch": 0.89, "grad_norm": 18.273496627807617, "learning_rate": 4.602768427219559e-06, "loss": 0.4057, "step": 90050 }, { "epoch": 0.89, "grad_norm": 8.788583755493164, "learning_rate": 4.60264430476531e-06, "loss": 0.2565, "step": 90075 }, { "epoch": 0.89, "grad_norm": 18.443565368652344, "learning_rate": 4.602520182311061e-06, "loss": 0.4205, "step": 90100 }, { "epoch": 0.89, "grad_norm": 10.610555648803711, "learning_rate": 4.602396059856813e-06, "loss": 0.2174, "step": 90125 }, { "epoch": 0.89, "grad_norm": 15.933602333068848, "learning_rate": 4.602271937402564e-06, "loss": 0.3769, "step": 90150 }, { "epoch": 0.89, "grad_norm": 13.607522964477539, "learning_rate": 4.602147814948316e-06, "loss": 0.204, "step": 90175 }, { "epoch": 0.89, "grad_norm": 15.622742652893066, "learning_rate": 4.602023692494068e-06, "loss": 0.4105, "step": 90200 }, { "epoch": 0.89, "grad_norm": 9.115107536315918, "learning_rate": 4.601899570039819e-06, "loss": 0.2263, "step": 90225 }, { "epoch": 0.89, "grad_norm": 16.70516014099121, "learning_rate": 4.60177544758557e-06, "loss": 0.3885, "step": 90250 }, { "epoch": 0.89, "grad_norm": 10.254165649414062, "learning_rate": 4.601651325131322e-06, "loss": 0.1994, "step": 90275 }, { "epoch": 0.89, "grad_norm": 18.4335994720459, "learning_rate": 4.601527202677073e-06, "loss": 0.4647, "step": 90300 }, { "epoch": 0.89, "grad_norm": 6.861987590789795, "learning_rate": 4.6014030802228246e-06, "loss": 0.2276, "step": 90325 }, { "epoch": 0.89, "grad_norm": 20.437625885009766, "learning_rate": 4.601278957768577e-06, "loss": 0.3469, "step": 90350 }, { "epoch": 0.89, "grad_norm": 10.55308723449707, "learning_rate": 4.601154835314328e-06, "loss": 0.2097, "step": 90375 }, { "epoch": 0.89, "grad_norm": 15.900065422058105, "learning_rate": 4.60103071286008e-06, "loss": 0.4008, "step": 90400 }, { "epoch": 0.89, "grad_norm": 10.83318042755127, "learning_rate": 4.600906590405831e-06, "loss": 0.2392, "step": 90425 }, { "epoch": 0.89, "grad_norm": 17.536972045898438, "learning_rate": 4.600782467951583e-06, "loss": 0.415, "step": 90450 }, { "epoch": 0.89, "grad_norm": 6.005250453948975, "learning_rate": 4.600658345497334e-06, "loss": 0.2203, "step": 90475 }, { "epoch": 0.89, "grad_norm": 18.665685653686523, "learning_rate": 4.600534223043086e-06, "loss": 0.3863, "step": 90500 }, { "epoch": 0.89, "grad_norm": 9.841679573059082, "learning_rate": 4.600410100588838e-06, "loss": 0.2421, "step": 90525 }, { "epoch": 0.89, "grad_norm": 23.21699333190918, "learning_rate": 4.600285978134589e-06, "loss": 0.3654, "step": 90550 }, { "epoch": 0.89, "grad_norm": 5.25843620300293, "learning_rate": 4.60016185568034e-06, "loss": 0.2234, "step": 90575 }, { "epoch": 0.89, "grad_norm": 17.376041412353516, "learning_rate": 4.600037733226092e-06, "loss": 0.3734, "step": 90600 }, { "epoch": 0.89, "grad_norm": 15.243937492370605, "learning_rate": 4.599913610771843e-06, "loss": 0.2246, "step": 90625 }, { "epoch": 0.89, "grad_norm": 14.955799102783203, "learning_rate": 4.5997894883175946e-06, "loss": 0.3857, "step": 90650 }, { "epoch": 0.89, "grad_norm": 9.830228805541992, "learning_rate": 4.599665365863347e-06, "loss": 0.2191, "step": 90675 }, { "epoch": 0.89, "grad_norm": 15.399231910705566, "learning_rate": 4.599541243409098e-06, "loss": 0.333, "step": 90700 }, { "epoch": 0.89, "grad_norm": 9.513561248779297, "learning_rate": 4.599417120954849e-06, "loss": 0.1988, "step": 90725 }, { "epoch": 0.89, "grad_norm": 22.958059310913086, "learning_rate": 4.599292998500601e-06, "loss": 0.3746, "step": 90750 }, { "epoch": 0.89, "grad_norm": 15.170574188232422, "learning_rate": 4.599168876046352e-06, "loss": 0.2242, "step": 90775 }, { "epoch": 0.89, "grad_norm": 16.608623504638672, "learning_rate": 4.599044753592104e-06, "loss": 0.4157, "step": 90800 }, { "epoch": 0.89, "grad_norm": 9.33162784576416, "learning_rate": 4.5989206311378556e-06, "loss": 0.1867, "step": 90825 }, { "epoch": 0.89, "grad_norm": 12.820167541503906, "learning_rate": 4.598796508683608e-06, "loss": 0.3722, "step": 90850 }, { "epoch": 0.89, "grad_norm": 11.071332931518555, "learning_rate": 4.598672386229359e-06, "loss": 0.213, "step": 90875 }, { "epoch": 0.89, "grad_norm": 13.641215324401855, "learning_rate": 4.598548263775111e-06, "loss": 0.4115, "step": 90900 }, { "epoch": 0.89, "grad_norm": 7.880025386810303, "learning_rate": 4.598424141320862e-06, "loss": 0.2084, "step": 90925 }, { "epoch": 0.89, "grad_norm": 21.48643684387207, "learning_rate": 4.598300018866613e-06, "loss": 0.4348, "step": 90950 }, { "epoch": 0.89, "grad_norm": 8.338692665100098, "learning_rate": 4.598175896412365e-06, "loss": 0.2699, "step": 90975 }, { "epoch": 0.89, "grad_norm": 15.37253189086914, "learning_rate": 4.598051773958117e-06, "loss": 0.3814, "step": 91000 }, { "epoch": 0.89, "grad_norm": 17.05373191833496, "learning_rate": 4.597927651503868e-06, "loss": 0.2057, "step": 91025 }, { "epoch": 0.9, "grad_norm": 20.04903221130371, "learning_rate": 4.59780352904962e-06, "loss": 0.3944, "step": 91050 }, { "epoch": 0.9, "grad_norm": 12.840337753295898, "learning_rate": 4.597679406595371e-06, "loss": 0.2372, "step": 91075 }, { "epoch": 0.9, "grad_norm": 11.9146089553833, "learning_rate": 4.597555284141122e-06, "loss": 0.3498, "step": 91100 }, { "epoch": 0.9, "grad_norm": 14.27911376953125, "learning_rate": 4.597431161686874e-06, "loss": 0.2142, "step": 91125 }, { "epoch": 0.9, "grad_norm": 19.045011520385742, "learning_rate": 4.5973070392326256e-06, "loss": 0.3855, "step": 91150 }, { "epoch": 0.9, "grad_norm": 9.409493446350098, "learning_rate": 4.597182916778377e-06, "loss": 0.2193, "step": 91175 }, { "epoch": 0.9, "grad_norm": 6.657181262969971, "learning_rate": 4.597058794324129e-06, "loss": 0.3524, "step": 91200 }, { "epoch": 0.9, "grad_norm": 10.038765907287598, "learning_rate": 4.59693467186988e-06, "loss": 0.1748, "step": 91225 }, { "epoch": 0.9, "grad_norm": 18.59148406982422, "learning_rate": 4.596810549415632e-06, "loss": 0.3881, "step": 91250 }, { "epoch": 0.9, "grad_norm": 8.32211971282959, "learning_rate": 4.596686426961383e-06, "loss": 0.221, "step": 91275 }, { "epoch": 0.9, "grad_norm": 9.59870719909668, "learning_rate": 4.596562304507135e-06, "loss": 0.354, "step": 91300 }, { "epoch": 0.9, "grad_norm": 7.587982654571533, "learning_rate": 4.596438182052887e-06, "loss": 0.2168, "step": 91325 }, { "epoch": 0.9, "grad_norm": 20.825637817382812, "learning_rate": 4.596314059598638e-06, "loss": 0.3837, "step": 91350 }, { "epoch": 0.9, "grad_norm": 11.432605743408203, "learning_rate": 4.59618993714439e-06, "loss": 0.2105, "step": 91375 }, { "epoch": 0.9, "grad_norm": 16.683767318725586, "learning_rate": 4.596065814690141e-06, "loss": 0.4083, "step": 91400 }, { "epoch": 0.9, "grad_norm": 11.095721244812012, "learning_rate": 4.595941692235892e-06, "loss": 0.209, "step": 91425 }, { "epoch": 0.9, "grad_norm": 14.952213287353516, "learning_rate": 4.595817569781644e-06, "loss": 0.3854, "step": 91450 }, { "epoch": 0.9, "grad_norm": 12.812115669250488, "learning_rate": 4.5956934473273955e-06, "loss": 0.1912, "step": 91475 }, { "epoch": 0.9, "grad_norm": 23.181371688842773, "learning_rate": 4.595569324873147e-06, "loss": 0.4281, "step": 91500 }, { "epoch": 0.9, "grad_norm": 8.686019897460938, "learning_rate": 4.595445202418899e-06, "loss": 0.1985, "step": 91525 }, { "epoch": 0.9, "grad_norm": 11.846772193908691, "learning_rate": 4.59532107996465e-06, "loss": 0.3393, "step": 91550 }, { "epoch": 0.9, "grad_norm": 8.267683982849121, "learning_rate": 4.595196957510401e-06, "loss": 0.2642, "step": 91575 }, { "epoch": 0.9, "grad_norm": 15.353339195251465, "learning_rate": 4.595072835056153e-06, "loss": 0.3518, "step": 91600 }, { "epoch": 0.9, "grad_norm": 15.567602157592773, "learning_rate": 4.5949487126019045e-06, "loss": 0.2248, "step": 91625 }, { "epoch": 0.9, "grad_norm": 20.02049446105957, "learning_rate": 4.5948245901476566e-06, "loss": 0.3846, "step": 91650 }, { "epoch": 0.9, "grad_norm": 6.152466297149658, "learning_rate": 4.594700467693408e-06, "loss": 0.219, "step": 91675 }, { "epoch": 0.9, "grad_norm": 13.98032283782959, "learning_rate": 4.59457634523916e-06, "loss": 0.4135, "step": 91700 }, { "epoch": 0.9, "grad_norm": 14.381750106811523, "learning_rate": 4.594452222784911e-06, "loss": 0.1811, "step": 91725 }, { "epoch": 0.9, "grad_norm": 20.09621238708496, "learning_rate": 4.594328100330663e-06, "loss": 0.4302, "step": 91750 }, { "epoch": 0.9, "grad_norm": 12.72239875793457, "learning_rate": 4.594203977876414e-06, "loss": 0.2384, "step": 91775 }, { "epoch": 0.9, "grad_norm": 16.327640533447266, "learning_rate": 4.5940798554221655e-06, "loss": 0.4005, "step": 91800 }, { "epoch": 0.9, "grad_norm": 9.784204483032227, "learning_rate": 4.593955732967918e-06, "loss": 0.1902, "step": 91825 }, { "epoch": 0.9, "grad_norm": 11.047547340393066, "learning_rate": 4.593831610513669e-06, "loss": 0.303, "step": 91850 }, { "epoch": 0.9, "grad_norm": 12.918702125549316, "learning_rate": 4.59370748805942e-06, "loss": 0.252, "step": 91875 }, { "epoch": 0.9, "grad_norm": 13.071159362792969, "learning_rate": 4.593583365605172e-06, "loss": 0.4232, "step": 91900 }, { "epoch": 0.9, "grad_norm": 4.546433448791504, "learning_rate": 4.593459243150923e-06, "loss": 0.181, "step": 91925 }, { "epoch": 0.9, "grad_norm": 21.639373779296875, "learning_rate": 4.5933351206966745e-06, "loss": 0.4378, "step": 91950 }, { "epoch": 0.9, "grad_norm": 9.518383026123047, "learning_rate": 4.5932109982424265e-06, "loss": 0.22, "step": 91975 }, { "epoch": 0.9, "grad_norm": 19.86911964416504, "learning_rate": 4.593086875788178e-06, "loss": 0.4082, "step": 92000 }, { "epoch": 0.9, "grad_norm": 10.371731758117676, "learning_rate": 4.592962753333929e-06, "loss": 0.2117, "step": 92025 }, { "epoch": 0.91, "grad_norm": 7.7625932693481445, "learning_rate": 4.592843595777851e-06, "loss": 0.4199, "step": 92050 }, { "epoch": 0.91, "grad_norm": 11.413724899291992, "learning_rate": 4.592719473323602e-06, "loss": 0.2485, "step": 92075 }, { "epoch": 0.91, "grad_norm": 22.97353172302246, "learning_rate": 4.592595350869354e-06, "loss": 0.3521, "step": 92100 }, { "epoch": 0.91, "grad_norm": 7.26945161819458, "learning_rate": 4.592471228415105e-06, "loss": 0.1856, "step": 92125 }, { "epoch": 0.91, "grad_norm": 16.61149787902832, "learning_rate": 4.592347105960857e-06, "loss": 0.413, "step": 92150 }, { "epoch": 0.91, "grad_norm": 10.628376960754395, "learning_rate": 4.592222983506609e-06, "loss": 0.2756, "step": 92175 }, { "epoch": 0.91, "grad_norm": 12.827118873596191, "learning_rate": 4.592098861052361e-06, "loss": 0.3437, "step": 92200 }, { "epoch": 0.91, "grad_norm": 7.93777322769165, "learning_rate": 4.591974738598112e-06, "loss": 0.206, "step": 92225 }, { "epoch": 0.91, "grad_norm": 18.238014221191406, "learning_rate": 4.591850616143863e-06, "loss": 0.3716, "step": 92250 }, { "epoch": 0.91, "grad_norm": 8.789666175842285, "learning_rate": 4.591726493689615e-06, "loss": 0.202, "step": 92275 }, { "epoch": 0.91, "grad_norm": 16.642005920410156, "learning_rate": 4.591602371235366e-06, "loss": 0.4564, "step": 92300 }, { "epoch": 0.91, "grad_norm": 11.983353614807129, "learning_rate": 4.5914782487811176e-06, "loss": 0.2085, "step": 92325 }, { "epoch": 0.91, "grad_norm": 21.32388687133789, "learning_rate": 4.59135412632687e-06, "loss": 0.3372, "step": 92350 }, { "epoch": 0.91, "grad_norm": 6.768614292144775, "learning_rate": 4.591230003872621e-06, "loss": 0.1886, "step": 92375 }, { "epoch": 0.91, "grad_norm": 14.892202377319336, "learning_rate": 4.591105881418372e-06, "loss": 0.4097, "step": 92400 }, { "epoch": 0.91, "grad_norm": 10.73038101196289, "learning_rate": 4.590981758964124e-06, "loss": 0.1826, "step": 92425 }, { "epoch": 0.91, "grad_norm": 13.215642929077148, "learning_rate": 4.590857636509875e-06, "loss": 0.3237, "step": 92450 }, { "epoch": 0.91, "grad_norm": 9.918741226196289, "learning_rate": 4.590733514055627e-06, "loss": 0.2027, "step": 92475 }, { "epoch": 0.91, "grad_norm": 22.232318878173828, "learning_rate": 4.590609391601379e-06, "loss": 0.4182, "step": 92500 }, { "epoch": 0.91, "grad_norm": 12.902971267700195, "learning_rate": 4.590485269147131e-06, "loss": 0.1838, "step": 92525 }, { "epoch": 0.91, "grad_norm": 19.40598487854004, "learning_rate": 4.590361146692882e-06, "loss": 0.3745, "step": 92550 }, { "epoch": 0.91, "grad_norm": 12.583097457885742, "learning_rate": 4.590237024238634e-06, "loss": 0.2095, "step": 92575 }, { "epoch": 0.91, "grad_norm": 17.162878036499023, "learning_rate": 4.590112901784385e-06, "loss": 0.3972, "step": 92600 }, { "epoch": 0.91, "grad_norm": 14.366013526916504, "learning_rate": 4.589988779330136e-06, "loss": 0.2323, "step": 92625 }, { "epoch": 0.91, "grad_norm": 16.26837730407715, "learning_rate": 4.589864656875888e-06, "loss": 0.3995, "step": 92650 }, { "epoch": 0.91, "grad_norm": 7.6920647621154785, "learning_rate": 4.58974053442164e-06, "loss": 0.2765, "step": 92675 }, { "epoch": 0.91, "grad_norm": 16.32198715209961, "learning_rate": 4.589616411967391e-06, "loss": 0.3467, "step": 92700 }, { "epoch": 0.91, "grad_norm": 12.77200984954834, "learning_rate": 4.589492289513142e-06, "loss": 0.2202, "step": 92725 }, { "epoch": 0.91, "grad_norm": 18.968994140625, "learning_rate": 4.589368167058894e-06, "loss": 0.4201, "step": 92750 }, { "epoch": 0.91, "grad_norm": 10.026745796203613, "learning_rate": 4.589244044604645e-06, "loss": 0.2516, "step": 92775 }, { "epoch": 0.91, "grad_norm": 13.960412979125977, "learning_rate": 4.5891199221503965e-06, "loss": 0.3365, "step": 92800 }, { "epoch": 0.91, "grad_norm": 11.56924057006836, "learning_rate": 4.5889957996961486e-06, "loss": 0.215, "step": 92825 }, { "epoch": 0.91, "grad_norm": 10.739965438842773, "learning_rate": 4.5888716772419e-06, "loss": 0.3979, "step": 92850 }, { "epoch": 0.91, "grad_norm": 8.552739143371582, "learning_rate": 4.588747554787652e-06, "loss": 0.2483, "step": 92875 }, { "epoch": 0.91, "grad_norm": 13.839591026306152, "learning_rate": 4.588623432333403e-06, "loss": 0.3515, "step": 92900 }, { "epoch": 0.91, "grad_norm": 9.433384895324707, "learning_rate": 4.588499309879155e-06, "loss": 0.2178, "step": 92925 }, { "epoch": 0.91, "grad_norm": 16.342517852783203, "learning_rate": 4.588375187424906e-06, "loss": 0.4067, "step": 92950 }, { "epoch": 0.91, "grad_norm": 10.483829498291016, "learning_rate": 4.588251064970658e-06, "loss": 0.1812, "step": 92975 }, { "epoch": 0.91, "grad_norm": 12.846193313598633, "learning_rate": 4.58812694251641e-06, "loss": 0.37, "step": 93000 }, { "epoch": 0.91, "grad_norm": 11.299796104431152, "learning_rate": 4.588002820062161e-06, "loss": 0.2168, "step": 93025 }, { "epoch": 0.91, "grad_norm": 16.401615142822266, "learning_rate": 4.587878697607913e-06, "loss": 0.4178, "step": 93050 }, { "epoch": 0.92, "grad_norm": 11.247954368591309, "learning_rate": 4.587754575153664e-06, "loss": 0.2303, "step": 93075 }, { "epoch": 0.92, "grad_norm": 12.709555625915527, "learning_rate": 4.587630452699415e-06, "loss": 0.3987, "step": 93100 }, { "epoch": 0.92, "grad_norm": 10.679481506347656, "learning_rate": 4.587506330245167e-06, "loss": 0.2425, "step": 93125 }, { "epoch": 0.92, "grad_norm": 15.520750999450684, "learning_rate": 4.5873822077909185e-06, "loss": 0.3429, "step": 93150 }, { "epoch": 0.92, "grad_norm": 13.936484336853027, "learning_rate": 4.58725808533667e-06, "loss": 0.2787, "step": 93175 }, { "epoch": 0.92, "grad_norm": 12.871499061584473, "learning_rate": 4.587133962882422e-06, "loss": 0.4277, "step": 93200 }, { "epoch": 0.92, "grad_norm": 13.605853080749512, "learning_rate": 4.587009840428173e-06, "loss": 0.23, "step": 93225 }, { "epoch": 0.92, "grad_norm": 14.541341781616211, "learning_rate": 4.586885717973924e-06, "loss": 0.3453, "step": 93250 }, { "epoch": 0.92, "grad_norm": 12.44194507598877, "learning_rate": 4.586761595519676e-06, "loss": 0.212, "step": 93275 }, { "epoch": 0.92, "grad_norm": 7.653554916381836, "learning_rate": 4.5866374730654275e-06, "loss": 0.4101, "step": 93300 }, { "epoch": 0.92, "grad_norm": 9.267356872558594, "learning_rate": 4.5865133506111796e-06, "loss": 0.198, "step": 93325 }, { "epoch": 0.92, "grad_norm": 16.02283477783203, "learning_rate": 4.586389228156931e-06, "loss": 0.3313, "step": 93350 }, { "epoch": 0.92, "grad_norm": 11.892133712768555, "learning_rate": 4.586265105702683e-06, "loss": 0.2, "step": 93375 }, { "epoch": 0.92, "grad_norm": 22.050153732299805, "learning_rate": 4.586140983248434e-06, "loss": 0.4113, "step": 93400 }, { "epoch": 0.92, "grad_norm": 8.274442672729492, "learning_rate": 4.586016860794186e-06, "loss": 0.2054, "step": 93425 }, { "epoch": 0.92, "grad_norm": 10.372457504272461, "learning_rate": 4.585892738339937e-06, "loss": 0.4223, "step": 93450 }, { "epoch": 0.92, "grad_norm": 8.694490432739258, "learning_rate": 4.5857686158856885e-06, "loss": 0.189, "step": 93475 }, { "epoch": 0.92, "grad_norm": 18.912904739379883, "learning_rate": 4.585644493431441e-06, "loss": 0.3533, "step": 93500 }, { "epoch": 0.92, "grad_norm": 12.12200927734375, "learning_rate": 4.585520370977192e-06, "loss": 0.2101, "step": 93525 }, { "epoch": 0.92, "grad_norm": 24.739574432373047, "learning_rate": 4.585396248522943e-06, "loss": 0.4159, "step": 93550 }, { "epoch": 0.92, "grad_norm": 7.3028435707092285, "learning_rate": 4.585272126068694e-06, "loss": 0.1956, "step": 93575 }, { "epoch": 0.92, "grad_norm": 25.297208786010742, "learning_rate": 4.585148003614446e-06, "loss": 0.439, "step": 93600 }, { "epoch": 0.92, "grad_norm": 10.180426597595215, "learning_rate": 4.5850238811601975e-06, "loss": 0.2024, "step": 93625 }, { "epoch": 0.92, "grad_norm": 16.16980743408203, "learning_rate": 4.584899758705949e-06, "loss": 0.4066, "step": 93650 }, { "epoch": 0.92, "grad_norm": 4.183172702789307, "learning_rate": 4.584775636251701e-06, "loss": 0.1794, "step": 93675 }, { "epoch": 0.92, "grad_norm": 19.354127883911133, "learning_rate": 4.584651513797452e-06, "loss": 0.3408, "step": 93700 }, { "epoch": 0.92, "grad_norm": 11.470808029174805, "learning_rate": 4.584527391343204e-06, "loss": 0.2448, "step": 93725 }, { "epoch": 0.92, "grad_norm": 9.672252655029297, "learning_rate": 4.584403268888955e-06, "loss": 0.3757, "step": 93750 }, { "epoch": 0.92, "grad_norm": 5.774045944213867, "learning_rate": 4.584279146434707e-06, "loss": 0.1815, "step": 93775 }, { "epoch": 0.92, "grad_norm": 14.766891479492188, "learning_rate": 4.5841550239804585e-06, "loss": 0.3892, "step": 93800 }, { "epoch": 0.92, "grad_norm": 9.218329429626465, "learning_rate": 4.5840309015262106e-06, "loss": 0.2202, "step": 93825 }, { "epoch": 0.92, "grad_norm": 20.422565460205078, "learning_rate": 4.583906779071962e-06, "loss": 0.4797, "step": 93850 }, { "epoch": 0.92, "grad_norm": 11.472539901733398, "learning_rate": 4.583782656617713e-06, "loss": 0.1968, "step": 93875 }, { "epoch": 0.92, "grad_norm": 22.347536087036133, "learning_rate": 4.583658534163465e-06, "loss": 0.3591, "step": 93900 }, { "epoch": 0.92, "grad_norm": 4.905228614807129, "learning_rate": 4.583534411709216e-06, "loss": 0.1946, "step": 93925 }, { "epoch": 0.92, "grad_norm": 15.285430908203125, "learning_rate": 4.5834102892549675e-06, "loss": 0.3695, "step": 93950 }, { "epoch": 0.92, "grad_norm": 7.791321754455566, "learning_rate": 4.5832861668007195e-06, "loss": 0.2204, "step": 93975 }, { "epoch": 0.92, "grad_norm": 12.966170310974121, "learning_rate": 4.583162044346471e-06, "loss": 0.3919, "step": 94000 }, { "epoch": 0.92, "grad_norm": 7.522392272949219, "learning_rate": 4.583037921892222e-06, "loss": 0.1912, "step": 94025 }, { "epoch": 0.92, "grad_norm": Infinity, "learning_rate": 4.582918764336144e-06, "loss": 0.3867, "step": 94050 }, { "epoch": 0.92, "grad_norm": 11.471181869506836, "learning_rate": 4.582794641881895e-06, "loss": 0.1969, "step": 94075 }, { "epoch": 0.93, "grad_norm": 23.57036018371582, "learning_rate": 4.582670519427647e-06, "loss": 0.4722, "step": 94100 }, { "epoch": 0.93, "grad_norm": 14.530646324157715, "learning_rate": 4.582546396973398e-06, "loss": 0.2429, "step": 94125 }, { "epoch": 0.93, "grad_norm": 13.131502151489258, "learning_rate": 4.5824222745191495e-06, "loss": 0.3811, "step": 94150 }, { "epoch": 0.93, "grad_norm": 15.095681190490723, "learning_rate": 4.582298152064902e-06, "loss": 0.2353, "step": 94175 }, { "epoch": 0.93, "grad_norm": 17.575565338134766, "learning_rate": 4.582174029610653e-06, "loss": 0.3656, "step": 94200 }, { "epoch": 0.93, "grad_norm": 8.794591903686523, "learning_rate": 4.582049907156405e-06, "loss": 0.1962, "step": 94225 }, { "epoch": 0.93, "grad_norm": 13.375934600830078, "learning_rate": 4.581925784702156e-06, "loss": 0.3527, "step": 94250 }, { "epoch": 0.93, "grad_norm": 13.03649616241455, "learning_rate": 4.581801662247908e-06, "loss": 0.227, "step": 94275 }, { "epoch": 0.93, "grad_norm": 15.0343017578125, "learning_rate": 4.581677539793659e-06, "loss": 0.3561, "step": 94300 }, { "epoch": 0.93, "grad_norm": 14.133198738098145, "learning_rate": 4.5815534173394106e-06, "loss": 0.2275, "step": 94325 }, { "epoch": 0.93, "grad_norm": 17.626323699951172, "learning_rate": 4.581429294885163e-06, "loss": 0.3704, "step": 94350 }, { "epoch": 0.93, "grad_norm": 8.980371475219727, "learning_rate": 4.581305172430914e-06, "loss": 0.2501, "step": 94375 }, { "epoch": 0.93, "grad_norm": 14.181253433227539, "learning_rate": 4.581181049976665e-06, "loss": 0.2999, "step": 94400 }, { "epoch": 0.93, "grad_norm": 5.943210601806641, "learning_rate": 4.581056927522417e-06, "loss": 0.1906, "step": 94425 }, { "epoch": 0.93, "grad_norm": 18.809600830078125, "learning_rate": 4.580932805068168e-06, "loss": 0.3795, "step": 94450 }, { "epoch": 0.93, "grad_norm": 6.426043510437012, "learning_rate": 4.5808086826139195e-06, "loss": 0.1988, "step": 94475 }, { "epoch": 0.93, "grad_norm": 18.1617488861084, "learning_rate": 4.5806845601596716e-06, "loss": 0.392, "step": 94500 }, { "epoch": 0.93, "grad_norm": 7.441276550292969, "learning_rate": 4.580560437705423e-06, "loss": 0.2019, "step": 94525 }, { "epoch": 0.93, "grad_norm": 13.880979537963867, "learning_rate": 4.580436315251174e-06, "loss": 0.3713, "step": 94550 }, { "epoch": 0.93, "grad_norm": 5.339727878570557, "learning_rate": 4.580312192796926e-06, "loss": 0.2057, "step": 94575 }, { "epoch": 0.93, "grad_norm": 18.626705169677734, "learning_rate": 4.580188070342677e-06, "loss": 0.4639, "step": 94600 }, { "epoch": 0.93, "grad_norm": 7.991731643676758, "learning_rate": 4.580063947888429e-06, "loss": 0.229, "step": 94625 }, { "epoch": 0.93, "grad_norm": 12.70996379852295, "learning_rate": 4.5799398254341805e-06, "loss": 0.3746, "step": 94650 }, { "epoch": 0.93, "grad_norm": 6.926456928253174, "learning_rate": 4.579815702979933e-06, "loss": 0.2276, "step": 94675 }, { "epoch": 0.93, "grad_norm": 20.273372650146484, "learning_rate": 4.579691580525684e-06, "loss": 0.3768, "step": 94700 }, { "epoch": 0.93, "grad_norm": 6.582809925079346, "learning_rate": 4.579567458071436e-06, "loss": 0.2334, "step": 94725 }, { "epoch": 0.93, "grad_norm": 12.384455680847168, "learning_rate": 4.579443335617187e-06, "loss": 0.3933, "step": 94750 }, { "epoch": 0.93, "grad_norm": 9.006918907165527, "learning_rate": 4.579319213162938e-06, "loss": 0.187, "step": 94775 }, { "epoch": 0.93, "grad_norm": 8.492240905761719, "learning_rate": 4.57919509070869e-06, "loss": 0.4274, "step": 94800 }, { "epoch": 0.93, "grad_norm": 6.173733711242676, "learning_rate": 4.5790709682544416e-06, "loss": 0.2147, "step": 94825 }, { "epoch": 0.93, "grad_norm": 15.349608421325684, "learning_rate": 4.578946845800193e-06, "loss": 0.4057, "step": 94850 }, { "epoch": 0.93, "grad_norm": 5.05084753036499, "learning_rate": 4.578822723345945e-06, "loss": 0.211, "step": 94875 }, { "epoch": 0.93, "grad_norm": 6.988426685333252, "learning_rate": 4.578698600891696e-06, "loss": 0.3735, "step": 94900 }, { "epoch": 0.93, "grad_norm": 10.032491683959961, "learning_rate": 4.578574478437447e-06, "loss": 0.2068, "step": 94925 }, { "epoch": 0.93, "grad_norm": 17.199846267700195, "learning_rate": 4.578450355983199e-06, "loss": 0.4068, "step": 94950 }, { "epoch": 0.93, "grad_norm": 8.79592514038086, "learning_rate": 4.5783262335289505e-06, "loss": 0.226, "step": 94975 }, { "epoch": 0.93, "grad_norm": 20.137144088745117, "learning_rate": 4.578202111074702e-06, "loss": 0.3517, "step": 95000 }, { "epoch": 0.93, "grad_norm": 7.238312244415283, "learning_rate": 4.578077988620454e-06, "loss": 0.2123, "step": 95025 }, { "epoch": 0.93, "grad_norm": 20.04203987121582, "learning_rate": 4.577953866166205e-06, "loss": 0.4308, "step": 95050 }, { "epoch": 0.93, "grad_norm": 10.692976951599121, "learning_rate": 4.577829743711957e-06, "loss": 0.208, "step": 95075 }, { "epoch": 0.94, "grad_norm": 15.292778968811035, "learning_rate": 4.577705621257708e-06, "loss": 0.349, "step": 95100 }, { "epoch": 0.94, "grad_norm": 8.481602668762207, "learning_rate": 4.57758149880346e-06, "loss": 0.2117, "step": 95125 }, { "epoch": 0.94, "grad_norm": 17.516450881958008, "learning_rate": 4.5774573763492115e-06, "loss": 0.3984, "step": 95150 }, { "epoch": 0.94, "grad_norm": 10.032029151916504, "learning_rate": 4.577333253894963e-06, "loss": 0.1941, "step": 95175 }, { "epoch": 0.94, "grad_norm": 19.827919006347656, "learning_rate": 4.577209131440715e-06, "loss": 0.3683, "step": 95200 }, { "epoch": 0.94, "grad_norm": 10.235650062561035, "learning_rate": 4.577085008986466e-06, "loss": 0.1783, "step": 95225 }, { "epoch": 0.94, "grad_norm": 13.413064002990723, "learning_rate": 4.576960886532217e-06, "loss": 0.3817, "step": 95250 }, { "epoch": 0.94, "grad_norm": 9.887409210205078, "learning_rate": 4.576836764077969e-06, "loss": 0.221, "step": 95275 }, { "epoch": 0.94, "grad_norm": 24.309206008911133, "learning_rate": 4.5767126416237205e-06, "loss": 0.4071, "step": 95300 }, { "epoch": 0.94, "grad_norm": 5.414850234985352, "learning_rate": 4.576588519169472e-06, "loss": 0.2212, "step": 95325 }, { "epoch": 0.94, "grad_norm": 9.55440902709961, "learning_rate": 4.576464396715224e-06, "loss": 0.348, "step": 95350 }, { "epoch": 0.94, "grad_norm": 6.889686584472656, "learning_rate": 4.576340274260975e-06, "loss": 0.2251, "step": 95375 }, { "epoch": 0.94, "grad_norm": 17.914663314819336, "learning_rate": 4.576216151806727e-06, "loss": 0.3684, "step": 95400 }, { "epoch": 0.94, "grad_norm": 7.110917091369629, "learning_rate": 4.576092029352478e-06, "loss": 0.2097, "step": 95425 }, { "epoch": 0.94, "grad_norm": 23.413829803466797, "learning_rate": 4.57596790689823e-06, "loss": 0.3952, "step": 95450 }, { "epoch": 0.94, "grad_norm": 11.269353866577148, "learning_rate": 4.5758437844439815e-06, "loss": 0.2658, "step": 95475 }, { "epoch": 0.94, "grad_norm": 7.148421764373779, "learning_rate": 4.575719661989734e-06, "loss": 0.3258, "step": 95500 }, { "epoch": 0.94, "grad_norm": 11.282306671142578, "learning_rate": 4.575595539535485e-06, "loss": 0.2027, "step": 95525 }, { "epoch": 0.94, "grad_norm": 14.899809837341309, "learning_rate": 4.575471417081236e-06, "loss": 0.3745, "step": 95550 }, { "epoch": 0.94, "grad_norm": 7.3272905349731445, "learning_rate": 4.575347294626988e-06, "loss": 0.2484, "step": 95575 }, { "epoch": 0.94, "grad_norm": 21.205339431762695, "learning_rate": 4.575223172172739e-06, "loss": 0.4281, "step": 95600 }, { "epoch": 0.94, "grad_norm": 9.724048614501953, "learning_rate": 4.5750990497184905e-06, "loss": 0.2039, "step": 95625 }, { "epoch": 0.94, "grad_norm": 17.429946899414062, "learning_rate": 4.5749749272642425e-06, "loss": 0.3834, "step": 95650 }, { "epoch": 0.94, "grad_norm": 12.25001335144043, "learning_rate": 4.574850804809994e-06, "loss": 0.1999, "step": 95675 }, { "epoch": 0.94, "grad_norm": 10.895976066589355, "learning_rate": 4.574726682355745e-06, "loss": 0.3097, "step": 95700 }, { "epoch": 0.94, "grad_norm": 4.222595691680908, "learning_rate": 4.574602559901497e-06, "loss": 0.1886, "step": 95725 }, { "epoch": 0.94, "grad_norm": 20.71306037902832, "learning_rate": 4.574478437447248e-06, "loss": 0.3653, "step": 95750 }, { "epoch": 0.94, "grad_norm": 18.205307006835938, "learning_rate": 4.5743543149929994e-06, "loss": 0.2697, "step": 95775 }, { "epoch": 0.94, "grad_norm": 8.273533821105957, "learning_rate": 4.5742301925387515e-06, "loss": 0.4325, "step": 95800 }, { "epoch": 0.94, "grad_norm": 11.389487266540527, "learning_rate": 4.574106070084503e-06, "loss": 0.2232, "step": 95825 }, { "epoch": 0.94, "grad_norm": 18.939546585083008, "learning_rate": 4.573981947630255e-06, "loss": 0.4032, "step": 95850 }, { "epoch": 0.94, "grad_norm": 10.945172309875488, "learning_rate": 4.573857825176006e-06, "loss": 0.2258, "step": 95875 }, { "epoch": 0.94, "grad_norm": 26.470365524291992, "learning_rate": 4.573733702721758e-06, "loss": 0.3505, "step": 95900 }, { "epoch": 0.94, "grad_norm": 13.221000671386719, "learning_rate": 4.573609580267509e-06, "loss": 0.2002, "step": 95925 }, { "epoch": 0.94, "grad_norm": 19.863563537597656, "learning_rate": 4.5734854578132605e-06, "loss": 0.3486, "step": 95950 }, { "epoch": 0.94, "grad_norm": 14.260061264038086, "learning_rate": 4.5733613353590125e-06, "loss": 0.2645, "step": 95975 }, { "epoch": 0.94, "grad_norm": 13.778512954711914, "learning_rate": 4.573237212904764e-06, "loss": 0.3699, "step": 96000 }, { "epoch": 0.94, "grad_norm": 9.484123229980469, "learning_rate": 4.573113090450515e-06, "loss": 0.1906, "step": 96025 }, { "epoch": 0.94, "grad_norm": 18.28668785095215, "learning_rate": 4.572988967996267e-06, "loss": 0.4328, "step": 96050 }, { "epoch": 0.94, "grad_norm": 10.49832820892334, "learning_rate": 4.572864845542018e-06, "loss": 0.2344, "step": 96075 }, { "epoch": 0.94, "grad_norm": 16.445499420166016, "learning_rate": 4.5727407230877694e-06, "loss": 0.4372, "step": 96100 }, { "epoch": 0.95, "grad_norm": 8.729499816894531, "learning_rate": 4.5726166006335215e-06, "loss": 0.2378, "step": 96125 }, { "epoch": 0.95, "grad_norm": 13.658315658569336, "learning_rate": 4.572492478179273e-06, "loss": 0.3855, "step": 96150 }, { "epoch": 0.95, "grad_norm": 14.781224250793457, "learning_rate": 4.572368355725024e-06, "loss": 0.2342, "step": 96175 }, { "epoch": 0.95, "grad_norm": 18.588842391967773, "learning_rate": 4.572244233270776e-06, "loss": 0.3733, "step": 96200 }, { "epoch": 0.95, "grad_norm": 6.948475360870361, "learning_rate": 4.572120110816527e-06, "loss": 0.2107, "step": 96225 }, { "epoch": 0.95, "grad_norm": 9.573775291442871, "learning_rate": 4.571995988362279e-06, "loss": 0.3721, "step": 96250 }, { "epoch": 0.95, "grad_norm": 5.8062968254089355, "learning_rate": 4.5718718659080305e-06, "loss": 0.2168, "step": 96275 }, { "epoch": 0.95, "grad_norm": 20.418886184692383, "learning_rate": 4.571752708351952e-06, "loss": 0.3487, "step": 96300 }, { "epoch": 0.95, "grad_norm": 4.360739707946777, "learning_rate": 4.5716285858977035e-06, "loss": 0.1816, "step": 96325 }, { "epoch": 0.95, "grad_norm": 17.375362396240234, "learning_rate": 4.571504463443456e-06, "loss": 0.4149, "step": 96350 }, { "epoch": 0.95, "grad_norm": 7.543102264404297, "learning_rate": 4.571380340989207e-06, "loss": 0.2515, "step": 96375 }, { "epoch": 0.95, "grad_norm": 19.72077178955078, "learning_rate": 4.571256218534959e-06, "loss": 0.383, "step": 96400 }, { "epoch": 0.95, "grad_norm": 6.140733242034912, "learning_rate": 4.57113209608071e-06, "loss": 0.2059, "step": 96425 }, { "epoch": 0.95, "grad_norm": 18.7949161529541, "learning_rate": 4.571007973626461e-06, "loss": 0.3348, "step": 96450 }, { "epoch": 0.95, "grad_norm": 9.3495512008667, "learning_rate": 4.5708838511722125e-06, "loss": 0.2412, "step": 96475 }, { "epoch": 0.95, "grad_norm": 23.666099548339844, "learning_rate": 4.5707597287179646e-06, "loss": 0.4047, "step": 96500 }, { "epoch": 0.95, "grad_norm": 10.250608444213867, "learning_rate": 4.570635606263716e-06, "loss": 0.2167, "step": 96525 }, { "epoch": 0.95, "grad_norm": 13.716413497924805, "learning_rate": 4.570511483809467e-06, "loss": 0.3847, "step": 96550 }, { "epoch": 0.95, "grad_norm": 10.50189208984375, "learning_rate": 4.570387361355219e-06, "loss": 0.2265, "step": 96575 }, { "epoch": 0.95, "grad_norm": 8.314467430114746, "learning_rate": 4.57026323890097e-06, "loss": 0.351, "step": 96600 }, { "epoch": 0.95, "grad_norm": 12.383659362792969, "learning_rate": 4.5701391164467215e-06, "loss": 0.2097, "step": 96625 }, { "epoch": 0.95, "grad_norm": 19.32497215270996, "learning_rate": 4.5700149939924735e-06, "loss": 0.3683, "step": 96650 }, { "epoch": 0.95, "grad_norm": 13.61508560180664, "learning_rate": 4.569890871538225e-06, "loss": 0.2562, "step": 96675 }, { "epoch": 0.95, "grad_norm": 17.476512908935547, "learning_rate": 4.569766749083977e-06, "loss": 0.3967, "step": 96700 }, { "epoch": 0.95, "grad_norm": 4.103864669799805, "learning_rate": 4.569642626629728e-06, "loss": 0.1705, "step": 96725 }, { "epoch": 0.95, "grad_norm": 23.834712982177734, "learning_rate": 4.56951850417548e-06, "loss": 0.3616, "step": 96750 }, { "epoch": 0.95, "grad_norm": 8.473467826843262, "learning_rate": 4.569394381721231e-06, "loss": 0.2308, "step": 96775 }, { "epoch": 0.95, "grad_norm": 21.466115951538086, "learning_rate": 4.569270259266983e-06, "loss": 0.3302, "step": 96800 }, { "epoch": 0.95, "grad_norm": 3.4594264030456543, "learning_rate": 4.5691461368127345e-06, "loss": 0.2091, "step": 96825 }, { "epoch": 0.95, "grad_norm": 22.39113998413086, "learning_rate": 4.569022014358486e-06, "loss": 0.3555, "step": 96850 }, { "epoch": 0.95, "grad_norm": 10.694025993347168, "learning_rate": 4.568897891904238e-06, "loss": 0.2081, "step": 96875 }, { "epoch": 0.95, "grad_norm": 22.451622009277344, "learning_rate": 4.568773769449989e-06, "loss": 0.3963, "step": 96900 }, { "epoch": 0.95, "grad_norm": 14.920656204223633, "learning_rate": 4.56864964699574e-06, "loss": 0.2469, "step": 96925 }, { "epoch": 0.95, "grad_norm": 5.775885105133057, "learning_rate": 4.568525524541492e-06, "loss": 0.4115, "step": 96950 }, { "epoch": 0.95, "grad_norm": 10.67842960357666, "learning_rate": 4.5684014020872435e-06, "loss": 0.2417, "step": 96975 }, { "epoch": 0.95, "grad_norm": 12.357762336730957, "learning_rate": 4.568277279632995e-06, "loss": 0.355, "step": 97000 }, { "epoch": 0.95, "grad_norm": 14.778143882751465, "learning_rate": 4.568153157178747e-06, "loss": 0.2017, "step": 97025 }, { "epoch": 0.95, "grad_norm": 18.069766998291016, "learning_rate": 4.568029034724498e-06, "loss": 0.4027, "step": 97050 }, { "epoch": 0.95, "grad_norm": 10.967702865600586, "learning_rate": 4.567904912270249e-06, "loss": 0.2217, "step": 97075 }, { "epoch": 0.95, "grad_norm": 10.564484596252441, "learning_rate": 4.567780789816001e-06, "loss": 0.4386, "step": 97100 }, { "epoch": 0.95, "grad_norm": 7.6992363929748535, "learning_rate": 4.5676566673617525e-06, "loss": 0.2731, "step": 97125 }, { "epoch": 0.96, "grad_norm": 10.844609260559082, "learning_rate": 4.5675325449075045e-06, "loss": 0.3861, "step": 97150 }, { "epoch": 0.96, "grad_norm": 8.698296546936035, "learning_rate": 4.567408422453256e-06, "loss": 0.2348, "step": 97175 }, { "epoch": 0.96, "grad_norm": 15.487995147705078, "learning_rate": 4.567284299999008e-06, "loss": 0.3315, "step": 97200 }, { "epoch": 0.96, "grad_norm": 7.41670560836792, "learning_rate": 4.567160177544759e-06, "loss": 0.2122, "step": 97225 }, { "epoch": 0.96, "grad_norm": 16.577177047729492, "learning_rate": 4.567036055090511e-06, "loss": 0.3184, "step": 97250 }, { "epoch": 0.96, "grad_norm": 10.412226676940918, "learning_rate": 4.566911932636262e-06, "loss": 0.1961, "step": 97275 }, { "epoch": 0.96, "grad_norm": 12.377504348754883, "learning_rate": 4.5667878101820135e-06, "loss": 0.3783, "step": 97300 }, { "epoch": 0.96, "grad_norm": 8.945106506347656, "learning_rate": 4.566663687727765e-06, "loss": 0.199, "step": 97325 }, { "epoch": 0.96, "grad_norm": 18.967479705810547, "learning_rate": 4.566539565273517e-06, "loss": 0.3544, "step": 97350 }, { "epoch": 0.96, "grad_norm": 11.661025047302246, "learning_rate": 4.566415442819268e-06, "loss": 0.2013, "step": 97375 }, { "epoch": 0.96, "grad_norm": 13.691046714782715, "learning_rate": 4.566291320365019e-06, "loss": 0.3902, "step": 97400 }, { "epoch": 0.96, "grad_norm": 9.753345489501953, "learning_rate": 4.566167197910771e-06, "loss": 0.2116, "step": 97425 }, { "epoch": 0.96, "grad_norm": 22.297805786132812, "learning_rate": 4.5660430754565225e-06, "loss": 0.4241, "step": 97450 }, { "epoch": 0.96, "grad_norm": 7.412646770477295, "learning_rate": 4.565918953002274e-06, "loss": 0.2202, "step": 97475 }, { "epoch": 0.96, "grad_norm": 15.045434951782227, "learning_rate": 4.565794830548026e-06, "loss": 0.3451, "step": 97500 }, { "epoch": 0.96, "grad_norm": 6.663585662841797, "learning_rate": 4.565670708093777e-06, "loss": 0.1912, "step": 97525 }, { "epoch": 0.96, "grad_norm": 19.104129791259766, "learning_rate": 4.565546585639529e-06, "loss": 0.3571, "step": 97550 }, { "epoch": 0.96, "grad_norm": 9.511506080627441, "learning_rate": 4.56542246318528e-06, "loss": 0.1821, "step": 97575 }, { "epoch": 0.96, "grad_norm": 15.102043151855469, "learning_rate": 4.565298340731032e-06, "loss": 0.3684, "step": 97600 }, { "epoch": 0.96, "grad_norm": 7.6335129737854, "learning_rate": 4.5651742182767835e-06, "loss": 0.1922, "step": 97625 }, { "epoch": 0.96, "grad_norm": 18.878938674926758, "learning_rate": 4.5650500958225355e-06, "loss": 0.3673, "step": 97650 }, { "epoch": 0.96, "grad_norm": 5.28160285949707, "learning_rate": 4.564925973368287e-06, "loss": 0.2145, "step": 97675 }, { "epoch": 0.96, "grad_norm": 12.417978286743164, "learning_rate": 4.564801850914038e-06, "loss": 0.3892, "step": 97700 }, { "epoch": 0.96, "grad_norm": 3.7189443111419678, "learning_rate": 4.56467772845979e-06, "loss": 0.2512, "step": 97725 }, { "epoch": 0.96, "grad_norm": 14.964950561523438, "learning_rate": 4.564553606005541e-06, "loss": 0.3992, "step": 97750 }, { "epoch": 0.96, "grad_norm": 6.5489091873168945, "learning_rate": 4.5644294835512924e-06, "loss": 0.2314, "step": 97775 }, { "epoch": 0.96, "grad_norm": 21.794551849365234, "learning_rate": 4.5643053610970445e-06, "loss": 0.3502, "step": 97800 }, { "epoch": 0.96, "grad_norm": 10.656743049621582, "learning_rate": 4.564181238642796e-06, "loss": 0.2056, "step": 97825 }, { "epoch": 0.96, "grad_norm": 17.657079696655273, "learning_rate": 4.564057116188547e-06, "loss": 0.4185, "step": 97850 }, { "epoch": 0.96, "grad_norm": 7.347536087036133, "learning_rate": 4.563932993734299e-06, "loss": 0.1745, "step": 97875 }, { "epoch": 0.96, "grad_norm": 23.59975814819336, "learning_rate": 4.56380887128005e-06, "loss": 0.3878, "step": 97900 }, { "epoch": 0.96, "grad_norm": 10.423113822937012, "learning_rate": 4.563684748825801e-06, "loss": 0.2054, "step": 97925 }, { "epoch": 0.96, "grad_norm": 12.201295852661133, "learning_rate": 4.5635606263715535e-06, "loss": 0.4096, "step": 97950 }, { "epoch": 0.96, "grad_norm": 10.605562210083008, "learning_rate": 4.563436503917305e-06, "loss": 0.2341, "step": 97975 }, { "epoch": 0.96, "grad_norm": 19.719501495361328, "learning_rate": 4.563312381463057e-06, "loss": 0.3502, "step": 98000 }, { "epoch": 0.96, "grad_norm": 9.197880744934082, "learning_rate": 4.563188259008808e-06, "loss": 0.2222, "step": 98025 }, { "epoch": 0.96, "grad_norm": 14.447311401367188, "learning_rate": 4.56306413655456e-06, "loss": 0.4001, "step": 98050 }, { "epoch": 0.96, "grad_norm": 5.237088680267334, "learning_rate": 4.562940014100311e-06, "loss": 0.2348, "step": 98075 }, { "epoch": 0.96, "grad_norm": 15.614374160766602, "learning_rate": 4.562815891646063e-06, "loss": 0.365, "step": 98100 }, { "epoch": 0.96, "grad_norm": 11.679885864257812, "learning_rate": 4.5626917691918145e-06, "loss": 0.2369, "step": 98125 }, { "epoch": 0.97, "grad_norm": 17.629682540893555, "learning_rate": 4.562567646737566e-06, "loss": 0.3336, "step": 98150 }, { "epoch": 0.97, "grad_norm": 1.4894167184829712, "learning_rate": 4.562443524283317e-06, "loss": 0.2135, "step": 98175 }, { "epoch": 0.97, "grad_norm": 10.27286148071289, "learning_rate": 4.562319401829069e-06, "loss": 0.3888, "step": 98200 }, { "epoch": 0.97, "grad_norm": 9.325995445251465, "learning_rate": 4.56219527937482e-06, "loss": 0.2082, "step": 98225 }, { "epoch": 0.97, "grad_norm": 10.850333213806152, "learning_rate": 4.562071156920571e-06, "loss": 0.3255, "step": 98250 }, { "epoch": 0.97, "grad_norm": 9.04765796661377, "learning_rate": 4.5619470344663234e-06, "loss": 0.2159, "step": 98275 }, { "epoch": 0.97, "grad_norm": 18.801340103149414, "learning_rate": 4.5618278769102445e-06, "loss": 0.3601, "step": 98300 }, { "epoch": 0.97, "grad_norm": 6.217791557312012, "learning_rate": 4.5617037544559965e-06, "loss": 0.2067, "step": 98325 }, { "epoch": 0.97, "grad_norm": 6.447222709655762, "learning_rate": 4.561579632001748e-06, "loss": 0.3572, "step": 98350 }, { "epoch": 0.97, "grad_norm": 10.780261039733887, "learning_rate": 4.5614555095475e-06, "loss": 0.1975, "step": 98375 }, { "epoch": 0.97, "grad_norm": 15.328022003173828, "learning_rate": 4.561331387093251e-06, "loss": 0.4002, "step": 98400 }, { "epoch": 0.97, "grad_norm": 7.8558149337768555, "learning_rate": 4.561207264639003e-06, "loss": 0.2162, "step": 98425 }, { "epoch": 0.97, "grad_norm": 18.447954177856445, "learning_rate": 4.561083142184754e-06, "loss": 0.3557, "step": 98450 }, { "epoch": 0.97, "grad_norm": 11.057623863220215, "learning_rate": 4.560959019730506e-06, "loss": 0.2644, "step": 98475 }, { "epoch": 0.97, "grad_norm": 19.664684295654297, "learning_rate": 4.5608348972762576e-06, "loss": 0.2778, "step": 98500 }, { "epoch": 0.97, "grad_norm": 7.407970905303955, "learning_rate": 4.560710774822009e-06, "loss": 0.2224, "step": 98525 }, { "epoch": 0.97, "grad_norm": 26.882888793945312, "learning_rate": 4.560586652367761e-06, "loss": 0.349, "step": 98550 }, { "epoch": 0.97, "grad_norm": 6.889134883880615, "learning_rate": 4.560462529913512e-06, "loss": 0.1745, "step": 98575 }, { "epoch": 0.97, "grad_norm": 13.915726661682129, "learning_rate": 4.560338407459263e-06, "loss": 0.4188, "step": 98600 }, { "epoch": 0.97, "grad_norm": 7.931297779083252, "learning_rate": 4.560214285005015e-06, "loss": 0.1701, "step": 98625 }, { "epoch": 0.97, "grad_norm": 10.429201126098633, "learning_rate": 4.5600901625507665e-06, "loss": 0.3384, "step": 98650 }, { "epoch": 0.97, "grad_norm": 4.945479393005371, "learning_rate": 4.559966040096518e-06, "loss": 0.1908, "step": 98675 }, { "epoch": 0.97, "grad_norm": 13.878498077392578, "learning_rate": 4.55984191764227e-06, "loss": 0.3488, "step": 98700 }, { "epoch": 0.97, "grad_norm": 6.440752029418945, "learning_rate": 4.559717795188021e-06, "loss": 0.2302, "step": 98725 }, { "epoch": 0.97, "grad_norm": 19.78122329711914, "learning_rate": 4.559593672733772e-06, "loss": 0.3787, "step": 98750 }, { "epoch": 0.97, "grad_norm": 8.793451309204102, "learning_rate": 4.559469550279524e-06, "loss": 0.1871, "step": 98775 }, { "epoch": 0.97, "grad_norm": 17.5911865234375, "learning_rate": 4.5593454278252755e-06, "loss": 0.3858, "step": 98800 }, { "epoch": 0.97, "grad_norm": 11.195772171020508, "learning_rate": 4.5592213053710275e-06, "loss": 0.2448, "step": 98825 }, { "epoch": 0.97, "grad_norm": 19.131189346313477, "learning_rate": 4.559097182916779e-06, "loss": 0.4395, "step": 98850 }, { "epoch": 0.97, "grad_norm": 10.328381538391113, "learning_rate": 4.558973060462531e-06, "loss": 0.2322, "step": 98875 }, { "epoch": 0.97, "grad_norm": 24.30857276916504, "learning_rate": 4.558848938008282e-06, "loss": 0.331, "step": 98900 }, { "epoch": 0.97, "grad_norm": 5.131441116333008, "learning_rate": 4.558724815554033e-06, "loss": 0.2218, "step": 98925 }, { "epoch": 0.97, "grad_norm": 17.58506965637207, "learning_rate": 4.558600693099785e-06, "loss": 0.3688, "step": 98950 }, { "epoch": 0.97, "grad_norm": 5.970010757446289, "learning_rate": 4.5584765706455365e-06, "loss": 0.2226, "step": 98975 }, { "epoch": 0.97, "grad_norm": 17.303741455078125, "learning_rate": 4.558352448191288e-06, "loss": 0.3536, "step": 99000 }, { "epoch": 0.97, "grad_norm": 10.42618179321289, "learning_rate": 4.55822832573704e-06, "loss": 0.2173, "step": 99025 }, { "epoch": 0.97, "grad_norm": 17.254375457763672, "learning_rate": 4.558104203282791e-06, "loss": 0.3617, "step": 99050 }, { "epoch": 0.97, "grad_norm": 6.674833297729492, "learning_rate": 4.557980080828542e-06, "loss": 0.1889, "step": 99075 }, { "epoch": 0.97, "grad_norm": 19.736133575439453, "learning_rate": 4.557855958374294e-06, "loss": 0.3921, "step": 99100 }, { "epoch": 0.97, "grad_norm": 7.560609340667725, "learning_rate": 4.5577318359200455e-06, "loss": 0.2521, "step": 99125 }, { "epoch": 0.97, "grad_norm": 17.409879684448242, "learning_rate": 4.557607713465797e-06, "loss": 0.3594, "step": 99150 }, { "epoch": 0.98, "grad_norm": 4.681962013244629, "learning_rate": 4.557483591011549e-06, "loss": 0.2006, "step": 99175 }, { "epoch": 0.98, "grad_norm": 20.551074981689453, "learning_rate": 4.5573594685573e-06, "loss": 0.3157, "step": 99200 }, { "epoch": 0.98, "grad_norm": 10.399240493774414, "learning_rate": 4.557235346103052e-06, "loss": 0.2547, "step": 99225 }, { "epoch": 0.98, "grad_norm": 17.4088077545166, "learning_rate": 4.557111223648803e-06, "loss": 0.3906, "step": 99250 }, { "epoch": 0.98, "grad_norm": 6.348793983459473, "learning_rate": 4.556987101194555e-06, "loss": 0.192, "step": 99275 }, { "epoch": 0.98, "grad_norm": 16.516328811645508, "learning_rate": 4.5568629787403065e-06, "loss": 0.3679, "step": 99300 }, { "epoch": 0.98, "grad_norm": 8.554121017456055, "learning_rate": 4.5567388562860585e-06, "loss": 0.2086, "step": 99325 }, { "epoch": 0.98, "grad_norm": 19.921039581298828, "learning_rate": 4.55661473383181e-06, "loss": 0.3567, "step": 99350 }, { "epoch": 0.98, "grad_norm": 11.75248908996582, "learning_rate": 4.556490611377561e-06, "loss": 0.2531, "step": 99375 }, { "epoch": 0.98, "grad_norm": 18.148540496826172, "learning_rate": 4.556366488923313e-06, "loss": 0.3524, "step": 99400 }, { "epoch": 0.98, "grad_norm": 13.733848571777344, "learning_rate": 4.556242366469064e-06, "loss": 0.1928, "step": 99425 }, { "epoch": 0.98, "grad_norm": 14.673670768737793, "learning_rate": 4.5561182440148154e-06, "loss": 0.4182, "step": 99450 }, { "epoch": 0.98, "grad_norm": 7.941739559173584, "learning_rate": 4.5559941215605675e-06, "loss": 0.2277, "step": 99475 }, { "epoch": 0.98, "grad_norm": 23.239110946655273, "learning_rate": 4.555869999106319e-06, "loss": 0.3336, "step": 99500 }, { "epoch": 0.98, "grad_norm": 11.222578048706055, "learning_rate": 4.55574587665207e-06, "loss": 0.2462, "step": 99525 }, { "epoch": 0.98, "grad_norm": 15.381884574890137, "learning_rate": 4.555621754197822e-06, "loss": 0.3917, "step": 99550 }, { "epoch": 0.98, "grad_norm": 12.286702156066895, "learning_rate": 4.555497631743573e-06, "loss": 0.2086, "step": 99575 }, { "epoch": 0.98, "grad_norm": 20.33023452758789, "learning_rate": 4.555373509289324e-06, "loss": 0.3364, "step": 99600 }, { "epoch": 0.98, "grad_norm": 6.83486270904541, "learning_rate": 4.5552493868350765e-06, "loss": 0.2161, "step": 99625 }, { "epoch": 0.98, "grad_norm": 8.119759559631348, "learning_rate": 4.555125264380828e-06, "loss": 0.3851, "step": 99650 }, { "epoch": 0.98, "grad_norm": 8.364238739013672, "learning_rate": 4.55500114192658e-06, "loss": 0.2493, "step": 99675 }, { "epoch": 0.98, "grad_norm": 10.536406517028809, "learning_rate": 4.554877019472331e-06, "loss": 0.3183, "step": 99700 }, { "epoch": 0.98, "grad_norm": 11.529157638549805, "learning_rate": 4.554752897018083e-06, "loss": 0.2401, "step": 99725 }, { "epoch": 0.98, "grad_norm": 15.275554656982422, "learning_rate": 4.554628774563834e-06, "loss": 0.4361, "step": 99750 }, { "epoch": 0.98, "grad_norm": 7.892274379730225, "learning_rate": 4.5545046521095854e-06, "loss": 0.1878, "step": 99775 }, { "epoch": 0.98, "grad_norm": 19.709671020507812, "learning_rate": 4.5543805296553375e-06, "loss": 0.3935, "step": 99800 }, { "epoch": 0.98, "grad_norm": 10.135801315307617, "learning_rate": 4.554256407201089e-06, "loss": 0.1913, "step": 99825 }, { "epoch": 0.98, "grad_norm": 11.747145652770996, "learning_rate": 4.55413228474684e-06, "loss": 0.4352, "step": 99850 }, { "epoch": 0.98, "grad_norm": 9.085651397705078, "learning_rate": 4.554008162292592e-06, "loss": 0.2341, "step": 99875 }, { "epoch": 0.98, "grad_norm": 22.60052490234375, "learning_rate": 4.553884039838343e-06, "loss": 0.3966, "step": 99900 }, { "epoch": 0.98, "grad_norm": 10.076904296875, "learning_rate": 4.553759917384094e-06, "loss": 0.1829, "step": 99925 }, { "epoch": 0.98, "grad_norm": 22.19022560119629, "learning_rate": 4.5536357949298465e-06, "loss": 0.3638, "step": 99950 }, { "epoch": 0.98, "grad_norm": 9.676626205444336, "learning_rate": 4.553511672475598e-06, "loss": 0.1992, "step": 99975 }, { "epoch": 0.98, "grad_norm": 10.98279094696045, "learning_rate": 4.553387550021349e-06, "loss": 0.3615, "step": 100000 }, { "epoch": 0.98, "eval_loss": 0.5074259638786316, "eval_runtime": 6017.2128, "eval_samples_per_second": 1.573, "eval_steps_per_second": 0.197, "eval_wer": 0.13860812463786776, "step": 100000 }, { "epoch": 0.98, "grad_norm": 4.3802313804626465, "learning_rate": 4.553263427567101e-06, "loss": 0.1842, "step": 100025 }, { "epoch": 0.98, "grad_norm": 12.721065521240234, "learning_rate": 4.553139305112852e-06, "loss": 0.3445, "step": 100050 }, { "epoch": 0.98, "grad_norm": 19.324203491210938, "learning_rate": 4.553015182658604e-06, "loss": 0.2291, "step": 100075 }, { "epoch": 0.98, "grad_norm": 11.792384147644043, "learning_rate": 4.552891060204355e-06, "loss": 0.3726, "step": 100100 }, { "epoch": 0.98, "grad_norm": 20.86113929748535, "learning_rate": 4.5527669377501075e-06, "loss": 0.2052, "step": 100125 }, { "epoch": 0.98, "grad_norm": 19.571796417236328, "learning_rate": 4.552642815295859e-06, "loss": 0.3896, "step": 100150 }, { "epoch": 0.98, "grad_norm": 12.562769889831543, "learning_rate": 4.552518692841611e-06, "loss": 0.3069, "step": 100175 }, { "epoch": 0.99, "grad_norm": 16.790449142456055, "learning_rate": 4.552394570387362e-06, "loss": 0.3426, "step": 100200 }, { "epoch": 0.99, "grad_norm": 7.803878307342529, "learning_rate": 4.552270447933113e-06, "loss": 0.2355, "step": 100225 }, { "epoch": 0.99, "grad_norm": 16.80035400390625, "learning_rate": 4.552146325478865e-06, "loss": 0.3329, "step": 100250 }, { "epoch": 0.99, "grad_norm": 6.826509475708008, "learning_rate": 4.5520222030246164e-06, "loss": 0.2386, "step": 100275 }, { "epoch": 0.99, "grad_norm": 17.233522415161133, "learning_rate": 4.551898080570368e-06, "loss": 0.3642, "step": 100300 }, { "epoch": 0.99, "grad_norm": 11.187000274658203, "learning_rate": 4.55177395811612e-06, "loss": 0.2116, "step": 100325 }, { "epoch": 0.99, "grad_norm": 16.144390106201172, "learning_rate": 4.551654800560041e-06, "loss": 0.3828, "step": 100350 }, { "epoch": 0.99, "grad_norm": 7.489932537078857, "learning_rate": 4.551530678105792e-06, "loss": 0.1745, "step": 100375 }, { "epoch": 0.99, "grad_norm": 6.619669437408447, "learning_rate": 4.551406555651544e-06, "loss": 0.3629, "step": 100400 }, { "epoch": 0.99, "grad_norm": 6.9871907234191895, "learning_rate": 4.551282433197295e-06, "loss": 0.2346, "step": 100425 }, { "epoch": 0.99, "grad_norm": 20.85903549194336, "learning_rate": 4.5511583107430464e-06, "loss": 0.3365, "step": 100450 }, { "epoch": 0.99, "grad_norm": 5.338232040405273, "learning_rate": 4.5510341882887985e-06, "loss": 0.2101, "step": 100475 }, { "epoch": 0.99, "grad_norm": 12.964771270751953, "learning_rate": 4.55091006583455e-06, "loss": 0.3392, "step": 100500 }, { "epoch": 0.99, "grad_norm": 6.489068984985352, "learning_rate": 4.550785943380302e-06, "loss": 0.221, "step": 100525 }, { "epoch": 0.99, "grad_norm": 18.601072311401367, "learning_rate": 4.550661820926053e-06, "loss": 0.3736, "step": 100550 }, { "epoch": 0.99, "grad_norm": 7.929945945739746, "learning_rate": 4.550537698471805e-06, "loss": 0.198, "step": 100575 }, { "epoch": 0.99, "grad_norm": 29.180505752563477, "learning_rate": 4.550413576017556e-06, "loss": 0.3421, "step": 100600 }, { "epoch": 0.99, "grad_norm": 5.991987705230713, "learning_rate": 4.550289453563308e-06, "loss": 0.1812, "step": 100625 }, { "epoch": 0.99, "grad_norm": 20.183021545410156, "learning_rate": 4.5501653311090595e-06, "loss": 0.3696, "step": 100650 }, { "epoch": 0.99, "grad_norm": 13.087162971496582, "learning_rate": 4.550041208654811e-06, "loss": 0.1996, "step": 100675 }, { "epoch": 0.99, "grad_norm": 21.085601806640625, "learning_rate": 4.549917086200563e-06, "loss": 0.4319, "step": 100700 }, { "epoch": 0.99, "grad_norm": 8.758560180664062, "learning_rate": 4.549792963746314e-06, "loss": 0.1994, "step": 100725 }, { "epoch": 0.99, "grad_norm": 18.71869468688965, "learning_rate": 4.549668841292065e-06, "loss": 0.4043, "step": 100750 }, { "epoch": 0.99, "grad_norm": 16.22039794921875, "learning_rate": 4.549544718837817e-06, "loss": 0.2307, "step": 100775 }, { "epoch": 0.99, "grad_norm": 19.9346923828125, "learning_rate": 4.5494205963835685e-06, "loss": 0.3704, "step": 100800 }, { "epoch": 0.99, "grad_norm": 9.213871955871582, "learning_rate": 4.54929647392932e-06, "loss": 0.1935, "step": 100825 }, { "epoch": 0.99, "grad_norm": 18.8478946685791, "learning_rate": 4.549172351475072e-06, "loss": 0.3822, "step": 100850 }, { "epoch": 0.99, "grad_norm": 7.968591690063477, "learning_rate": 4.549048229020823e-06, "loss": 0.2285, "step": 100875 }, { "epoch": 0.99, "grad_norm": 17.388565063476562, "learning_rate": 4.548924106566574e-06, "loss": 0.3663, "step": 100900 }, { "epoch": 0.99, "grad_norm": 6.77500057220459, "learning_rate": 4.548799984112326e-06, "loss": 0.1919, "step": 100925 }, { "epoch": 0.99, "grad_norm": 16.8601131439209, "learning_rate": 4.5486758616580774e-06, "loss": 0.3349, "step": 100950 }, { "epoch": 0.99, "grad_norm": 9.221969604492188, "learning_rate": 4.5485517392038295e-06, "loss": 0.193, "step": 100975 }, { "epoch": 0.99, "grad_norm": 14.103032112121582, "learning_rate": 4.548427616749581e-06, "loss": 0.2888, "step": 101000 }, { "epoch": 0.99, "grad_norm": 10.424159049987793, "learning_rate": 4.548303494295333e-06, "loss": 0.202, "step": 101025 }, { "epoch": 0.99, "grad_norm": 19.8280086517334, "learning_rate": 4.548179371841084e-06, "loss": 0.3938, "step": 101050 }, { "epoch": 0.99, "grad_norm": 12.474675178527832, "learning_rate": 4.548055249386835e-06, "loss": 0.2322, "step": 101075 }, { "epoch": 0.99, "grad_norm": 11.376713752746582, "learning_rate": 4.547931126932587e-06, "loss": 0.3799, "step": 101100 }, { "epoch": 0.99, "grad_norm": 9.199847221374512, "learning_rate": 4.5478070044783385e-06, "loss": 0.2114, "step": 101125 }, { "epoch": 0.99, "grad_norm": 19.253585815429688, "learning_rate": 4.54768288202409e-06, "loss": 0.4198, "step": 101150 }, { "epoch": 0.99, "grad_norm": 7.253416538238525, "learning_rate": 4.547558759569842e-06, "loss": 0.2116, "step": 101175 }, { "epoch": 1.0, "grad_norm": 14.114470481872559, "learning_rate": 4.547434637115593e-06, "loss": 0.3509, "step": 101200 }, { "epoch": 1.0, "grad_norm": 6.849737644195557, "learning_rate": 4.547310514661344e-06, "loss": 0.2198, "step": 101225 }, { "epoch": 1.0, "grad_norm": 18.36721420288086, "learning_rate": 4.547186392207096e-06, "loss": 0.3894, "step": 101250 }, { "epoch": 1.0, "grad_norm": 12.040677070617676, "learning_rate": 4.547062269752847e-06, "loss": 0.2672, "step": 101275 }, { "epoch": 1.0, "grad_norm": 25.32854461669922, "learning_rate": 4.5469381472985995e-06, "loss": 0.3369, "step": 101300 }, { "epoch": 1.0, "grad_norm": 12.113750457763672, "learning_rate": 4.546814024844351e-06, "loss": 0.1898, "step": 101325 }, { "epoch": 1.0, "grad_norm": 19.052488327026367, "learning_rate": 4.546689902390103e-06, "loss": 0.4444, "step": 101350 }, { "epoch": 1.0, "grad_norm": 6.132081985473633, "learning_rate": 4.546565779935854e-06, "loss": 0.1998, "step": 101375 }, { "epoch": 1.0, "grad_norm": 15.099235534667969, "learning_rate": 4.546441657481606e-06, "loss": 0.4017, "step": 101400 }, { "epoch": 1.0, "grad_norm": 10.4237699508667, "learning_rate": 4.546317535027357e-06, "loss": 0.2605, "step": 101425 }, { "epoch": 1.0, "grad_norm": 17.054479598999023, "learning_rate": 4.5461934125731084e-06, "loss": 0.3888, "step": 101450 }, { "epoch": 1.0, "grad_norm": 6.393292427062988, "learning_rate": 4.5460692901188605e-06, "loss": 0.2451, "step": 101475 }, { "epoch": 1.0, "grad_norm": 16.061914443969727, "learning_rate": 4.545945167664612e-06, "loss": 0.3866, "step": 101500 }, { "epoch": 1.0, "grad_norm": 9.503327369689941, "learning_rate": 4.545821045210363e-06, "loss": 0.176, "step": 101525 }, { "epoch": 1.0, "grad_norm": 19.440052032470703, "learning_rate": 4.545696922756115e-06, "loss": 0.3804, "step": 101550 }, { "epoch": 1.0, "grad_norm": 5.84713888168335, "learning_rate": 4.545572800301866e-06, "loss": 0.2249, "step": 101575 }, { "epoch": 1.0, "grad_norm": 17.151893615722656, "learning_rate": 4.545448677847617e-06, "loss": 0.4151, "step": 101600 }, { "epoch": 1.0, "grad_norm": 10.012293815612793, "learning_rate": 4.5453245553933695e-06, "loss": 0.2267, "step": 101625 }, { "epoch": 1.0, "grad_norm": 17.99065399169922, "learning_rate": 4.545200432939121e-06, "loss": 0.4128, "step": 101650 }, { "epoch": 1.0, "grad_norm": 7.894062519073486, "learning_rate": 4.545076310484872e-06, "loss": 0.1906, "step": 101675 }, { "epoch": 1.0, "grad_norm": 8.915519714355469, "learning_rate": 4.544952188030624e-06, "loss": 0.3133, "step": 101700 }, { "epoch": 1.0, "grad_norm": 5.67418098449707, "learning_rate": 4.544828065576375e-06, "loss": 0.2271, "step": 101725 }, { "epoch": 1.0, "grad_norm": 22.46010971069336, "learning_rate": 4.544703943122127e-06, "loss": 0.2375, "step": 101750 }, { "epoch": 1.0, "grad_norm": 6.1359148025512695, "learning_rate": 4.544579820667878e-06, "loss": 0.2389, "step": 101775 }, { "epoch": 1.0, "grad_norm": 13.2661771774292, "learning_rate": 4.5444556982136305e-06, "loss": 0.2392, "step": 101800 }, { "epoch": 1.0, "grad_norm": 10.826881408691406, "learning_rate": 4.544331575759382e-06, "loss": 0.2231, "step": 101825 }, { "epoch": 1.0, "grad_norm": 10.574054718017578, "learning_rate": 4.544207453305134e-06, "loss": 0.2347, "step": 101850 }, { "epoch": 1.0, "grad_norm": 7.240632057189941, "learning_rate": 4.544083330850885e-06, "loss": 0.2134, "step": 101875 }, { "epoch": 1.0, "grad_norm": 19.262752532958984, "learning_rate": 4.543959208396636e-06, "loss": 0.2354, "step": 101900 }, { "epoch": 1.0, "grad_norm": 4.052224159240723, "learning_rate": 4.543835085942387e-06, "loss": 0.1805, "step": 101925 }, { "epoch": 1.0, "grad_norm": 13.466486930847168, "learning_rate": 4.5437109634881394e-06, "loss": 0.2407, "step": 101950 }, { "epoch": 1.0, "grad_norm": 5.6621222496032715, "learning_rate": 4.543586841033891e-06, "loss": 0.1677, "step": 101975 }, { "epoch": 1.0, "grad_norm": 15.916447639465332, "learning_rate": 4.543462718579642e-06, "loss": 0.2179, "step": 102000 }, { "epoch": 1.0, "grad_norm": 7.374277591705322, "learning_rate": 4.543338596125394e-06, "loss": 0.2263, "step": 102025 }, { "epoch": 1.0, "grad_norm": 10.126968383789062, "learning_rate": 4.543214473671145e-06, "loss": 0.25, "step": 102050 }, { "epoch": 1.0, "grad_norm": 8.467601776123047, "learning_rate": 4.543090351216896e-06, "loss": 0.2453, "step": 102075 }, { "epoch": 1.0, "grad_norm": 13.010984420776367, "learning_rate": 4.542966228762648e-06, "loss": 0.2376, "step": 102100 }, { "epoch": 1.0, "grad_norm": 5.432332515716553, "learning_rate": 4.5428421063084e-06, "loss": 0.1912, "step": 102125 }, { "epoch": 1.0, "grad_norm": 13.339651107788086, "learning_rate": 4.542717983854152e-06, "loss": 0.2195, "step": 102150 }, { "epoch": 1.0, "grad_norm": 4.341249942779541, "learning_rate": 4.542593861399903e-06, "loss": 0.2356, "step": 102175 }, { "epoch": 1.0, "grad_norm": 10.508928298950195, "learning_rate": 4.542469738945655e-06, "loss": 0.234, "step": 102200 }, { "epoch": 1.01, "grad_norm": 3.0041346549987793, "learning_rate": 4.542345616491406e-06, "loss": 0.1848, "step": 102225 }, { "epoch": 1.01, "grad_norm": 22.220638275146484, "learning_rate": 4.542221494037158e-06, "loss": 0.2581, "step": 102250 }, { "epoch": 1.01, "grad_norm": 5.670867919921875, "learning_rate": 4.5420973715829094e-06, "loss": 0.2053, "step": 102275 }, { "epoch": 1.01, "grad_norm": 14.635087013244629, "learning_rate": 4.541973249128661e-06, "loss": 0.2214, "step": 102300 }, { "epoch": 1.01, "grad_norm": 8.16334056854248, "learning_rate": 4.541849126674413e-06, "loss": 0.2619, "step": 102325 }, { "epoch": 1.01, "grad_norm": 15.881762504577637, "learning_rate": 4.541725004220164e-06, "loss": 0.2335, "step": 102350 }, { "epoch": 1.01, "grad_norm": 1.3318711519241333, "learning_rate": 4.541605846664086e-06, "loss": 0.2211, "step": 102375 }, { "epoch": 1.01, "grad_norm": 11.598958969116211, "learning_rate": 4.541481724209837e-06, "loss": 0.2442, "step": 102400 }, { "epoch": 1.01, "grad_norm": 7.8896942138671875, "learning_rate": 4.541357601755588e-06, "loss": 0.2258, "step": 102425 }, { "epoch": 1.01, "grad_norm": 11.304174423217773, "learning_rate": 4.54123347930134e-06, "loss": 0.2972, "step": 102450 }, { "epoch": 1.01, "grad_norm": 3.8981435298919678, "learning_rate": 4.5411093568470915e-06, "loss": 0.2297, "step": 102475 }, { "epoch": 1.01, "grad_norm": 11.870085716247559, "learning_rate": 4.540985234392843e-06, "loss": 0.2443, "step": 102500 }, { "epoch": 1.01, "grad_norm": 0.8139597773551941, "learning_rate": 4.540861111938594e-06, "loss": 0.195, "step": 102525 }, { "epoch": 1.01, "grad_norm": 17.591354370117188, "learning_rate": 4.540736989484346e-06, "loss": 0.2773, "step": 102550 }, { "epoch": 1.01, "grad_norm": 2.171381950378418, "learning_rate": 4.540612867030097e-06, "loss": 0.2504, "step": 102575 }, { "epoch": 1.01, "grad_norm": 18.64579963684082, "learning_rate": 4.540488744575849e-06, "loss": 0.2541, "step": 102600 }, { "epoch": 1.01, "grad_norm": 1.7121622562408447, "learning_rate": 4.5403646221216004e-06, "loss": 0.1887, "step": 102625 }, { "epoch": 1.01, "grad_norm": 15.55997371673584, "learning_rate": 4.5402404996673525e-06, "loss": 0.248, "step": 102650 }, { "epoch": 1.01, "grad_norm": 10.479740142822266, "learning_rate": 4.540116377213104e-06, "loss": 0.2412, "step": 102675 }, { "epoch": 1.01, "grad_norm": 13.498379707336426, "learning_rate": 4.539992254758856e-06, "loss": 0.1915, "step": 102700 }, { "epoch": 1.01, "grad_norm": 6.1965436935424805, "learning_rate": 4.539868132304607e-06, "loss": 0.2166, "step": 102725 }, { "epoch": 1.01, "grad_norm": 16.238115310668945, "learning_rate": 4.539744009850358e-06, "loss": 0.2313, "step": 102750 }, { "epoch": 1.01, "grad_norm": 3.548427104949951, "learning_rate": 4.53961988739611e-06, "loss": 0.1984, "step": 102775 }, { "epoch": 1.01, "grad_norm": 12.31411361694336, "learning_rate": 4.5394957649418615e-06, "loss": 0.2671, "step": 102800 }, { "epoch": 1.01, "grad_norm": 6.406460285186768, "learning_rate": 4.539371642487613e-06, "loss": 0.2466, "step": 102825 }, { "epoch": 1.01, "grad_norm": 7.33601188659668, "learning_rate": 4.539247520033365e-06, "loss": 0.2054, "step": 102850 }, { "epoch": 1.01, "grad_norm": 4.423810958862305, "learning_rate": 4.539123397579116e-06, "loss": 0.2233, "step": 102875 }, { "epoch": 1.01, "grad_norm": 15.367547988891602, "learning_rate": 4.538999275124867e-06, "loss": 0.2357, "step": 102900 }, { "epoch": 1.01, "grad_norm": 5.201877593994141, "learning_rate": 4.538875152670619e-06, "loss": 0.2272, "step": 102925 }, { "epoch": 1.01, "grad_norm": 17.85761260986328, "learning_rate": 4.53875103021637e-06, "loss": 0.2535, "step": 102950 }, { "epoch": 1.01, "grad_norm": 10.244269371032715, "learning_rate": 4.538626907762122e-06, "loss": 0.2724, "step": 102975 }, { "epoch": 1.01, "grad_norm": 12.880690574645996, "learning_rate": 4.538502785307874e-06, "loss": 0.2465, "step": 103000 }, { "epoch": 1.01, "grad_norm": 3.356356382369995, "learning_rate": 4.538378662853625e-06, "loss": 0.2177, "step": 103025 }, { "epoch": 1.01, "grad_norm": 11.793950080871582, "learning_rate": 4.538254540399377e-06, "loss": 0.2045, "step": 103050 }, { "epoch": 1.01, "grad_norm": 5.426454067230225, "learning_rate": 4.538130417945128e-06, "loss": 0.2194, "step": 103075 }, { "epoch": 1.01, "grad_norm": 11.471905708312988, "learning_rate": 4.53800629549088e-06, "loss": 0.1919, "step": 103100 }, { "epoch": 1.01, "grad_norm": 4.773153305053711, "learning_rate": 4.5378821730366314e-06, "loss": 0.2046, "step": 103125 }, { "epoch": 1.01, "grad_norm": 15.414002418518066, "learning_rate": 4.5377580505823835e-06, "loss": 0.2589, "step": 103150 }, { "epoch": 1.01, "grad_norm": 8.753411293029785, "learning_rate": 4.537633928128135e-06, "loss": 0.2237, "step": 103175 }, { "epoch": 1.01, "grad_norm": 18.274673461914062, "learning_rate": 4.537509805673886e-06, "loss": 0.241, "step": 103200 }, { "epoch": 1.01, "grad_norm": 6.307366371154785, "learning_rate": 4.537385683219638e-06, "loss": 0.248, "step": 103225 }, { "epoch": 1.02, "grad_norm": 12.676342010498047, "learning_rate": 4.537261560765389e-06, "loss": 0.2532, "step": 103250 }, { "epoch": 1.02, "grad_norm": 1.4330309629440308, "learning_rate": 4.53713743831114e-06, "loss": 0.221, "step": 103275 }, { "epoch": 1.02, "grad_norm": 17.89944839477539, "learning_rate": 4.5370133158568925e-06, "loss": 0.1873, "step": 103300 }, { "epoch": 1.02, "grad_norm": 4.357291221618652, "learning_rate": 4.536889193402644e-06, "loss": 0.1794, "step": 103325 }, { "epoch": 1.02, "grad_norm": 10.2885160446167, "learning_rate": 4.536765070948395e-06, "loss": 0.2245, "step": 103350 }, { "epoch": 1.02, "grad_norm": 5.085901260375977, "learning_rate": 4.536640948494146e-06, "loss": 0.1478, "step": 103375 }, { "epoch": 1.02, "grad_norm": 15.164827346801758, "learning_rate": 4.536516826039898e-06, "loss": 0.2947, "step": 103400 }, { "epoch": 1.02, "grad_norm": 2.9100399017333984, "learning_rate": 4.536392703585649e-06, "loss": 0.236, "step": 103425 }, { "epoch": 1.02, "grad_norm": 12.969420433044434, "learning_rate": 4.5362685811314014e-06, "loss": 0.2457, "step": 103450 }, { "epoch": 1.02, "grad_norm": 7.100059986114502, "learning_rate": 4.536144458677153e-06, "loss": 0.2364, "step": 103475 }, { "epoch": 1.02, "grad_norm": 14.484976768493652, "learning_rate": 4.536020336222905e-06, "loss": 0.271, "step": 103500 }, { "epoch": 1.02, "grad_norm": 9.5597562789917, "learning_rate": 4.535896213768656e-06, "loss": 0.2017, "step": 103525 }, { "epoch": 1.02, "grad_norm": 22.698083877563477, "learning_rate": 4.535772091314408e-06, "loss": 0.2571, "step": 103550 }, { "epoch": 1.02, "grad_norm": 4.904496192932129, "learning_rate": 4.535647968860159e-06, "loss": 0.2127, "step": 103575 }, { "epoch": 1.02, "grad_norm": 10.103422164916992, "learning_rate": 4.53552384640591e-06, "loss": 0.2123, "step": 103600 }, { "epoch": 1.02, "grad_norm": 2.6498444080352783, "learning_rate": 4.5353997239516624e-06, "loss": 0.2008, "step": 103625 }, { "epoch": 1.02, "grad_norm": 10.7595796585083, "learning_rate": 4.535275601497414e-06, "loss": 0.1941, "step": 103650 }, { "epoch": 1.02, "grad_norm": 7.248941421508789, "learning_rate": 4.535151479043165e-06, "loss": 0.1941, "step": 103675 }, { "epoch": 1.02, "grad_norm": 11.069443702697754, "learning_rate": 4.535027356588917e-06, "loss": 0.1888, "step": 103700 }, { "epoch": 1.02, "grad_norm": 3.487006187438965, "learning_rate": 4.534903234134668e-06, "loss": 0.2388, "step": 103725 }, { "epoch": 1.02, "grad_norm": 10.363460540771484, "learning_rate": 4.534779111680419e-06, "loss": 0.2209, "step": 103750 }, { "epoch": 1.02, "grad_norm": 8.369401931762695, "learning_rate": 4.534654989226171e-06, "loss": 0.2014, "step": 103775 }, { "epoch": 1.02, "grad_norm": 12.37180233001709, "learning_rate": 4.534530866771923e-06, "loss": 0.2132, "step": 103800 }, { "epoch": 1.02, "grad_norm": 5.53675651550293, "learning_rate": 4.534406744317674e-06, "loss": 0.2205, "step": 103825 }, { "epoch": 1.02, "grad_norm": 16.743064880371094, "learning_rate": 4.534282621863426e-06, "loss": 0.2826, "step": 103850 }, { "epoch": 1.02, "grad_norm": 7.112672328948975, "learning_rate": 4.534158499409177e-06, "loss": 0.2325, "step": 103875 }, { "epoch": 1.02, "grad_norm": 16.061237335205078, "learning_rate": 4.534034376954929e-06, "loss": 0.257, "step": 103900 }, { "epoch": 1.02, "grad_norm": 7.349565029144287, "learning_rate": 4.53391025450068e-06, "loss": 0.2595, "step": 103925 }, { "epoch": 1.02, "grad_norm": 12.910367965698242, "learning_rate": 4.5337861320464324e-06, "loss": 0.2745, "step": 103950 }, { "epoch": 1.02, "grad_norm": 12.11129379272461, "learning_rate": 4.533662009592184e-06, "loss": 0.2135, "step": 103975 }, { "epoch": 1.02, "grad_norm": 14.610150337219238, "learning_rate": 4.533537887137936e-06, "loss": 0.2277, "step": 104000 }, { "epoch": 1.02, "grad_norm": 6.790509223937988, "learning_rate": 4.533413764683687e-06, "loss": 0.2246, "step": 104025 }, { "epoch": 1.02, "grad_norm": 18.07135581970215, "learning_rate": 4.533289642229438e-06, "loss": 0.2458, "step": 104050 }, { "epoch": 1.02, "grad_norm": 6.30102014541626, "learning_rate": 4.53316551977519e-06, "loss": 0.2298, "step": 104075 }, { "epoch": 1.02, "grad_norm": 12.70740795135498, "learning_rate": 4.533041397320941e-06, "loss": 0.2625, "step": 104100 }, { "epoch": 1.02, "grad_norm": 1.1074036359786987, "learning_rate": 4.532917274866693e-06, "loss": 0.1917, "step": 104125 }, { "epoch": 1.02, "grad_norm": 19.381505966186523, "learning_rate": 4.532793152412445e-06, "loss": 0.249, "step": 104150 }, { "epoch": 1.02, "grad_norm": 8.360507011413574, "learning_rate": 4.532669029958196e-06, "loss": 0.2047, "step": 104175 }, { "epoch": 1.02, "grad_norm": 13.115887641906738, "learning_rate": 4.532544907503947e-06, "loss": 0.2312, "step": 104200 }, { "epoch": 1.02, "grad_norm": 4.147325038909912, "learning_rate": 4.532420785049699e-06, "loss": 0.2182, "step": 104225 }, { "epoch": 1.03, "grad_norm": 13.085421562194824, "learning_rate": 4.53229666259545e-06, "loss": 0.2396, "step": 104250 }, { "epoch": 1.03, "grad_norm": 8.070707321166992, "learning_rate": 4.532172540141202e-06, "loss": 0.1948, "step": 104275 }, { "epoch": 1.03, "grad_norm": 12.789878845214844, "learning_rate": 4.532048417686954e-06, "loss": 0.2282, "step": 104300 }, { "epoch": 1.03, "grad_norm": 3.8045613765716553, "learning_rate": 4.531924295232706e-06, "loss": 0.2287, "step": 104325 }, { "epoch": 1.03, "grad_norm": 7.162386417388916, "learning_rate": 4.531800172778457e-06, "loss": 0.219, "step": 104350 }, { "epoch": 1.03, "grad_norm": 3.626304864883423, "learning_rate": 4.531676050324208e-06, "loss": 0.1999, "step": 104375 }, { "epoch": 1.03, "grad_norm": 16.466093063354492, "learning_rate": 4.53155689276813e-06, "loss": 0.2459, "step": 104400 }, { "epoch": 1.03, "grad_norm": 6.355140209197998, "learning_rate": 4.531432770313881e-06, "loss": 0.2127, "step": 104425 }, { "epoch": 1.03, "grad_norm": 15.255403518676758, "learning_rate": 4.531308647859633e-06, "loss": 0.2922, "step": 104450 }, { "epoch": 1.03, "grad_norm": 3.043429136276245, "learning_rate": 4.5311845254053845e-06, "loss": 0.2214, "step": 104475 }, { "epoch": 1.03, "grad_norm": 14.643498420715332, "learning_rate": 4.531060402951136e-06, "loss": 0.2816, "step": 104500 }, { "epoch": 1.03, "grad_norm": 3.2628417015075684, "learning_rate": 4.530936280496888e-06, "loss": 0.1956, "step": 104525 }, { "epoch": 1.03, "grad_norm": 12.594734191894531, "learning_rate": 4.530812158042639e-06, "loss": 0.2267, "step": 104550 }, { "epoch": 1.03, "grad_norm": 3.3661298751831055, "learning_rate": 4.53068803558839e-06, "loss": 0.1834, "step": 104575 }, { "epoch": 1.03, "grad_norm": 10.96851634979248, "learning_rate": 4.530563913134142e-06, "loss": 0.2426, "step": 104600 }, { "epoch": 1.03, "grad_norm": 4.201507568359375, "learning_rate": 4.5304397906798934e-06, "loss": 0.2232, "step": 104625 }, { "epoch": 1.03, "grad_norm": 9.89231014251709, "learning_rate": 4.530315668225645e-06, "loss": 0.2387, "step": 104650 }, { "epoch": 1.03, "grad_norm": 4.5607452392578125, "learning_rate": 4.530191545771397e-06, "loss": 0.2128, "step": 104675 }, { "epoch": 1.03, "grad_norm": 14.166301727294922, "learning_rate": 4.530067423317148e-06, "loss": 0.2438, "step": 104700 }, { "epoch": 1.03, "grad_norm": 5.650515556335449, "learning_rate": 4.5299433008629e-06, "loss": 0.2249, "step": 104725 }, { "epoch": 1.03, "grad_norm": 11.76877498626709, "learning_rate": 4.529819178408651e-06, "loss": 0.2829, "step": 104750 }, { "epoch": 1.03, "grad_norm": 2.8232922554016113, "learning_rate": 4.529695055954403e-06, "loss": 0.2344, "step": 104775 }, { "epoch": 1.03, "grad_norm": 12.12878131866455, "learning_rate": 4.5295709335001545e-06, "loss": 0.2532, "step": 104800 }, { "epoch": 1.03, "grad_norm": 3.9142134189605713, "learning_rate": 4.529446811045906e-06, "loss": 0.2036, "step": 104825 }, { "epoch": 1.03, "grad_norm": 8.146797180175781, "learning_rate": 4.529322688591658e-06, "loss": 0.2252, "step": 104850 }, { "epoch": 1.03, "grad_norm": 5.140865802764893, "learning_rate": 4.529198566137409e-06, "loss": 0.1933, "step": 104875 }, { "epoch": 1.03, "grad_norm": 10.86912727355957, "learning_rate": 4.52907444368316e-06, "loss": 0.2589, "step": 104900 }, { "epoch": 1.03, "grad_norm": 10.614889144897461, "learning_rate": 4.528950321228912e-06, "loss": 0.2325, "step": 104925 }, { "epoch": 1.03, "grad_norm": 15.44543170928955, "learning_rate": 4.528826198774663e-06, "loss": 0.2281, "step": 104950 }, { "epoch": 1.03, "grad_norm": 5.049801349639893, "learning_rate": 4.528702076320415e-06, "loss": 0.2208, "step": 104975 }, { "epoch": 1.03, "grad_norm": 15.880064964294434, "learning_rate": 4.528577953866167e-06, "loss": 0.2753, "step": 105000 }, { "epoch": 1.03, "grad_norm": 3.464700222015381, "learning_rate": 4.528453831411918e-06, "loss": 0.1954, "step": 105025 }, { "epoch": 1.03, "grad_norm": 17.402978897094727, "learning_rate": 4.528329708957669e-06, "loss": 0.2424, "step": 105050 }, { "epoch": 1.03, "grad_norm": 8.281856536865234, "learning_rate": 4.528205586503421e-06, "loss": 0.2383, "step": 105075 }, { "epoch": 1.03, "grad_norm": 10.546968460083008, "learning_rate": 4.528081464049172e-06, "loss": 0.2383, "step": 105100 }, { "epoch": 1.03, "grad_norm": 5.2745747566223145, "learning_rate": 4.5279573415949244e-06, "loss": 0.2445, "step": 105125 }, { "epoch": 1.03, "grad_norm": 17.000606536865234, "learning_rate": 4.527833219140676e-06, "loss": 0.243, "step": 105150 }, { "epoch": 1.03, "grad_norm": 5.106378555297852, "learning_rate": 4.527709096686428e-06, "loss": 0.2352, "step": 105175 }, { "epoch": 1.03, "grad_norm": 16.746734619140625, "learning_rate": 4.527584974232179e-06, "loss": 0.2795, "step": 105200 }, { "epoch": 1.03, "grad_norm": 5.946643829345703, "learning_rate": 4.527460851777931e-06, "loss": 0.2137, "step": 105225 }, { "epoch": 1.03, "grad_norm": 15.378342628479004, "learning_rate": 4.527336729323682e-06, "loss": 0.2265, "step": 105250 }, { "epoch": 1.04, "grad_norm": 5.463327884674072, "learning_rate": 4.527212606869433e-06, "loss": 0.2448, "step": 105275 }, { "epoch": 1.04, "grad_norm": 13.641458511352539, "learning_rate": 4.5270884844151855e-06, "loss": 0.2571, "step": 105300 }, { "epoch": 1.04, "grad_norm": 5.6143903732299805, "learning_rate": 4.526964361960937e-06, "loss": 0.2422, "step": 105325 }, { "epoch": 1.04, "grad_norm": 16.136219024658203, "learning_rate": 4.526840239506688e-06, "loss": 0.2284, "step": 105350 }, { "epoch": 1.04, "grad_norm": 8.10959529876709, "learning_rate": 4.52671611705244e-06, "loss": 0.1932, "step": 105375 }, { "epoch": 1.04, "grad_norm": 14.073551177978516, "learning_rate": 4.526591994598191e-06, "loss": 0.2622, "step": 105400 }, { "epoch": 1.04, "grad_norm": 4.216369152069092, "learning_rate": 4.526467872143942e-06, "loss": 0.2515, "step": 105425 }, { "epoch": 1.04, "grad_norm": 20.485790252685547, "learning_rate": 4.526343749689694e-06, "loss": 0.2113, "step": 105450 }, { "epoch": 1.04, "grad_norm": 6.1934123039245605, "learning_rate": 4.526219627235446e-06, "loss": 0.1931, "step": 105475 }, { "epoch": 1.04, "grad_norm": 19.469032287597656, "learning_rate": 4.526095504781197e-06, "loss": 0.261, "step": 105500 }, { "epoch": 1.04, "grad_norm": 4.473113059997559, "learning_rate": 4.525971382326949e-06, "loss": 0.2184, "step": 105525 }, { "epoch": 1.04, "grad_norm": 12.40891170501709, "learning_rate": 4.5258472598727e-06, "loss": 0.2466, "step": 105550 }, { "epoch": 1.04, "grad_norm": 0.7257053852081299, "learning_rate": 4.525723137418452e-06, "loss": 0.2164, "step": 105575 }, { "epoch": 1.04, "grad_norm": 10.51340103149414, "learning_rate": 4.525599014964203e-06, "loss": 0.2372, "step": 105600 }, { "epoch": 1.04, "grad_norm": 6.948488712310791, "learning_rate": 4.5254748925099554e-06, "loss": 0.2311, "step": 105625 }, { "epoch": 1.04, "grad_norm": 15.543458938598633, "learning_rate": 4.525350770055707e-06, "loss": 0.2382, "step": 105650 }, { "epoch": 1.04, "grad_norm": 7.663340091705322, "learning_rate": 4.525226647601458e-06, "loss": 0.2184, "step": 105675 }, { "epoch": 1.04, "grad_norm": 13.058907508850098, "learning_rate": 4.52510252514721e-06, "loss": 0.2009, "step": 105700 }, { "epoch": 1.04, "grad_norm": 7.036744594573975, "learning_rate": 4.524978402692961e-06, "loss": 0.2462, "step": 105725 }, { "epoch": 1.04, "grad_norm": 14.16745376586914, "learning_rate": 4.524854280238712e-06, "loss": 0.2024, "step": 105750 }, { "epoch": 1.04, "grad_norm": 2.497077465057373, "learning_rate": 4.524730157784464e-06, "loss": 0.1948, "step": 105775 }, { "epoch": 1.04, "grad_norm": 13.926666259765625, "learning_rate": 4.524606035330216e-06, "loss": 0.2547, "step": 105800 }, { "epoch": 1.04, "grad_norm": 4.0935235023498535, "learning_rate": 4.524481912875967e-06, "loss": 0.1734, "step": 105825 }, { "epoch": 1.04, "grad_norm": 11.401476860046387, "learning_rate": 4.524357790421719e-06, "loss": 0.2299, "step": 105850 }, { "epoch": 1.04, "grad_norm": 7.760377883911133, "learning_rate": 4.52423366796747e-06, "loss": 0.19, "step": 105875 }, { "epoch": 1.04, "grad_norm": 10.4451265335083, "learning_rate": 4.524109545513221e-06, "loss": 0.222, "step": 105900 }, { "epoch": 1.04, "grad_norm": 7.765705585479736, "learning_rate": 4.523985423058973e-06, "loss": 0.249, "step": 105925 }, { "epoch": 1.04, "grad_norm": 13.301383972167969, "learning_rate": 4.523861300604725e-06, "loss": 0.2185, "step": 105950 }, { "epoch": 1.04, "grad_norm": 4.031529903411865, "learning_rate": 4.523737178150477e-06, "loss": 0.23, "step": 105975 }, { "epoch": 1.04, "grad_norm": 9.896986961364746, "learning_rate": 4.523613055696228e-06, "loss": 0.1642, "step": 106000 }, { "epoch": 1.04, "grad_norm": 5.40430212020874, "learning_rate": 4.52348893324198e-06, "loss": 0.2204, "step": 106025 }, { "epoch": 1.04, "grad_norm": 12.391572952270508, "learning_rate": 4.523364810787731e-06, "loss": 0.2517, "step": 106050 }, { "epoch": 1.04, "grad_norm": 8.40433406829834, "learning_rate": 4.523240688333483e-06, "loss": 0.2175, "step": 106075 }, { "epoch": 1.04, "grad_norm": 19.70020294189453, "learning_rate": 4.523116565879234e-06, "loss": 0.2781, "step": 106100 }, { "epoch": 1.04, "grad_norm": 4.39729118347168, "learning_rate": 4.522992443424986e-06, "loss": 0.2352, "step": 106125 }, { "epoch": 1.04, "grad_norm": 10.579943656921387, "learning_rate": 4.522868320970738e-06, "loss": 0.2059, "step": 106150 }, { "epoch": 1.04, "grad_norm": 6.18843412399292, "learning_rate": 4.522744198516489e-06, "loss": 0.1847, "step": 106175 }, { "epoch": 1.04, "grad_norm": 20.491641998291016, "learning_rate": 4.52262007606224e-06, "loss": 0.2328, "step": 106200 }, { "epoch": 1.04, "grad_norm": 7.707535266876221, "learning_rate": 4.522495953607992e-06, "loss": 0.2243, "step": 106225 }, { "epoch": 1.04, "grad_norm": 19.70177459716797, "learning_rate": 4.522371831153743e-06, "loss": 0.2693, "step": 106250 }, { "epoch": 1.04, "grad_norm": 4.779733180999756, "learning_rate": 4.5222477086994946e-06, "loss": 0.2361, "step": 106275 }, { "epoch": 1.05, "grad_norm": 14.233829498291016, "learning_rate": 4.522123586245247e-06, "loss": 0.2714, "step": 106300 }, { "epoch": 1.05, "grad_norm": 3.223048448562622, "learning_rate": 4.521999463790998e-06, "loss": 0.2191, "step": 106325 }, { "epoch": 1.05, "grad_norm": 14.994794845581055, "learning_rate": 4.521875341336749e-06, "loss": 0.2086, "step": 106350 }, { "epoch": 1.05, "grad_norm": 2.256244659423828, "learning_rate": 4.521751218882501e-06, "loss": 0.2357, "step": 106375 }, { "epoch": 1.05, "grad_norm": 15.775801658630371, "learning_rate": 4.521627096428252e-06, "loss": 0.2476, "step": 106400 }, { "epoch": 1.05, "grad_norm": 5.890263080596924, "learning_rate": 4.521502973974004e-06, "loss": 0.2277, "step": 106425 }, { "epoch": 1.05, "grad_norm": 14.524986267089844, "learning_rate": 4.521378851519756e-06, "loss": 0.2316, "step": 106450 }, { "epoch": 1.05, "grad_norm": 3.2587811946868896, "learning_rate": 4.521254729065508e-06, "loss": 0.2164, "step": 106475 }, { "epoch": 1.05, "grad_norm": 15.803814888000488, "learning_rate": 4.521130606611259e-06, "loss": 0.2817, "step": 106500 }, { "epoch": 1.05, "grad_norm": 6.647246837615967, "learning_rate": 4.52100648415701e-06, "loss": 0.2105, "step": 106525 }, { "epoch": 1.05, "grad_norm": 10.089524269104004, "learning_rate": 4.520882361702762e-06, "loss": 0.2372, "step": 106550 }, { "epoch": 1.05, "grad_norm": 5.909298419952393, "learning_rate": 4.520763204146683e-06, "loss": 0.2742, "step": 106575 }, { "epoch": 1.05, "grad_norm": 12.361051559448242, "learning_rate": 4.520639081692435e-06, "loss": 0.2353, "step": 106600 }, { "epoch": 1.05, "grad_norm": 5.684657096862793, "learning_rate": 4.520514959238186e-06, "loss": 0.2285, "step": 106625 }, { "epoch": 1.05, "grad_norm": 10.58039665222168, "learning_rate": 4.520390836783938e-06, "loss": 0.2332, "step": 106650 }, { "epoch": 1.05, "grad_norm": 6.3173298835754395, "learning_rate": 4.52026671432969e-06, "loss": 0.2064, "step": 106675 }, { "epoch": 1.05, "grad_norm": 15.317818641662598, "learning_rate": 4.520142591875441e-06, "loss": 0.2192, "step": 106700 }, { "epoch": 1.05, "grad_norm": 2.7759809494018555, "learning_rate": 4.520018469421192e-06, "loss": 0.2164, "step": 106725 }, { "epoch": 1.05, "grad_norm": 16.735998153686523, "learning_rate": 4.519894346966944e-06, "loss": 0.2427, "step": 106750 }, { "epoch": 1.05, "grad_norm": 4.866046905517578, "learning_rate": 4.519770224512695e-06, "loss": 0.2116, "step": 106775 }, { "epoch": 1.05, "grad_norm": 13.868760108947754, "learning_rate": 4.519646102058447e-06, "loss": 0.2486, "step": 106800 }, { "epoch": 1.05, "grad_norm": 0.5765955448150635, "learning_rate": 4.519521979604199e-06, "loss": 0.2265, "step": 106825 }, { "epoch": 1.05, "grad_norm": 8.978546142578125, "learning_rate": 4.51939785714995e-06, "loss": 0.259, "step": 106850 }, { "epoch": 1.05, "grad_norm": 6.96856164932251, "learning_rate": 4.519273734695702e-06, "loss": 0.2336, "step": 106875 }, { "epoch": 1.05, "grad_norm": 7.410726070404053, "learning_rate": 4.519149612241453e-06, "loss": 0.2069, "step": 106900 }, { "epoch": 1.05, "grad_norm": 5.861246585845947, "learning_rate": 4.519025489787205e-06, "loss": 0.2405, "step": 106925 }, { "epoch": 1.05, "grad_norm": 20.088947296142578, "learning_rate": 4.518901367332956e-06, "loss": 0.2266, "step": 106950 }, { "epoch": 1.05, "grad_norm": 7.452606678009033, "learning_rate": 4.5187772448787085e-06, "loss": 0.2162, "step": 106975 }, { "epoch": 1.05, "grad_norm": 17.098798751831055, "learning_rate": 4.51865312242446e-06, "loss": 0.2855, "step": 107000 }, { "epoch": 1.05, "grad_norm": 8.932005882263184, "learning_rate": 4.518528999970211e-06, "loss": 0.1733, "step": 107025 }, { "epoch": 1.05, "grad_norm": 13.78237247467041, "learning_rate": 4.518404877515962e-06, "loss": 0.2768, "step": 107050 }, { "epoch": 1.05, "grad_norm": 4.675990104675293, "learning_rate": 4.518280755061714e-06, "loss": 0.1966, "step": 107075 }, { "epoch": 1.05, "grad_norm": 21.14057731628418, "learning_rate": 4.518156632607465e-06, "loss": 0.2545, "step": 107100 }, { "epoch": 1.05, "grad_norm": 8.189858436584473, "learning_rate": 4.518032510153217e-06, "loss": 0.2457, "step": 107125 }, { "epoch": 1.05, "grad_norm": 13.51554012298584, "learning_rate": 4.517908387698969e-06, "loss": 0.2689, "step": 107150 }, { "epoch": 1.05, "grad_norm": 4.817105293273926, "learning_rate": 4.51778426524472e-06, "loss": 0.1714, "step": 107175 }, { "epoch": 1.05, "grad_norm": 15.62668514251709, "learning_rate": 4.517660142790472e-06, "loss": 0.2365, "step": 107200 }, { "epoch": 1.05, "grad_norm": 7.935177326202393, "learning_rate": 4.517536020336223e-06, "loss": 0.2439, "step": 107225 }, { "epoch": 1.05, "grad_norm": 22.56490707397461, "learning_rate": 4.517411897881975e-06, "loss": 0.2768, "step": 107250 }, { "epoch": 1.05, "grad_norm": 10.313173294067383, "learning_rate": 4.517287775427726e-06, "loss": 0.2555, "step": 107275 }, { "epoch": 1.05, "grad_norm": 15.259490966796875, "learning_rate": 4.5171636529734784e-06, "loss": 0.2025, "step": 107300 }, { "epoch": 1.06, "grad_norm": 6.294310092926025, "learning_rate": 4.51703953051923e-06, "loss": 0.2082, "step": 107325 }, { "epoch": 1.06, "grad_norm": 14.307046890258789, "learning_rate": 4.516915408064981e-06, "loss": 0.2526, "step": 107350 }, { "epoch": 1.06, "grad_norm": 6.001895904541016, "learning_rate": 4.516791285610733e-06, "loss": 0.2064, "step": 107375 }, { "epoch": 1.06, "grad_norm": 17.05562400817871, "learning_rate": 4.516667163156484e-06, "loss": 0.2436, "step": 107400 }, { "epoch": 1.06, "grad_norm": 4.910409927368164, "learning_rate": 4.516543040702235e-06, "loss": 0.1881, "step": 107425 }, { "epoch": 1.06, "grad_norm": 13.06965446472168, "learning_rate": 4.516418918247987e-06, "loss": 0.2133, "step": 107450 }, { "epoch": 1.06, "grad_norm": 4.05324649810791, "learning_rate": 4.516294795793739e-06, "loss": 0.2231, "step": 107475 }, { "epoch": 1.06, "grad_norm": 14.377440452575684, "learning_rate": 4.51617067333949e-06, "loss": 0.208, "step": 107500 }, { "epoch": 1.06, "grad_norm": 4.008727073669434, "learning_rate": 4.516046550885242e-06, "loss": 0.2108, "step": 107525 }, { "epoch": 1.06, "grad_norm": 14.35369873046875, "learning_rate": 4.515922428430993e-06, "loss": 0.2755, "step": 107550 }, { "epoch": 1.06, "grad_norm": 0.44964128732681274, "learning_rate": 4.515798305976744e-06, "loss": 0.1935, "step": 107575 }, { "epoch": 1.06, "grad_norm": 17.01863670349121, "learning_rate": 4.515674183522496e-06, "loss": 0.2352, "step": 107600 }, { "epoch": 1.06, "grad_norm": 1.6678093671798706, "learning_rate": 4.515550061068248e-06, "loss": 0.278, "step": 107625 }, { "epoch": 1.06, "grad_norm": 10.255858421325684, "learning_rate": 4.515425938614e-06, "loss": 0.2665, "step": 107650 }, { "epoch": 1.06, "grad_norm": 4.003510475158691, "learning_rate": 4.515301816159751e-06, "loss": 0.2215, "step": 107675 }, { "epoch": 1.06, "grad_norm": 12.909357070922852, "learning_rate": 4.515177693705503e-06, "loss": 0.2497, "step": 107700 }, { "epoch": 1.06, "grad_norm": 7.810475826263428, "learning_rate": 4.515053571251254e-06, "loss": 0.2119, "step": 107725 }, { "epoch": 1.06, "grad_norm": 20.237518310546875, "learning_rate": 4.514929448797006e-06, "loss": 0.2338, "step": 107750 }, { "epoch": 1.06, "grad_norm": 6.443227291107178, "learning_rate": 4.514805326342757e-06, "loss": 0.2116, "step": 107775 }, { "epoch": 1.06, "grad_norm": 15.356633186340332, "learning_rate": 4.514681203888509e-06, "loss": 0.2993, "step": 107800 }, { "epoch": 1.06, "grad_norm": 8.525967597961426, "learning_rate": 4.514557081434261e-06, "loss": 0.2103, "step": 107825 }, { "epoch": 1.06, "grad_norm": 17.383756637573242, "learning_rate": 4.514432958980012e-06, "loss": 0.2213, "step": 107850 }, { "epoch": 1.06, "grad_norm": 6.806885719299316, "learning_rate": 4.514308836525763e-06, "loss": 0.2168, "step": 107875 }, { "epoch": 1.06, "grad_norm": 16.038246154785156, "learning_rate": 4.514184714071515e-06, "loss": 0.2237, "step": 107900 }, { "epoch": 1.06, "grad_norm": 5.456735134124756, "learning_rate": 4.514060591617266e-06, "loss": 0.2195, "step": 107925 }, { "epoch": 1.06, "grad_norm": 15.0889253616333, "learning_rate": 4.5139364691630176e-06, "loss": 0.2414, "step": 107950 }, { "epoch": 1.06, "grad_norm": 2.763756513595581, "learning_rate": 4.513812346708769e-06, "loss": 0.1944, "step": 107975 }, { "epoch": 1.06, "grad_norm": 10.07257080078125, "learning_rate": 4.513688224254521e-06, "loss": 0.2489, "step": 108000 }, { "epoch": 1.06, "grad_norm": 5.532043933868408, "learning_rate": 4.513564101800272e-06, "loss": 0.2215, "step": 108025 }, { "epoch": 1.06, "grad_norm": 14.290433883666992, "learning_rate": 4.513439979346024e-06, "loss": 0.2423, "step": 108050 }, { "epoch": 1.06, "grad_norm": 8.135652542114258, "learning_rate": 4.513315856891775e-06, "loss": 0.2171, "step": 108075 }, { "epoch": 1.06, "grad_norm": 14.0072021484375, "learning_rate": 4.513191734437527e-06, "loss": 0.2433, "step": 108100 }, { "epoch": 1.06, "grad_norm": 6.951128959655762, "learning_rate": 4.513067611983279e-06, "loss": 0.2173, "step": 108125 }, { "epoch": 1.06, "grad_norm": 21.349708557128906, "learning_rate": 4.512943489529031e-06, "loss": 0.2527, "step": 108150 }, { "epoch": 1.06, "grad_norm": 7.182005882263184, "learning_rate": 4.512819367074782e-06, "loss": 0.2176, "step": 108175 }, { "epoch": 1.06, "grad_norm": 12.451837539672852, "learning_rate": 4.512695244620533e-06, "loss": 0.2333, "step": 108200 }, { "epoch": 1.06, "grad_norm": 7.222490310668945, "learning_rate": 4.512571122166285e-06, "loss": 0.2171, "step": 108225 }, { "epoch": 1.06, "grad_norm": 15.537263870239258, "learning_rate": 4.512446999712036e-06, "loss": 0.2812, "step": 108250 }, { "epoch": 1.06, "grad_norm": 7.087011337280273, "learning_rate": 4.5123228772577876e-06, "loss": 0.1998, "step": 108275 }, { "epoch": 1.06, "grad_norm": 16.68742561340332, "learning_rate": 4.51219875480354e-06, "loss": 0.1922, "step": 108300 }, { "epoch": 1.07, "grad_norm": 11.029580116271973, "learning_rate": 4.512074632349291e-06, "loss": 0.1738, "step": 108325 }, { "epoch": 1.07, "grad_norm": 25.239025115966797, "learning_rate": 4.511950509895042e-06, "loss": 0.3015, "step": 108350 }, { "epoch": 1.07, "grad_norm": 16.335582733154297, "learning_rate": 4.511826387440794e-06, "loss": 0.2112, "step": 108375 }, { "epoch": 1.07, "grad_norm": 15.147705078125, "learning_rate": 4.511702264986545e-06, "loss": 0.2066, "step": 108400 }, { "epoch": 1.07, "grad_norm": 6.662466049194336, "learning_rate": 4.5115781425322965e-06, "loss": 0.2459, "step": 108425 }, { "epoch": 1.07, "grad_norm": 10.379270553588867, "learning_rate": 4.5114540200780486e-06, "loss": 0.2636, "step": 108450 }, { "epoch": 1.07, "grad_norm": 6.896368980407715, "learning_rate": 4.5113298976238e-06, "loss": 0.1994, "step": 108475 }, { "epoch": 1.07, "grad_norm": 9.595584869384766, "learning_rate": 4.511205775169552e-06, "loss": 0.2717, "step": 108500 }, { "epoch": 1.07, "grad_norm": 4.809151649475098, "learning_rate": 4.511081652715303e-06, "loss": 0.2093, "step": 108525 }, { "epoch": 1.07, "grad_norm": 10.482481002807617, "learning_rate": 4.510957530261055e-06, "loss": 0.2545, "step": 108550 }, { "epoch": 1.07, "grad_norm": 6.687763214111328, "learning_rate": 4.510833407806806e-06, "loss": 0.1833, "step": 108575 }, { "epoch": 1.07, "grad_norm": 22.688858032226562, "learning_rate": 4.510709285352558e-06, "loss": 0.2592, "step": 108600 }, { "epoch": 1.07, "grad_norm": 6.210742950439453, "learning_rate": 4.51058516289831e-06, "loss": 0.234, "step": 108625 }, { "epoch": 1.07, "grad_norm": 16.85900115966797, "learning_rate": 4.510461040444061e-06, "loss": 0.2381, "step": 108650 }, { "epoch": 1.07, "grad_norm": 7.553066730499268, "learning_rate": 4.510336917989813e-06, "loss": 0.2331, "step": 108675 }, { "epoch": 1.07, "grad_norm": 10.217497825622559, "learning_rate": 4.510212795535564e-06, "loss": 0.2195, "step": 108700 }, { "epoch": 1.07, "grad_norm": 8.512428283691406, "learning_rate": 4.510088673081315e-06, "loss": 0.1912, "step": 108725 }, { "epoch": 1.07, "grad_norm": Infinity, "learning_rate": 4.509969515525237e-06, "loss": 0.2771, "step": 108750 }, { "epoch": 1.07, "grad_norm": 2.886005401611328, "learning_rate": 4.509845393070988e-06, "loss": 0.2174, "step": 108775 }, { "epoch": 1.07, "grad_norm": 11.735700607299805, "learning_rate": 4.50972127061674e-06, "loss": 0.2533, "step": 108800 }, { "epoch": 1.07, "grad_norm": 5.53899621963501, "learning_rate": 4.509597148162492e-06, "loss": 0.2042, "step": 108825 }, { "epoch": 1.07, "grad_norm": 10.018054962158203, "learning_rate": 4.509473025708243e-06, "loss": 0.2292, "step": 108850 }, { "epoch": 1.07, "grad_norm": 6.795118808746338, "learning_rate": 4.509348903253994e-06, "loss": 0.231, "step": 108875 }, { "epoch": 1.07, "grad_norm": 9.332884788513184, "learning_rate": 4.509224780799746e-06, "loss": 0.2599, "step": 108900 }, { "epoch": 1.07, "grad_norm": 8.76186752319336, "learning_rate": 4.509100658345497e-06, "loss": 0.2203, "step": 108925 }, { "epoch": 1.07, "grad_norm": 9.084293365478516, "learning_rate": 4.508976535891249e-06, "loss": 0.2451, "step": 108950 }, { "epoch": 1.07, "grad_norm": 14.521463394165039, "learning_rate": 4.508852413437001e-06, "loss": 0.2512, "step": 108975 }, { "epoch": 1.07, "grad_norm": 19.04134178161621, "learning_rate": 4.508728290982753e-06, "loss": 0.2216, "step": 109000 }, { "epoch": 1.07, "grad_norm": 4.147385597229004, "learning_rate": 4.508604168528504e-06, "loss": 0.2017, "step": 109025 }, { "epoch": 1.07, "grad_norm": 12.559477806091309, "learning_rate": 4.508480046074256e-06, "loss": 0.2619, "step": 109050 }, { "epoch": 1.07, "grad_norm": 4.184773921966553, "learning_rate": 4.508355923620007e-06, "loss": 0.2462, "step": 109075 }, { "epoch": 1.07, "grad_norm": 9.39501667022705, "learning_rate": 4.508231801165758e-06, "loss": 0.2833, "step": 109100 }, { "epoch": 1.07, "grad_norm": 5.725104808807373, "learning_rate": 4.50810767871151e-06, "loss": 0.2427, "step": 109125 }, { "epoch": 1.07, "grad_norm": 17.737533569335938, "learning_rate": 4.507983556257262e-06, "loss": 0.2637, "step": 109150 }, { "epoch": 1.07, "grad_norm": 6.37410306930542, "learning_rate": 4.507859433803013e-06, "loss": 0.2122, "step": 109175 }, { "epoch": 1.07, "grad_norm": 14.108942031860352, "learning_rate": 4.507735311348765e-06, "loss": 0.2444, "step": 109200 }, { "epoch": 1.07, "grad_norm": 6.770502090454102, "learning_rate": 4.507611188894516e-06, "loss": 0.2242, "step": 109225 }, { "epoch": 1.07, "grad_norm": 13.796961784362793, "learning_rate": 4.507487066440267e-06, "loss": 0.2459, "step": 109250 }, { "epoch": 1.07, "grad_norm": 8.102927207946777, "learning_rate": 4.507362943986019e-06, "loss": 0.2253, "step": 109275 }, { "epoch": 1.07, "grad_norm": 10.16893482208252, "learning_rate": 4.507238821531771e-06, "loss": 0.2437, "step": 109300 }, { "epoch": 1.07, "grad_norm": 8.231243133544922, "learning_rate": 4.507114699077522e-06, "loss": 0.2239, "step": 109325 }, { "epoch": 1.08, "grad_norm": 14.771888732910156, "learning_rate": 4.506990576623274e-06, "loss": 0.2719, "step": 109350 }, { "epoch": 1.08, "grad_norm": 5.771956920623779, "learning_rate": 4.506866454169025e-06, "loss": 0.2555, "step": 109375 }, { "epoch": 1.08, "grad_norm": 12.210638046264648, "learning_rate": 4.506742331714777e-06, "loss": 0.2445, "step": 109400 }, { "epoch": 1.08, "grad_norm": 3.052156686782837, "learning_rate": 4.506618209260528e-06, "loss": 0.2714, "step": 109425 }, { "epoch": 1.08, "grad_norm": 17.242502212524414, "learning_rate": 4.50649408680628e-06, "loss": 0.2634, "step": 109450 }, { "epoch": 1.08, "grad_norm": 6.179760932922363, "learning_rate": 4.506369964352032e-06, "loss": 0.2322, "step": 109475 }, { "epoch": 1.08, "grad_norm": 9.976914405822754, "learning_rate": 4.506245841897783e-06, "loss": 0.2525, "step": 109500 }, { "epoch": 1.08, "grad_norm": 6.678314208984375, "learning_rate": 4.506121719443535e-06, "loss": 0.2126, "step": 109525 }, { "epoch": 1.08, "grad_norm": 11.921488761901855, "learning_rate": 4.505997596989286e-06, "loss": 0.2641, "step": 109550 }, { "epoch": 1.08, "grad_norm": 4.772701263427734, "learning_rate": 4.505873474535037e-06, "loss": 0.2342, "step": 109575 }, { "epoch": 1.08, "grad_norm": 16.94615936279297, "learning_rate": 4.505749352080789e-06, "loss": 0.2376, "step": 109600 }, { "epoch": 1.08, "grad_norm": 5.939991474151611, "learning_rate": 4.5056252296265406e-06, "loss": 0.213, "step": 109625 }, { "epoch": 1.08, "grad_norm": 11.325761795043945, "learning_rate": 4.505501107172292e-06, "loss": 0.2053, "step": 109650 }, { "epoch": 1.08, "grad_norm": 6.816395282745361, "learning_rate": 4.505376984718044e-06, "loss": 0.2206, "step": 109675 }, { "epoch": 1.08, "grad_norm": 9.941510200500488, "learning_rate": 4.505252862263795e-06, "loss": 0.2349, "step": 109700 }, { "epoch": 1.08, "grad_norm": 6.9623823165893555, "learning_rate": 4.505128739809546e-06, "loss": 0.2333, "step": 109725 }, { "epoch": 1.08, "grad_norm": 16.703012466430664, "learning_rate": 4.505004617355298e-06, "loss": 0.2469, "step": 109750 }, { "epoch": 1.08, "grad_norm": 4.706454753875732, "learning_rate": 4.5048804949010495e-06, "loss": 0.2304, "step": 109775 }, { "epoch": 1.08, "grad_norm": 18.68450164794922, "learning_rate": 4.504756372446802e-06, "loss": 0.2272, "step": 109800 }, { "epoch": 1.08, "grad_norm": 2.3847763538360596, "learning_rate": 4.504632249992553e-06, "loss": 0.2426, "step": 109825 }, { "epoch": 1.08, "grad_norm": 22.807632446289062, "learning_rate": 4.504508127538305e-06, "loss": 0.2531, "step": 109850 }, { "epoch": 1.08, "grad_norm": 5.878471374511719, "learning_rate": 4.504384005084056e-06, "loss": 0.194, "step": 109875 }, { "epoch": 1.08, "grad_norm": 14.269476890563965, "learning_rate": 4.504259882629808e-06, "loss": 0.3051, "step": 109900 }, { "epoch": 1.08, "grad_norm": 3.0596694946289062, "learning_rate": 4.504135760175559e-06, "loss": 0.2082, "step": 109925 }, { "epoch": 1.08, "grad_norm": 11.797225952148438, "learning_rate": 4.5040116377213106e-06, "loss": 0.1822, "step": 109950 }, { "epoch": 1.08, "grad_norm": 5.313216686248779, "learning_rate": 4.503887515267063e-06, "loss": 0.2487, "step": 109975 }, { "epoch": 1.08, "grad_norm": 9.118577003479004, "learning_rate": 4.503763392812814e-06, "loss": 0.2501, "step": 110000 }, { "epoch": 1.08, "grad_norm": 3.3423972129821777, "learning_rate": 4.503639270358565e-06, "loss": 0.224, "step": 110025 }, { "epoch": 1.08, "grad_norm": 7.4376935958862305, "learning_rate": 4.503515147904317e-06, "loss": 0.247, "step": 110050 }, { "epoch": 1.08, "grad_norm": 4.448740005493164, "learning_rate": 4.503391025450068e-06, "loss": 0.2511, "step": 110075 }, { "epoch": 1.08, "grad_norm": 13.998114585876465, "learning_rate": 4.5032669029958195e-06, "loss": 0.2604, "step": 110100 }, { "epoch": 1.08, "grad_norm": 4.207564353942871, "learning_rate": 4.503142780541572e-06, "loss": 0.2227, "step": 110125 }, { "epoch": 1.08, "grad_norm": 15.78851318359375, "learning_rate": 4.503018658087323e-06, "loss": 0.289, "step": 110150 }, { "epoch": 1.08, "grad_norm": 1.7234994173049927, "learning_rate": 4.502894535633075e-06, "loss": 0.2154, "step": 110175 }, { "epoch": 1.08, "grad_norm": 10.128108978271484, "learning_rate": 4.502770413178826e-06, "loss": 0.226, "step": 110200 }, { "epoch": 1.08, "grad_norm": 15.516324043273926, "learning_rate": 4.502646290724578e-06, "loss": 0.2391, "step": 110225 }, { "epoch": 1.08, "grad_norm": 16.449926376342773, "learning_rate": 4.502522168270329e-06, "loss": 0.2303, "step": 110250 }, { "epoch": 1.08, "grad_norm": 5.295284271240234, "learning_rate": 4.5023980458160805e-06, "loss": 0.2339, "step": 110275 }, { "epoch": 1.08, "grad_norm": 18.50181770324707, "learning_rate": 4.502273923361833e-06, "loss": 0.2272, "step": 110300 }, { "epoch": 1.08, "grad_norm": 3.5240797996520996, "learning_rate": 4.502149800907584e-06, "loss": 0.2071, "step": 110325 }, { "epoch": 1.08, "grad_norm": 12.736327171325684, "learning_rate": 4.502025678453335e-06, "loss": 0.2319, "step": 110350 }, { "epoch": 1.09, "grad_norm": 1.8251219987869263, "learning_rate": 4.501901555999087e-06, "loss": 0.2202, "step": 110375 }, { "epoch": 1.09, "grad_norm": 25.864036560058594, "learning_rate": 4.501777433544838e-06, "loss": 0.2824, "step": 110400 }, { "epoch": 1.09, "grad_norm": 4.849056720733643, "learning_rate": 4.5016533110905895e-06, "loss": 0.1723, "step": 110425 }, { "epoch": 1.09, "grad_norm": 15.556670188903809, "learning_rate": 4.5015291886363416e-06, "loss": 0.2687, "step": 110450 }, { "epoch": 1.09, "grad_norm": 4.461236476898193, "learning_rate": 4.501405066182093e-06, "loss": 0.1968, "step": 110475 }, { "epoch": 1.09, "grad_norm": 21.16875648498535, "learning_rate": 4.501280943727844e-06, "loss": 0.2679, "step": 110500 }, { "epoch": 1.09, "grad_norm": 3.3774020671844482, "learning_rate": 4.501156821273596e-06, "loss": 0.24, "step": 110525 }, { "epoch": 1.09, "grad_norm": 15.583011627197266, "learning_rate": 4.501032698819347e-06, "loss": 0.1901, "step": 110550 }, { "epoch": 1.09, "grad_norm": 5.384138584136963, "learning_rate": 4.500908576365099e-06, "loss": 0.1855, "step": 110575 }, { "epoch": 1.09, "grad_norm": 20.055301666259766, "learning_rate": 4.5007844539108505e-06, "loss": 0.332, "step": 110600 }, { "epoch": 1.09, "grad_norm": 1.0883734226226807, "learning_rate": 4.500660331456603e-06, "loss": 0.2349, "step": 110625 }, { "epoch": 1.09, "grad_norm": 12.927927017211914, "learning_rate": 4.500536209002354e-06, "loss": 0.2229, "step": 110650 }, { "epoch": 1.09, "grad_norm": 6.018649101257324, "learning_rate": 4.500412086548106e-06, "loss": 0.2437, "step": 110675 }, { "epoch": 1.09, "grad_norm": 13.61585521697998, "learning_rate": 4.500287964093857e-06, "loss": 0.2437, "step": 110700 }, { "epoch": 1.09, "grad_norm": 6.412230014801025, "learning_rate": 4.500163841639608e-06, "loss": 0.2628, "step": 110725 }, { "epoch": 1.09, "grad_norm": 14.892928123474121, "learning_rate": 4.50003971918536e-06, "loss": 0.2069, "step": 110750 }, { "epoch": 1.09, "grad_norm": 7.183464527130127, "learning_rate": 4.499920561629281e-06, "loss": 0.2502, "step": 110775 }, { "epoch": 1.09, "grad_norm": 11.168428421020508, "learning_rate": 4.4997964391750326e-06, "loss": 0.2105, "step": 110800 }, { "epoch": 1.09, "grad_norm": 3.2397308349609375, "learning_rate": 4.499672316720785e-06, "loss": 0.1794, "step": 110825 }, { "epoch": 1.09, "grad_norm": 11.757399559020996, "learning_rate": 4.499548194266536e-06, "loss": 0.214, "step": 110850 }, { "epoch": 1.09, "grad_norm": 4.510288238525391, "learning_rate": 4.499424071812287e-06, "loss": 0.1926, "step": 110875 }, { "epoch": 1.09, "grad_norm": 21.09894371032715, "learning_rate": 4.499299949358039e-06, "loss": 0.2375, "step": 110900 }, { "epoch": 1.09, "grad_norm": 7.935361385345459, "learning_rate": 4.49917582690379e-06, "loss": 0.2084, "step": 110925 }, { "epoch": 1.09, "grad_norm": 11.239960670471191, "learning_rate": 4.4990517044495415e-06, "loss": 0.1976, "step": 110950 }, { "epoch": 1.09, "grad_norm": 10.569887161254883, "learning_rate": 4.498927581995294e-06, "loss": 0.2433, "step": 110975 }, { "epoch": 1.09, "grad_norm": 9.010354042053223, "learning_rate": 4.498803459541045e-06, "loss": 0.2122, "step": 111000 }, { "epoch": 1.09, "grad_norm": 6.510087490081787, "learning_rate": 4.498679337086797e-06, "loss": 0.2024, "step": 111025 }, { "epoch": 1.09, "grad_norm": 21.05518341064453, "learning_rate": 4.498555214632548e-06, "loss": 0.2428, "step": 111050 }, { "epoch": 1.09, "grad_norm": 9.804901123046875, "learning_rate": 4.4984310921783e-06, "loss": 0.2502, "step": 111075 }, { "epoch": 1.09, "grad_norm": 18.01642417907715, "learning_rate": 4.498306969724051e-06, "loss": 0.2164, "step": 111100 }, { "epoch": 1.09, "grad_norm": 7.4580864906311035, "learning_rate": 4.498182847269803e-06, "loss": 0.2417, "step": 111125 }, { "epoch": 1.09, "grad_norm": 15.59954833984375, "learning_rate": 4.498058724815555e-06, "loss": 0.249, "step": 111150 }, { "epoch": 1.09, "grad_norm": 4.066530704498291, "learning_rate": 4.497934602361306e-06, "loss": 0.2116, "step": 111175 }, { "epoch": 1.09, "grad_norm": 12.18643856048584, "learning_rate": 4.497810479907058e-06, "loss": 0.1813, "step": 111200 }, { "epoch": 1.09, "grad_norm": 4.278628349304199, "learning_rate": 4.497686357452809e-06, "loss": 0.2243, "step": 111225 }, { "epoch": 1.09, "grad_norm": 19.009973526000977, "learning_rate": 4.49756223499856e-06, "loss": 0.2793, "step": 111250 }, { "epoch": 1.09, "grad_norm": 6.315158843994141, "learning_rate": 4.497438112544312e-06, "loss": 0.2622, "step": 111275 }, { "epoch": 1.09, "grad_norm": 10.401479721069336, "learning_rate": 4.497313990090064e-06, "loss": 0.2376, "step": 111300 }, { "epoch": 1.09, "grad_norm": 3.220996618270874, "learning_rate": 4.497189867635815e-06, "loss": 0.1991, "step": 111325 }, { "epoch": 1.09, "grad_norm": 14.971529006958008, "learning_rate": 4.497065745181567e-06, "loss": 0.2441, "step": 111350 }, { "epoch": 1.1, "grad_norm": 4.773128986358643, "learning_rate": 4.496941622727318e-06, "loss": 0.1865, "step": 111375 }, { "epoch": 1.1, "grad_norm": 8.115482330322266, "learning_rate": 4.496817500273069e-06, "loss": 0.2295, "step": 111400 }, { "epoch": 1.1, "grad_norm": 2.2180838584899902, "learning_rate": 4.496693377818821e-06, "loss": 0.2327, "step": 111425 }, { "epoch": 1.1, "grad_norm": 12.522989273071289, "learning_rate": 4.4965692553645725e-06, "loss": 0.1902, "step": 111450 }, { "epoch": 1.1, "grad_norm": 4.723382472991943, "learning_rate": 4.496445132910325e-06, "loss": 0.2318, "step": 111475 }, { "epoch": 1.1, "grad_norm": 11.558216094970703, "learning_rate": 4.496321010456076e-06, "loss": 0.2372, "step": 111500 }, { "epoch": 1.1, "grad_norm": 5.029779434204102, "learning_rate": 4.496196888001828e-06, "loss": 0.2483, "step": 111525 }, { "epoch": 1.1, "grad_norm": 14.780891418457031, "learning_rate": 4.496072765547579e-06, "loss": 0.2203, "step": 111550 }, { "epoch": 1.1, "grad_norm": 4.782107830047607, "learning_rate": 4.495948643093331e-06, "loss": 0.2257, "step": 111575 }, { "epoch": 1.1, "grad_norm": 8.22297191619873, "learning_rate": 4.495824520639082e-06, "loss": 0.2677, "step": 111600 }, { "epoch": 1.1, "grad_norm": 8.660697937011719, "learning_rate": 4.4957003981848336e-06, "loss": 0.2733, "step": 111625 }, { "epoch": 1.1, "grad_norm": 18.11262321472168, "learning_rate": 4.495576275730585e-06, "loss": 0.2455, "step": 111650 }, { "epoch": 1.1, "grad_norm": 7.720383644104004, "learning_rate": 4.495452153276337e-06, "loss": 0.2408, "step": 111675 }, { "epoch": 1.1, "grad_norm": 17.370304107666016, "learning_rate": 4.495328030822088e-06, "loss": 0.2451, "step": 111700 }, { "epoch": 1.1, "grad_norm": 7.822360515594482, "learning_rate": 4.495203908367839e-06, "loss": 0.2583, "step": 111725 }, { "epoch": 1.1, "grad_norm": 17.024765014648438, "learning_rate": 4.495079785913591e-06, "loss": 0.2378, "step": 111750 }, { "epoch": 1.1, "grad_norm": 5.97878885269165, "learning_rate": 4.4949556634593425e-06, "loss": 0.1867, "step": 111775 }, { "epoch": 1.1, "grad_norm": 16.631196975708008, "learning_rate": 4.494831541005094e-06, "loss": 0.244, "step": 111800 }, { "epoch": 1.1, "grad_norm": 9.459216117858887, "learning_rate": 4.494707418550846e-06, "loss": 0.2203, "step": 111825 }, { "epoch": 1.1, "grad_norm": 15.18989372253418, "learning_rate": 4.494583296096597e-06, "loss": 0.2105, "step": 111850 }, { "epoch": 1.1, "grad_norm": 7.433958053588867, "learning_rate": 4.494459173642349e-06, "loss": 0.2164, "step": 111875 }, { "epoch": 1.1, "grad_norm": 18.722209930419922, "learning_rate": 4.4943350511881e-06, "loss": 0.2224, "step": 111900 }, { "epoch": 1.1, "grad_norm": 2.593123197555542, "learning_rate": 4.494210928733852e-06, "loss": 0.1877, "step": 111925 }, { "epoch": 1.1, "grad_norm": 11.266956329345703, "learning_rate": 4.4940868062796035e-06, "loss": 0.2654, "step": 111950 }, { "epoch": 1.1, "grad_norm": 3.8407773971557617, "learning_rate": 4.493962683825356e-06, "loss": 0.2281, "step": 111975 }, { "epoch": 1.1, "grad_norm": 10.85766315460205, "learning_rate": 4.493838561371107e-06, "loss": 0.2189, "step": 112000 }, { "epoch": 1.1, "grad_norm": 4.1498260498046875, "learning_rate": 4.493714438916858e-06, "loss": 0.2218, "step": 112025 }, { "epoch": 1.1, "grad_norm": 15.885366439819336, "learning_rate": 4.49359031646261e-06, "loss": 0.2618, "step": 112050 }, { "epoch": 1.1, "grad_norm": 5.367768287658691, "learning_rate": 4.493466194008361e-06, "loss": 0.2172, "step": 112075 }, { "epoch": 1.1, "grad_norm": 14.824026107788086, "learning_rate": 4.4933420715541125e-06, "loss": 0.2742, "step": 112100 }, { "epoch": 1.1, "grad_norm": 10.91468334197998, "learning_rate": 4.4932179490998646e-06, "loss": 0.2007, "step": 112125 }, { "epoch": 1.1, "grad_norm": 11.422001838684082, "learning_rate": 4.493093826645616e-06, "loss": 0.2556, "step": 112150 }, { "epoch": 1.1, "grad_norm": 6.281533718109131, "learning_rate": 4.492969704191367e-06, "loss": 0.2116, "step": 112175 }, { "epoch": 1.1, "grad_norm": 15.123581886291504, "learning_rate": 4.492845581737119e-06, "loss": 0.2491, "step": 112200 }, { "epoch": 1.1, "grad_norm": 13.68552017211914, "learning_rate": 4.49272145928287e-06, "loss": 0.2391, "step": 112225 }, { "epoch": 1.1, "grad_norm": 12.212230682373047, "learning_rate": 4.4925973368286215e-06, "loss": 0.269, "step": 112250 }, { "epoch": 1.1, "grad_norm": 7.177237510681152, "learning_rate": 4.4924732143743735e-06, "loss": 0.2201, "step": 112275 }, { "epoch": 1.1, "grad_norm": 17.77448844909668, "learning_rate": 4.492349091920125e-06, "loss": 0.2534, "step": 112300 }, { "epoch": 1.1, "grad_norm": 3.8376429080963135, "learning_rate": 4.492224969465877e-06, "loss": 0.1949, "step": 112325 }, { "epoch": 1.1, "grad_norm": 15.494226455688477, "learning_rate": 4.492100847011628e-06, "loss": 0.2538, "step": 112350 }, { "epoch": 1.1, "grad_norm": 2.270728588104248, "learning_rate": 4.49197672455738e-06, "loss": 0.2057, "step": 112375 }, { "epoch": 1.11, "grad_norm": 15.73924446105957, "learning_rate": 4.491852602103131e-06, "loss": 0.2888, "step": 112400 }, { "epoch": 1.11, "grad_norm": 3.462629556655884, "learning_rate": 4.491728479648883e-06, "loss": 0.2372, "step": 112425 }, { "epoch": 1.11, "grad_norm": 10.243722915649414, "learning_rate": 4.4916043571946346e-06, "loss": 0.1808, "step": 112450 }, { "epoch": 1.11, "grad_norm": 4.733704090118408, "learning_rate": 4.491480234740386e-06, "loss": 0.2173, "step": 112475 }, { "epoch": 1.11, "grad_norm": 13.095016479492188, "learning_rate": 4.491356112286137e-06, "loss": 0.2392, "step": 112500 }, { "epoch": 1.11, "grad_norm": 6.072172164916992, "learning_rate": 4.491231989831889e-06, "loss": 0.2815, "step": 112525 }, { "epoch": 1.11, "grad_norm": 12.916955947875977, "learning_rate": 4.49110786737764e-06, "loss": 0.2459, "step": 112550 }, { "epoch": 1.11, "grad_norm": 0.4725329577922821, "learning_rate": 4.4909837449233915e-06, "loss": 0.1927, "step": 112575 }, { "epoch": 1.11, "grad_norm": 12.111724853515625, "learning_rate": 4.4908596224691435e-06, "loss": 0.2017, "step": 112600 }, { "epoch": 1.11, "grad_norm": 6.642116546630859, "learning_rate": 4.490735500014895e-06, "loss": 0.2299, "step": 112625 }, { "epoch": 1.11, "grad_norm": 11.293473243713379, "learning_rate": 4.490611377560646e-06, "loss": 0.269, "step": 112650 }, { "epoch": 1.11, "grad_norm": 12.462928771972656, "learning_rate": 4.490487255106398e-06, "loss": 0.2331, "step": 112675 }, { "epoch": 1.11, "grad_norm": 8.49095344543457, "learning_rate": 4.490363132652149e-06, "loss": 0.1949, "step": 112700 }, { "epoch": 1.11, "grad_norm": 2.9803624153137207, "learning_rate": 4.490239010197901e-06, "loss": 0.2744, "step": 112725 }, { "epoch": 1.11, "grad_norm": 9.405791282653809, "learning_rate": 4.4901148877436525e-06, "loss": 0.2349, "step": 112750 }, { "epoch": 1.11, "grad_norm": 2.7289814949035645, "learning_rate": 4.4899907652894045e-06, "loss": 0.237, "step": 112775 }, { "epoch": 1.11, "grad_norm": 24.67723274230957, "learning_rate": 4.489866642835156e-06, "loss": 0.2532, "step": 112800 }, { "epoch": 1.11, "grad_norm": 5.447256565093994, "learning_rate": 4.489742520380908e-06, "loss": 0.2187, "step": 112825 }, { "epoch": 1.11, "grad_norm": 17.12317657470703, "learning_rate": 4.489618397926659e-06, "loss": 0.2432, "step": 112850 }, { "epoch": 1.11, "grad_norm": 2.5739753246307373, "learning_rate": 4.48949427547241e-06, "loss": 0.1801, "step": 112875 }, { "epoch": 1.11, "grad_norm": 13.133005142211914, "learning_rate": 4.489370153018162e-06, "loss": 0.2288, "step": 112900 }, { "epoch": 1.11, "grad_norm": 7.867350101470947, "learning_rate": 4.489250995462083e-06, "loss": 0.2039, "step": 112925 }, { "epoch": 1.11, "grad_norm": 18.22170639038086, "learning_rate": 4.489126873007835e-06, "loss": 0.2759, "step": 112950 }, { "epoch": 1.11, "grad_norm": 3.6106228828430176, "learning_rate": 4.489002750553587e-06, "loss": 0.2084, "step": 112975 }, { "epoch": 1.11, "grad_norm": 14.874045372009277, "learning_rate": 4.488878628099338e-06, "loss": 0.206, "step": 113000 }, { "epoch": 1.11, "grad_norm": 7.999373435974121, "learning_rate": 4.48875450564509e-06, "loss": 0.2462, "step": 113025 }, { "epoch": 1.11, "grad_norm": 13.984731674194336, "learning_rate": 4.488630383190841e-06, "loss": 0.222, "step": 113050 }, { "epoch": 1.11, "grad_norm": 4.920877933502197, "learning_rate": 4.488506260736592e-06, "loss": 0.2261, "step": 113075 }, { "epoch": 1.11, "grad_norm": 11.05852222442627, "learning_rate": 4.488382138282344e-06, "loss": 0.2345, "step": 113100 }, { "epoch": 1.11, "grad_norm": 3.3547637462615967, "learning_rate": 4.4882580158280955e-06, "loss": 0.1991, "step": 113125 }, { "epoch": 1.11, "grad_norm": 11.124029159545898, "learning_rate": 4.488133893373848e-06, "loss": 0.2943, "step": 113150 }, { "epoch": 1.11, "grad_norm": 7.181310176849365, "learning_rate": 4.488009770919599e-06, "loss": 0.2272, "step": 113175 }, { "epoch": 1.11, "grad_norm": 6.810547828674316, "learning_rate": 4.487885648465351e-06, "loss": 0.2418, "step": 113200 }, { "epoch": 1.11, "grad_norm": 4.119988441467285, "learning_rate": 4.487761526011102e-06, "loss": 0.2008, "step": 113225 }, { "epoch": 1.11, "grad_norm": 15.32079029083252, "learning_rate": 4.487637403556853e-06, "loss": 0.2131, "step": 113250 }, { "epoch": 1.11, "grad_norm": 6.589697360992432, "learning_rate": 4.487513281102605e-06, "loss": 0.2256, "step": 113275 }, { "epoch": 1.11, "grad_norm": 28.661334991455078, "learning_rate": 4.4873891586483566e-06, "loss": 0.203, "step": 113300 }, { "epoch": 1.11, "grad_norm": 7.459014415740967, "learning_rate": 4.487265036194108e-06, "loss": 0.1901, "step": 113325 }, { "epoch": 1.11, "grad_norm": 13.187843322753906, "learning_rate": 4.48714091373986e-06, "loss": 0.2591, "step": 113350 }, { "epoch": 1.11, "grad_norm": 3.216261863708496, "learning_rate": 4.487016791285611e-06, "loss": 0.2276, "step": 113375 }, { "epoch": 1.11, "grad_norm": 13.941329956054688, "learning_rate": 4.486892668831362e-06, "loss": 0.2219, "step": 113400 }, { "epoch": 1.12, "grad_norm": 3.0199363231658936, "learning_rate": 4.486768546377114e-06, "loss": 0.1977, "step": 113425 }, { "epoch": 1.12, "grad_norm": 5.09420108795166, "learning_rate": 4.4866444239228655e-06, "loss": 0.2094, "step": 113450 }, { "epoch": 1.12, "grad_norm": 3.9360241889953613, "learning_rate": 4.486520301468617e-06, "loss": 0.2049, "step": 113475 }, { "epoch": 1.12, "grad_norm": 17.38736915588379, "learning_rate": 4.486396179014369e-06, "loss": 0.2443, "step": 113500 }, { "epoch": 1.12, "grad_norm": 6.097104072570801, "learning_rate": 4.48627205656012e-06, "loss": 0.2155, "step": 113525 }, { "epoch": 1.12, "grad_norm": 15.84929084777832, "learning_rate": 4.486147934105872e-06, "loss": 0.2942, "step": 113550 }, { "epoch": 1.12, "grad_norm": 4.964081764221191, "learning_rate": 4.486023811651623e-06, "loss": 0.2081, "step": 113575 }, { "epoch": 1.12, "grad_norm": 14.745463371276855, "learning_rate": 4.485899689197375e-06, "loss": 0.2184, "step": 113600 }, { "epoch": 1.12, "grad_norm": 5.285598278045654, "learning_rate": 4.4857755667431266e-06, "loss": 0.2344, "step": 113625 }, { "epoch": 1.12, "grad_norm": 10.558722496032715, "learning_rate": 4.485651444288879e-06, "loss": 0.2693, "step": 113650 }, { "epoch": 1.12, "grad_norm": 5.822566032409668, "learning_rate": 4.48552732183463e-06, "loss": 0.214, "step": 113675 }, { "epoch": 1.12, "grad_norm": 11.516705513000488, "learning_rate": 4.485403199380381e-06, "loss": 0.2237, "step": 113700 }, { "epoch": 1.12, "grad_norm": 12.528892517089844, "learning_rate": 4.485279076926133e-06, "loss": 0.2033, "step": 113725 }, { "epoch": 1.12, "grad_norm": 16.216838836669922, "learning_rate": 4.485154954471884e-06, "loss": 0.2539, "step": 113750 }, { "epoch": 1.12, "grad_norm": 7.386081218719482, "learning_rate": 4.4850308320176355e-06, "loss": 0.2219, "step": 113775 }, { "epoch": 1.12, "grad_norm": 12.074861526489258, "learning_rate": 4.484906709563388e-06, "loss": 0.2489, "step": 113800 }, { "epoch": 1.12, "grad_norm": 7.93116569519043, "learning_rate": 4.484782587109139e-06, "loss": 0.2432, "step": 113825 }, { "epoch": 1.12, "grad_norm": 13.332633972167969, "learning_rate": 4.48465846465489e-06, "loss": 0.2777, "step": 113850 }, { "epoch": 1.12, "grad_norm": 4.668777942657471, "learning_rate": 4.484534342200642e-06, "loss": 0.241, "step": 113875 }, { "epoch": 1.12, "grad_norm": 14.237258911132812, "learning_rate": 4.484410219746393e-06, "loss": 0.2339, "step": 113900 }, { "epoch": 1.12, "grad_norm": 4.67604398727417, "learning_rate": 4.4842860972921445e-06, "loss": 0.2211, "step": 113925 }, { "epoch": 1.12, "grad_norm": 9.481017112731934, "learning_rate": 4.4841619748378965e-06, "loss": 0.2155, "step": 113950 }, { "epoch": 1.12, "grad_norm": 3.500735282897949, "learning_rate": 4.484037852383648e-06, "loss": 0.2169, "step": 113975 }, { "epoch": 1.12, "grad_norm": 10.384133338928223, "learning_rate": 4.4839137299294e-06, "loss": 0.2332, "step": 114000 }, { "epoch": 1.12, "grad_norm": 18.43089485168457, "learning_rate": 4.483789607475151e-06, "loss": 0.2333, "step": 114025 }, { "epoch": 1.12, "grad_norm": 22.031963348388672, "learning_rate": 4.483665485020903e-06, "loss": 0.244, "step": 114050 }, { "epoch": 1.12, "grad_norm": 8.574146270751953, "learning_rate": 4.483541362566654e-06, "loss": 0.2177, "step": 114075 }, { "epoch": 1.12, "grad_norm": 11.992765426635742, "learning_rate": 4.4834172401124055e-06, "loss": 0.2574, "step": 114100 }, { "epoch": 1.12, "grad_norm": 7.1717939376831055, "learning_rate": 4.4832931176581576e-06, "loss": 0.1958, "step": 114125 }, { "epoch": 1.12, "grad_norm": 18.478017807006836, "learning_rate": 4.483168995203909e-06, "loss": 0.242, "step": 114150 }, { "epoch": 1.12, "grad_norm": 12.858627319335938, "learning_rate": 4.48304487274966e-06, "loss": 0.2594, "step": 114175 }, { "epoch": 1.12, "grad_norm": 12.6348237991333, "learning_rate": 4.482920750295412e-06, "loss": 0.2503, "step": 114200 }, { "epoch": 1.12, "grad_norm": 18.669628143310547, "learning_rate": 4.482796627841163e-06, "loss": 0.2193, "step": 114225 }, { "epoch": 1.12, "grad_norm": 10.637825012207031, "learning_rate": 4.4826725053869145e-06, "loss": 0.2439, "step": 114250 }, { "epoch": 1.12, "grad_norm": 5.527063369750977, "learning_rate": 4.4825483829326665e-06, "loss": 0.2083, "step": 114275 }, { "epoch": 1.12, "grad_norm": 12.6210355758667, "learning_rate": 4.482424260478418e-06, "loss": 0.2244, "step": 114300 }, { "epoch": 1.12, "grad_norm": 2.9300594329833984, "learning_rate": 4.482300138024169e-06, "loss": 0.228, "step": 114325 }, { "epoch": 1.12, "grad_norm": 10.90460205078125, "learning_rate": 4.482176015569921e-06, "loss": 0.2328, "step": 114350 }, { "epoch": 1.12, "grad_norm": 6.719529628753662, "learning_rate": 4.482051893115672e-06, "loss": 0.1998, "step": 114375 }, { "epoch": 1.12, "grad_norm": 16.728689193725586, "learning_rate": 4.481927770661424e-06, "loss": 0.2571, "step": 114400 }, { "epoch": 1.13, "grad_norm": 11.221575736999512, "learning_rate": 4.4818036482071755e-06, "loss": 0.1988, "step": 114425 }, { "epoch": 1.13, "grad_norm": 13.941556930541992, "learning_rate": 4.4816795257529275e-06, "loss": 0.2148, "step": 114450 }, { "epoch": 1.13, "grad_norm": 4.66641902923584, "learning_rate": 4.481555403298679e-06, "loss": 0.1934, "step": 114475 }, { "epoch": 1.13, "grad_norm": 21.769454956054688, "learning_rate": 4.481431280844431e-06, "loss": 0.2446, "step": 114500 }, { "epoch": 1.13, "grad_norm": 3.0362462997436523, "learning_rate": 4.481307158390182e-06, "loss": 0.2164, "step": 114525 }, { "epoch": 1.13, "grad_norm": 11.789581298828125, "learning_rate": 4.481183035935933e-06, "loss": 0.224, "step": 114550 }, { "epoch": 1.13, "grad_norm": 3.9132773876190186, "learning_rate": 4.481058913481685e-06, "loss": 0.2268, "step": 114575 }, { "epoch": 1.13, "grad_norm": 17.825387954711914, "learning_rate": 4.4809347910274365e-06, "loss": 0.2481, "step": 114600 }, { "epoch": 1.13, "grad_norm": 8.121390342712402, "learning_rate": 4.480810668573188e-06, "loss": 0.2423, "step": 114625 }, { "epoch": 1.13, "grad_norm": 14.971010208129883, "learning_rate": 4.48068654611894e-06, "loss": 0.2711, "step": 114650 }, { "epoch": 1.13, "grad_norm": 1.7376747131347656, "learning_rate": 4.480562423664691e-06, "loss": 0.2538, "step": 114675 }, { "epoch": 1.13, "grad_norm": 13.858798027038574, "learning_rate": 4.480438301210442e-06, "loss": 0.2295, "step": 114700 }, { "epoch": 1.13, "grad_norm": 4.138721942901611, "learning_rate": 4.480314178756194e-06, "loss": 0.2207, "step": 114725 }, { "epoch": 1.13, "grad_norm": 7.267801761627197, "learning_rate": 4.4801900563019455e-06, "loss": 0.2145, "step": 114750 }, { "epoch": 1.13, "grad_norm": 6.451443195343018, "learning_rate": 4.480065933847697e-06, "loss": 0.19, "step": 114775 }, { "epoch": 1.13, "grad_norm": 15.305608749389648, "learning_rate": 4.479941811393449e-06, "loss": 0.2902, "step": 114800 }, { "epoch": 1.13, "grad_norm": 3.3827099800109863, "learning_rate": 4.4798176889392e-06, "loss": 0.2198, "step": 114825 }, { "epoch": 1.13, "grad_norm": 7.436825275421143, "learning_rate": 4.479693566484952e-06, "loss": 0.2012, "step": 114850 }, { "epoch": 1.13, "grad_norm": 5.568363666534424, "learning_rate": 4.479569444030703e-06, "loss": 0.2187, "step": 114875 }, { "epoch": 1.13, "grad_norm": 8.679008483886719, "learning_rate": 4.479445321576455e-06, "loss": 0.2623, "step": 114900 }, { "epoch": 1.13, "grad_norm": 9.644526481628418, "learning_rate": 4.4793211991222065e-06, "loss": 0.2418, "step": 114925 }, { "epoch": 1.13, "grad_norm": 12.651623725891113, "learning_rate": 4.479202041566128e-06, "loss": 0.2978, "step": 114950 }, { "epoch": 1.13, "grad_norm": 7.549099922180176, "learning_rate": 4.47907791911188e-06, "loss": 0.2294, "step": 114975 }, { "epoch": 1.13, "grad_norm": 12.803536415100098, "learning_rate": 4.478953796657631e-06, "loss": 0.2356, "step": 115000 }, { "epoch": 1.13, "grad_norm": 8.101631164550781, "learning_rate": 4.478829674203383e-06, "loss": 0.2073, "step": 115025 }, { "epoch": 1.13, "grad_norm": 15.520109176635742, "learning_rate": 4.478705551749134e-06, "loss": 0.2467, "step": 115050 }, { "epoch": 1.13, "grad_norm": 8.513771057128906, "learning_rate": 4.478581429294885e-06, "loss": 0.2781, "step": 115075 }, { "epoch": 1.13, "grad_norm": 14.430205345153809, "learning_rate": 4.478457306840637e-06, "loss": 0.261, "step": 115100 }, { "epoch": 1.13, "grad_norm": 0.8046502470970154, "learning_rate": 4.4783331843863885e-06, "loss": 0.2109, "step": 115125 }, { "epoch": 1.13, "grad_norm": 11.650810241699219, "learning_rate": 4.47820906193214e-06, "loss": 0.2331, "step": 115150 }, { "epoch": 1.13, "grad_norm": 6.428269386291504, "learning_rate": 4.478084939477892e-06, "loss": 0.2258, "step": 115175 }, { "epoch": 1.13, "grad_norm": 19.28970718383789, "learning_rate": 4.477960817023643e-06, "loss": 0.2439, "step": 115200 }, { "epoch": 1.13, "grad_norm": 2.543017625808716, "learning_rate": 4.477836694569394e-06, "loss": 0.2206, "step": 115225 }, { "epoch": 1.13, "grad_norm": 15.787814140319824, "learning_rate": 4.477712572115146e-06, "loss": 0.239, "step": 115250 }, { "epoch": 1.13, "grad_norm": 7.605461120605469, "learning_rate": 4.4775884496608975e-06, "loss": 0.24, "step": 115275 }, { "epoch": 1.13, "grad_norm": 21.48152732849121, "learning_rate": 4.4774643272066496e-06, "loss": 0.2464, "step": 115300 }, { "epoch": 1.13, "grad_norm": 7.066677570343018, "learning_rate": 4.477340204752401e-06, "loss": 0.1948, "step": 115325 }, { "epoch": 1.13, "grad_norm": 19.032955169677734, "learning_rate": 4.477216082298153e-06, "loss": 0.2616, "step": 115350 }, { "epoch": 1.13, "grad_norm": 11.658295631408691, "learning_rate": 4.477091959843904e-06, "loss": 0.1939, "step": 115375 }, { "epoch": 1.13, "grad_norm": 17.25507926940918, "learning_rate": 4.476967837389655e-06, "loss": 0.3341, "step": 115400 }, { "epoch": 1.13, "grad_norm": 6.849422931671143, "learning_rate": 4.476843714935407e-06, "loss": 0.1939, "step": 115425 }, { "epoch": 1.14, "grad_norm": 16.905136108398438, "learning_rate": 4.4767195924811585e-06, "loss": 0.2782, "step": 115450 }, { "epoch": 1.14, "grad_norm": 9.468233108520508, "learning_rate": 4.47659547002691e-06, "loss": 0.2547, "step": 115475 }, { "epoch": 1.14, "grad_norm": 10.353633880615234, "learning_rate": 4.476471347572662e-06, "loss": 0.2406, "step": 115500 }, { "epoch": 1.14, "grad_norm": 3.288904905319214, "learning_rate": 4.476347225118413e-06, "loss": 0.2337, "step": 115525 }, { "epoch": 1.14, "grad_norm": 12.628498077392578, "learning_rate": 4.476223102664164e-06, "loss": 0.2006, "step": 115550 }, { "epoch": 1.14, "grad_norm": 8.305684089660645, "learning_rate": 4.476098980209916e-06, "loss": 0.2283, "step": 115575 }, { "epoch": 1.14, "grad_norm": 15.867902755737305, "learning_rate": 4.4759748577556675e-06, "loss": 0.2529, "step": 115600 }, { "epoch": 1.14, "grad_norm": 1.7912392616271973, "learning_rate": 4.475850735301419e-06, "loss": 0.2547, "step": 115625 }, { "epoch": 1.14, "grad_norm": 11.94015884399414, "learning_rate": 4.475726612847171e-06, "loss": 0.2315, "step": 115650 }, { "epoch": 1.14, "grad_norm": 8.06818675994873, "learning_rate": 4.475602490392922e-06, "loss": 0.2239, "step": 115675 }, { "epoch": 1.14, "grad_norm": 14.553339958190918, "learning_rate": 4.475478367938674e-06, "loss": 0.2261, "step": 115700 }, { "epoch": 1.14, "grad_norm": 5.56470251083374, "learning_rate": 4.475354245484425e-06, "loss": 0.2305, "step": 115725 }, { "epoch": 1.14, "grad_norm": 15.894433975219727, "learning_rate": 4.475230123030177e-06, "loss": 0.3013, "step": 115750 }, { "epoch": 1.14, "grad_norm": 5.754423141479492, "learning_rate": 4.4751060005759285e-06, "loss": 0.2449, "step": 115775 }, { "epoch": 1.14, "grad_norm": 13.456670761108398, "learning_rate": 4.4749818781216806e-06, "loss": 0.2585, "step": 115800 }, { "epoch": 1.14, "grad_norm": 4.146223545074463, "learning_rate": 4.474857755667432e-06, "loss": 0.2814, "step": 115825 }, { "epoch": 1.14, "grad_norm": 12.906267166137695, "learning_rate": 4.474733633213183e-06, "loss": 0.2275, "step": 115850 }, { "epoch": 1.14, "grad_norm": 14.185441970825195, "learning_rate": 4.474609510758935e-06, "loss": 0.2583, "step": 115875 }, { "epoch": 1.14, "grad_norm": 15.713212966918945, "learning_rate": 4.474485388304686e-06, "loss": 0.2993, "step": 115900 }, { "epoch": 1.14, "grad_norm": 5.5900068283081055, "learning_rate": 4.4743612658504375e-06, "loss": 0.202, "step": 115925 }, { "epoch": 1.14, "grad_norm": 18.850299835205078, "learning_rate": 4.4742371433961895e-06, "loss": 0.2493, "step": 115950 }, { "epoch": 1.14, "grad_norm": 6.531790256500244, "learning_rate": 4.474113020941941e-06, "loss": 0.2245, "step": 115975 }, { "epoch": 1.14, "grad_norm": 15.566919326782227, "learning_rate": 4.473988898487692e-06, "loss": 0.2597, "step": 116000 }, { "epoch": 1.14, "grad_norm": 12.963507652282715, "learning_rate": 4.473864776033444e-06, "loss": 0.2117, "step": 116025 }, { "epoch": 1.14, "grad_norm": 16.550207138061523, "learning_rate": 4.473740653579195e-06, "loss": 0.2874, "step": 116050 }, { "epoch": 1.14, "grad_norm": 9.48051929473877, "learning_rate": 4.473616531124947e-06, "loss": 0.2347, "step": 116075 }, { "epoch": 1.14, "grad_norm": 11.049368858337402, "learning_rate": 4.4734924086706985e-06, "loss": 0.2663, "step": 116100 }, { "epoch": 1.14, "grad_norm": 5.563664436340332, "learning_rate": 4.4733682862164506e-06, "loss": 0.2001, "step": 116125 }, { "epoch": 1.14, "grad_norm": 14.04527473449707, "learning_rate": 4.473244163762202e-06, "loss": 0.2227, "step": 116150 }, { "epoch": 1.14, "grad_norm": 7.6476898193359375, "learning_rate": 4.473120041307954e-06, "loss": 0.2323, "step": 116175 }, { "epoch": 1.14, "grad_norm": 7.8854522705078125, "learning_rate": 4.472995918853705e-06, "loss": 0.2587, "step": 116200 }, { "epoch": 1.14, "grad_norm": 3.927236318588257, "learning_rate": 4.472871796399456e-06, "loss": 0.186, "step": 116225 }, { "epoch": 1.14, "grad_norm": 15.77586555480957, "learning_rate": 4.4727476739452075e-06, "loss": 0.22, "step": 116250 }, { "epoch": 1.14, "grad_norm": 13.096461296081543, "learning_rate": 4.4726235514909595e-06, "loss": 0.2338, "step": 116275 }, { "epoch": 1.14, "grad_norm": 15.971858978271484, "learning_rate": 4.472499429036711e-06, "loss": 0.221, "step": 116300 }, { "epoch": 1.14, "grad_norm": 5.40244722366333, "learning_rate": 4.472375306582462e-06, "loss": 0.2435, "step": 116325 }, { "epoch": 1.14, "grad_norm": 10.954453468322754, "learning_rate": 4.472251184128214e-06, "loss": 0.2719, "step": 116350 }, { "epoch": 1.14, "grad_norm": 3.5759501457214355, "learning_rate": 4.472127061673965e-06, "loss": 0.2021, "step": 116375 }, { "epoch": 1.14, "grad_norm": 7.373152732849121, "learning_rate": 4.472002939219716e-06, "loss": 0.237, "step": 116400 }, { "epoch": 1.14, "grad_norm": 4.979018211364746, "learning_rate": 4.4718788167654685e-06, "loss": 0.2303, "step": 116425 }, { "epoch": 1.14, "grad_norm": 12.652438163757324, "learning_rate": 4.47175469431122e-06, "loss": 0.2663, "step": 116450 }, { "epoch": 1.15, "grad_norm": 8.761533737182617, "learning_rate": 4.471630571856972e-06, "loss": 0.2332, "step": 116475 }, { "epoch": 1.15, "grad_norm": 16.73120880126953, "learning_rate": 4.471506449402723e-06, "loss": 0.1942, "step": 116500 }, { "epoch": 1.15, "grad_norm": 2.4868221282958984, "learning_rate": 4.471382326948475e-06, "loss": 0.1877, "step": 116525 }, { "epoch": 1.15, "grad_norm": 16.75344467163086, "learning_rate": 4.471258204494226e-06, "loss": 0.2145, "step": 116550 }, { "epoch": 1.15, "grad_norm": 6.9057207107543945, "learning_rate": 4.471134082039978e-06, "loss": 0.2048, "step": 116575 }, { "epoch": 1.15, "grad_norm": 17.072307586669922, "learning_rate": 4.4710099595857295e-06, "loss": 0.2609, "step": 116600 }, { "epoch": 1.15, "grad_norm": 2.296383857727051, "learning_rate": 4.470885837131481e-06, "loss": 0.2385, "step": 116625 }, { "epoch": 1.15, "grad_norm": 9.877859115600586, "learning_rate": 4.470761714677233e-06, "loss": 0.2906, "step": 116650 }, { "epoch": 1.15, "grad_norm": 6.757360458374023, "learning_rate": 4.470637592222984e-06, "loss": 0.2482, "step": 116675 }, { "epoch": 1.15, "grad_norm": 13.08565902709961, "learning_rate": 4.470513469768735e-06, "loss": 0.2629, "step": 116700 }, { "epoch": 1.15, "grad_norm": 7.252120018005371, "learning_rate": 4.470389347314487e-06, "loss": 0.2013, "step": 116725 }, { "epoch": 1.15, "grad_norm": 11.475332260131836, "learning_rate": 4.4702652248602385e-06, "loss": 0.2595, "step": 116750 }, { "epoch": 1.15, "grad_norm": 7.406757831573486, "learning_rate": 4.47014110240599e-06, "loss": 0.2188, "step": 116775 }, { "epoch": 1.15, "grad_norm": 13.238475799560547, "learning_rate": 4.470016979951742e-06, "loss": 0.2249, "step": 116800 }, { "epoch": 1.15, "grad_norm": 7.189756870269775, "learning_rate": 4.469892857497493e-06, "loss": 0.2126, "step": 116825 }, { "epoch": 1.15, "grad_norm": 16.06045150756836, "learning_rate": 4.469768735043244e-06, "loss": 0.2307, "step": 116850 }, { "epoch": 1.15, "grad_norm": 3.560784339904785, "learning_rate": 4.469644612588996e-06, "loss": 0.2408, "step": 116875 }, { "epoch": 1.15, "grad_norm": 13.706376075744629, "learning_rate": 4.4695204901347474e-06, "loss": 0.2345, "step": 116900 }, { "epoch": 1.15, "grad_norm": 7.161724090576172, "learning_rate": 4.4693963676804995e-06, "loss": 0.2311, "step": 116925 }, { "epoch": 1.15, "grad_norm": 14.897126197814941, "learning_rate": 4.469272245226251e-06, "loss": 0.2119, "step": 116950 }, { "epoch": 1.15, "grad_norm": 5.360987663269043, "learning_rate": 4.469148122772003e-06, "loss": 0.2274, "step": 116975 }, { "epoch": 1.15, "grad_norm": 7.646580219268799, "learning_rate": 4.469024000317754e-06, "loss": 0.2517, "step": 117000 }, { "epoch": 1.15, "grad_norm": 4.07539701461792, "learning_rate": 4.468899877863506e-06, "loss": 0.2023, "step": 117025 }, { "epoch": 1.15, "grad_norm": 9.744495391845703, "learning_rate": 4.468775755409257e-06, "loss": 0.2559, "step": 117050 }, { "epoch": 1.15, "grad_norm": 1.2778955698013306, "learning_rate": 4.4686516329550084e-06, "loss": 0.1975, "step": 117075 }, { "epoch": 1.15, "grad_norm": 10.594094276428223, "learning_rate": 4.46852751050076e-06, "loss": 0.2603, "step": 117100 }, { "epoch": 1.15, "grad_norm": 5.3822221755981445, "learning_rate": 4.468403388046512e-06, "loss": 0.2596, "step": 117125 }, { "epoch": 1.15, "grad_norm": 15.428547859191895, "learning_rate": 4.468279265592263e-06, "loss": 0.2531, "step": 117150 }, { "epoch": 1.15, "grad_norm": 1.8775235414505005, "learning_rate": 4.468155143138014e-06, "loss": 0.2051, "step": 117175 }, { "epoch": 1.15, "grad_norm": 12.163172721862793, "learning_rate": 4.468031020683766e-06, "loss": 0.2524, "step": 117200 }, { "epoch": 1.15, "grad_norm": 7.766628742218018, "learning_rate": 4.467906898229517e-06, "loss": 0.2382, "step": 117225 }, { "epoch": 1.15, "grad_norm": 15.7675142288208, "learning_rate": 4.467782775775269e-06, "loss": 0.288, "step": 117250 }, { "epoch": 1.15, "grad_norm": 3.187128782272339, "learning_rate": 4.467658653321021e-06, "loss": 0.1823, "step": 117275 }, { "epoch": 1.15, "grad_norm": 14.941460609436035, "learning_rate": 4.467534530866772e-06, "loss": 0.2419, "step": 117300 }, { "epoch": 1.15, "grad_norm": 4.454707145690918, "learning_rate": 4.467410408412524e-06, "loss": 0.1975, "step": 117325 }, { "epoch": 1.15, "grad_norm": 16.269968032836914, "learning_rate": 4.467286285958275e-06, "loss": 0.2501, "step": 117350 }, { "epoch": 1.15, "grad_norm": 7.750811576843262, "learning_rate": 4.467162163504027e-06, "loss": 0.2477, "step": 117375 }, { "epoch": 1.15, "grad_norm": 13.888630867004395, "learning_rate": 4.4670380410497784e-06, "loss": 0.2729, "step": 117400 }, { "epoch": 1.15, "grad_norm": 7.62655782699585, "learning_rate": 4.4669188834937e-06, "loss": 0.2398, "step": 117425 }, { "epoch": 1.15, "grad_norm": 9.945452690124512, "learning_rate": 4.4667947610394515e-06, "loss": 0.2216, "step": 117450 }, { "epoch": 1.16, "grad_norm": 9.340754508972168, "learning_rate": 4.4666706385852036e-06, "loss": 0.2381, "step": 117475 }, { "epoch": 1.16, "grad_norm": 11.04405403137207, "learning_rate": 4.466546516130955e-06, "loss": 0.2503, "step": 117500 }, { "epoch": 1.16, "grad_norm": 6.978420257568359, "learning_rate": 4.466422393676706e-06, "loss": 0.2152, "step": 117525 }, { "epoch": 1.16, "grad_norm": 12.3898344039917, "learning_rate": 4.466298271222458e-06, "loss": 0.227, "step": 117550 }, { "epoch": 1.16, "grad_norm": 3.2191765308380127, "learning_rate": 4.466174148768209e-06, "loss": 0.2031, "step": 117575 }, { "epoch": 1.16, "grad_norm": 13.501291275024414, "learning_rate": 4.4660500263139605e-06, "loss": 0.2253, "step": 117600 }, { "epoch": 1.16, "grad_norm": 2.251424789428711, "learning_rate": 4.4659259038597125e-06, "loss": 0.2121, "step": 117625 }, { "epoch": 1.16, "grad_norm": 13.501798629760742, "learning_rate": 4.465801781405464e-06, "loss": 0.2553, "step": 117650 }, { "epoch": 1.16, "grad_norm": 3.8259353637695312, "learning_rate": 4.465677658951215e-06, "loss": 0.2192, "step": 117675 }, { "epoch": 1.16, "grad_norm": 15.09561824798584, "learning_rate": 4.465553536496966e-06, "loss": 0.2424, "step": 117700 }, { "epoch": 1.16, "grad_norm": 6.5654520988464355, "learning_rate": 4.465429414042718e-06, "loss": 0.1995, "step": 117725 }, { "epoch": 1.16, "grad_norm": 10.421347618103027, "learning_rate": 4.4653052915884694e-06, "loss": 0.207, "step": 117750 }, { "epoch": 1.16, "grad_norm": 3.3223609924316406, "learning_rate": 4.4651811691342215e-06, "loss": 0.191, "step": 117775 }, { "epoch": 1.16, "grad_norm": 8.346799850463867, "learning_rate": 4.465057046679973e-06, "loss": 0.2138, "step": 117800 }, { "epoch": 1.16, "grad_norm": 11.866011619567871, "learning_rate": 4.464932924225725e-06, "loss": 0.2206, "step": 117825 }, { "epoch": 1.16, "grad_norm": 13.366493225097656, "learning_rate": 4.464808801771476e-06, "loss": 0.2246, "step": 117850 }, { "epoch": 1.16, "grad_norm": 6.7573933601379395, "learning_rate": 4.464684679317228e-06, "loss": 0.288, "step": 117875 }, { "epoch": 1.16, "grad_norm": 18.50559425354004, "learning_rate": 4.464560556862979e-06, "loss": 0.2196, "step": 117900 }, { "epoch": 1.16, "grad_norm": 4.549787998199463, "learning_rate": 4.4644364344087305e-06, "loss": 0.2156, "step": 117925 }, { "epoch": 1.16, "grad_norm": 8.318772315979004, "learning_rate": 4.4643123119544825e-06, "loss": 0.2336, "step": 117950 }, { "epoch": 1.16, "grad_norm": 1.7944749593734741, "learning_rate": 4.464188189500234e-06, "loss": 0.2353, "step": 117975 }, { "epoch": 1.16, "grad_norm": 11.452723503112793, "learning_rate": 4.464064067045985e-06, "loss": 0.2569, "step": 118000 }, { "epoch": 1.16, "grad_norm": 9.579943656921387, "learning_rate": 4.463939944591737e-06, "loss": 0.2505, "step": 118025 }, { "epoch": 1.16, "grad_norm": 21.199338912963867, "learning_rate": 4.463815822137488e-06, "loss": 0.2648, "step": 118050 }, { "epoch": 1.16, "grad_norm": 4.349104404449463, "learning_rate": 4.4636916996832394e-06, "loss": 0.198, "step": 118075 }, { "epoch": 1.16, "grad_norm": 14.799816131591797, "learning_rate": 4.4635675772289915e-06, "loss": 0.241, "step": 118100 }, { "epoch": 1.16, "grad_norm": 6.676498889923096, "learning_rate": 4.463443454774743e-06, "loss": 0.2285, "step": 118125 }, { "epoch": 1.16, "grad_norm": 10.81812572479248, "learning_rate": 4.463319332320494e-06, "loss": 0.2076, "step": 118150 }, { "epoch": 1.16, "grad_norm": 7.252157211303711, "learning_rate": 4.463195209866246e-06, "loss": 0.2584, "step": 118175 }, { "epoch": 1.16, "grad_norm": 9.073789596557617, "learning_rate": 4.463071087411997e-06, "loss": 0.2323, "step": 118200 }, { "epoch": 1.16, "grad_norm": 0.9898905158042908, "learning_rate": 4.462946964957749e-06, "loss": 0.2227, "step": 118225 }, { "epoch": 1.16, "grad_norm": 17.240079879760742, "learning_rate": 4.4628228425035004e-06, "loss": 0.2813, "step": 118250 }, { "epoch": 1.16, "grad_norm": 6.193803787231445, "learning_rate": 4.4626987200492525e-06, "loss": 0.2175, "step": 118275 }, { "epoch": 1.16, "grad_norm": 14.919618606567383, "learning_rate": 4.462574597595004e-06, "loss": 0.2144, "step": 118300 }, { "epoch": 1.16, "grad_norm": 5.210625648498535, "learning_rate": 4.462450475140756e-06, "loss": 0.2504, "step": 118325 }, { "epoch": 1.16, "grad_norm": 13.013381004333496, "learning_rate": 4.462326352686507e-06, "loss": 0.2324, "step": 118350 }, { "epoch": 1.16, "grad_norm": 2.9206204414367676, "learning_rate": 4.462202230232258e-06, "loss": 0.2526, "step": 118375 }, { "epoch": 1.16, "grad_norm": 13.181787490844727, "learning_rate": 4.46207810777801e-06, "loss": 0.2692, "step": 118400 }, { "epoch": 1.16, "grad_norm": 3.588836193084717, "learning_rate": 4.4619539853237615e-06, "loss": 0.1922, "step": 118425 }, { "epoch": 1.16, "grad_norm": 17.672849655151367, "learning_rate": 4.461829862869513e-06, "loss": 0.2953, "step": 118450 }, { "epoch": 1.16, "grad_norm": 8.896321296691895, "learning_rate": 4.461705740415265e-06, "loss": 0.2236, "step": 118475 }, { "epoch": 1.17, "grad_norm": 11.684264183044434, "learning_rate": 4.461581617961016e-06, "loss": 0.2203, "step": 118500 }, { "epoch": 1.17, "grad_norm": 12.930438995361328, "learning_rate": 4.461457495506767e-06, "loss": 0.2389, "step": 118525 }, { "epoch": 1.17, "grad_norm": 14.306344032287598, "learning_rate": 4.461333373052518e-06, "loss": 0.2723, "step": 118550 }, { "epoch": 1.17, "grad_norm": 7.042609691619873, "learning_rate": 4.4612092505982704e-06, "loss": 0.2204, "step": 118575 }, { "epoch": 1.17, "grad_norm": 13.072858810424805, "learning_rate": 4.461085128144022e-06, "loss": 0.2375, "step": 118600 }, { "epoch": 1.17, "grad_norm": 5.164185047149658, "learning_rate": 4.460961005689774e-06, "loss": 0.2631, "step": 118625 }, { "epoch": 1.17, "grad_norm": 7.425860404968262, "learning_rate": 4.460836883235525e-06, "loss": 0.2147, "step": 118650 }, { "epoch": 1.17, "grad_norm": 4.79361629486084, "learning_rate": 4.460712760781277e-06, "loss": 0.2253, "step": 118675 }, { "epoch": 1.17, "grad_norm": 18.311811447143555, "learning_rate": 4.460588638327028e-06, "loss": 0.2, "step": 118700 }, { "epoch": 1.17, "grad_norm": 7.566603183746338, "learning_rate": 4.46046451587278e-06, "loss": 0.2137, "step": 118725 }, { "epoch": 1.17, "grad_norm": 18.61831283569336, "learning_rate": 4.4603403934185315e-06, "loss": 0.2562, "step": 118750 }, { "epoch": 1.17, "grad_norm": 8.09924602508545, "learning_rate": 4.460216270964283e-06, "loss": 0.2481, "step": 118775 }, { "epoch": 1.17, "grad_norm": 13.811172485351562, "learning_rate": 4.460092148510035e-06, "loss": 0.2493, "step": 118800 }, { "epoch": 1.17, "grad_norm": 8.803220748901367, "learning_rate": 4.459968026055786e-06, "loss": 0.2237, "step": 118825 }, { "epoch": 1.17, "grad_norm": 11.299448013305664, "learning_rate": 4.459843903601537e-06, "loss": 0.2496, "step": 118850 }, { "epoch": 1.17, "grad_norm": 6.9552764892578125, "learning_rate": 4.459719781147289e-06, "loss": 0.2186, "step": 118875 }, { "epoch": 1.17, "grad_norm": 13.096515655517578, "learning_rate": 4.45959565869304e-06, "loss": 0.2504, "step": 118900 }, { "epoch": 1.17, "grad_norm": 6.980811595916748, "learning_rate": 4.459471536238792e-06, "loss": 0.2405, "step": 118925 }, { "epoch": 1.17, "grad_norm": 14.027453422546387, "learning_rate": 4.459347413784544e-06, "loss": 0.2635, "step": 118950 }, { "epoch": 1.17, "grad_norm": 0.10355024039745331, "learning_rate": 4.459223291330295e-06, "loss": 0.1765, "step": 118975 }, { "epoch": 1.17, "grad_norm": 14.608202934265137, "learning_rate": 4.459099168876047e-06, "loss": 0.2431, "step": 119000 }, { "epoch": 1.17, "grad_norm": 4.181258678436279, "learning_rate": 4.458975046421798e-06, "loss": 0.2012, "step": 119025 }, { "epoch": 1.17, "grad_norm": 13.243192672729492, "learning_rate": 4.45885092396755e-06, "loss": 0.2475, "step": 119050 }, { "epoch": 1.17, "grad_norm": 7.603994846343994, "learning_rate": 4.4587268015133014e-06, "loss": 0.2266, "step": 119075 }, { "epoch": 1.17, "grad_norm": 14.9522123336792, "learning_rate": 4.4586026790590535e-06, "loss": 0.3139, "step": 119100 }, { "epoch": 1.17, "grad_norm": 4.463139057159424, "learning_rate": 4.458478556604805e-06, "loss": 0.2078, "step": 119125 }, { "epoch": 1.17, "grad_norm": 37.0229377746582, "learning_rate": 4.458354434150556e-06, "loss": 0.2231, "step": 119150 }, { "epoch": 1.17, "grad_norm": 3.27752947807312, "learning_rate": 4.458230311696308e-06, "loss": 0.2157, "step": 119175 }, { "epoch": 1.17, "grad_norm": 11.041106224060059, "learning_rate": 4.458106189242059e-06, "loss": 0.2151, "step": 119200 }, { "epoch": 1.17, "grad_norm": 5.734372615814209, "learning_rate": 4.45798206678781e-06, "loss": 0.2801, "step": 119225 }, { "epoch": 1.17, "grad_norm": 13.724990844726562, "learning_rate": 4.4578579443335625e-06, "loss": 0.2423, "step": 119250 }, { "epoch": 1.17, "grad_norm": 3.8356199264526367, "learning_rate": 4.457733821879314e-06, "loss": 0.2176, "step": 119275 }, { "epoch": 1.17, "grad_norm": 8.312846183776855, "learning_rate": 4.457609699425065e-06, "loss": 0.2016, "step": 119300 }, { "epoch": 1.17, "grad_norm": 4.632774829864502, "learning_rate": 4.457485576970817e-06, "loss": 0.2636, "step": 119325 }, { "epoch": 1.17, "grad_norm": 17.37137794494629, "learning_rate": 4.457361454516568e-06, "loss": 0.253, "step": 119350 }, { "epoch": 1.17, "grad_norm": 3.302597761154175, "learning_rate": 4.457237332062319e-06, "loss": 0.1921, "step": 119375 }, { "epoch": 1.17, "grad_norm": 17.157270431518555, "learning_rate": 4.457113209608071e-06, "loss": 0.1988, "step": 119400 }, { "epoch": 1.17, "grad_norm": 6.176710605621338, "learning_rate": 4.456989087153823e-06, "loss": 0.248, "step": 119425 }, { "epoch": 1.17, "grad_norm": 14.057723999023438, "learning_rate": 4.456864964699575e-06, "loss": 0.2363, "step": 119450 }, { "epoch": 1.17, "grad_norm": 6.136453628540039, "learning_rate": 4.456740842245326e-06, "loss": 0.2079, "step": 119475 }, { "epoch": 1.17, "grad_norm": 17.13286018371582, "learning_rate": 4.456616719791078e-06, "loss": 0.2248, "step": 119500 }, { "epoch": 1.18, "grad_norm": 3.8269460201263428, "learning_rate": 4.456492597336829e-06, "loss": 0.2663, "step": 119525 }, { "epoch": 1.18, "grad_norm": 15.868256568908691, "learning_rate": 4.45636847488258e-06, "loss": 0.235, "step": 119550 }, { "epoch": 1.18, "grad_norm": 3.6521661281585693, "learning_rate": 4.4562443524283324e-06, "loss": 0.2252, "step": 119575 }, { "epoch": 1.18, "grad_norm": 9.286741256713867, "learning_rate": 4.456120229974084e-06, "loss": 0.2597, "step": 119600 }, { "epoch": 1.18, "grad_norm": 9.65634536743164, "learning_rate": 4.455996107519835e-06, "loss": 0.2286, "step": 119625 }, { "epoch": 1.18, "grad_norm": 17.24164390563965, "learning_rate": 4.455871985065587e-06, "loss": 0.2496, "step": 119650 }, { "epoch": 1.18, "grad_norm": 4.795980930328369, "learning_rate": 4.455752827509508e-06, "loss": 0.1963, "step": 119675 }, { "epoch": 1.18, "grad_norm": 19.6124267578125, "learning_rate": 4.45562870505526e-06, "loss": 0.2528, "step": 119700 }, { "epoch": 1.18, "grad_norm": 0.3327452540397644, "learning_rate": 4.455504582601011e-06, "loss": 0.1898, "step": 119725 }, { "epoch": 1.18, "grad_norm": 15.26043701171875, "learning_rate": 4.4553804601467624e-06, "loss": 0.2474, "step": 119750 }, { "epoch": 1.18, "grad_norm": 5.931008815765381, "learning_rate": 4.4552563376925145e-06, "loss": 0.2103, "step": 119775 }, { "epoch": 1.18, "grad_norm": 19.603153228759766, "learning_rate": 4.455132215238266e-06, "loss": 0.2774, "step": 119800 }, { "epoch": 1.18, "grad_norm": 3.7374889850616455, "learning_rate": 4.455008092784017e-06, "loss": 0.1919, "step": 119825 }, { "epoch": 1.18, "grad_norm": 17.69268035888672, "learning_rate": 4.454883970329769e-06, "loss": 0.283, "step": 119850 }, { "epoch": 1.18, "grad_norm": 1.993746042251587, "learning_rate": 4.45475984787552e-06, "loss": 0.2094, "step": 119875 }, { "epoch": 1.18, "grad_norm": 11.649100303649902, "learning_rate": 4.454635725421272e-06, "loss": 0.1635, "step": 119900 }, { "epoch": 1.18, "grad_norm": 4.10515022277832, "learning_rate": 4.4545116029670235e-06, "loss": 0.2071, "step": 119925 }, { "epoch": 1.18, "grad_norm": 16.948577880859375, "learning_rate": 4.4543874805127755e-06, "loss": 0.2415, "step": 119950 }, { "epoch": 1.18, "grad_norm": 7.127413749694824, "learning_rate": 4.454263358058527e-06, "loss": 0.2214, "step": 119975 }, { "epoch": 1.18, "grad_norm": 9.88760757446289, "learning_rate": 4.454139235604278e-06, "loss": 0.2394, "step": 120000 }, { "epoch": 1.18, "eval_loss": 0.4857564866542816, "eval_runtime": 6034.5494, "eval_samples_per_second": 1.569, "eval_steps_per_second": 0.196, "eval_wer": 0.13408549539689693, "step": 120000 }, { "epoch": 1.18, "grad_norm": 4.960424423217773, "learning_rate": 4.45401511315003e-06, "loss": 0.2408, "step": 120025 }, { "epoch": 1.18, "grad_norm": 14.006760597229004, "learning_rate": 4.453890990695781e-06, "loss": 0.2052, "step": 120050 }, { "epoch": 1.18, "grad_norm": 1.8441836833953857, "learning_rate": 4.453766868241532e-06, "loss": 0.2062, "step": 120075 }, { "epoch": 1.18, "grad_norm": 15.113335609436035, "learning_rate": 4.4536427457872845e-06, "loss": 0.2259, "step": 120100 }, { "epoch": 1.18, "grad_norm": 10.674938201904297, "learning_rate": 4.453518623333036e-06, "loss": 0.2086, "step": 120125 }, { "epoch": 1.18, "grad_norm": 13.013998985290527, "learning_rate": 4.453394500878787e-06, "loss": 0.2055, "step": 120150 }, { "epoch": 1.18, "grad_norm": 6.804362773895264, "learning_rate": 4.453270378424539e-06, "loss": 0.2474, "step": 120175 }, { "epoch": 1.18, "grad_norm": 11.318408966064453, "learning_rate": 4.45314625597029e-06, "loss": 0.2885, "step": 120200 }, { "epoch": 1.18, "grad_norm": 3.21856951713562, "learning_rate": 4.453022133516041e-06, "loss": 0.1862, "step": 120225 }, { "epoch": 1.18, "grad_norm": 14.471854209899902, "learning_rate": 4.4528980110617934e-06, "loss": 0.2607, "step": 120250 }, { "epoch": 1.18, "grad_norm": 3.5422050952911377, "learning_rate": 4.452773888607545e-06, "loss": 0.2333, "step": 120275 }, { "epoch": 1.18, "grad_norm": 13.808206558227539, "learning_rate": 4.452649766153297e-06, "loss": 0.2406, "step": 120300 }, { "epoch": 1.18, "grad_norm": 2.391484498977661, "learning_rate": 4.452525643699048e-06, "loss": 0.1911, "step": 120325 }, { "epoch": 1.18, "grad_norm": 12.210954666137695, "learning_rate": 4.4524015212448e-06, "loss": 0.2412, "step": 120350 }, { "epoch": 1.18, "grad_norm": 4.116211414337158, "learning_rate": 4.452277398790551e-06, "loss": 0.1846, "step": 120375 }, { "epoch": 1.18, "grad_norm": 10.899921417236328, "learning_rate": 4.452153276336303e-06, "loss": 0.2729, "step": 120400 }, { "epoch": 1.18, "grad_norm": 3.0156638622283936, "learning_rate": 4.4520291538820545e-06, "loss": 0.266, "step": 120425 }, { "epoch": 1.18, "grad_norm": 22.316852569580078, "learning_rate": 4.451905031427806e-06, "loss": 0.2342, "step": 120450 }, { "epoch": 1.18, "grad_norm": 3.950619697570801, "learning_rate": 4.451780908973558e-06, "loss": 0.1913, "step": 120475 }, { "epoch": 1.18, "grad_norm": 17.9024658203125, "learning_rate": 4.451656786519309e-06, "loss": 0.2326, "step": 120500 }, { "epoch": 1.19, "grad_norm": 5.132750988006592, "learning_rate": 4.45153266406506e-06, "loss": 0.2598, "step": 120525 }, { "epoch": 1.19, "grad_norm": 15.73157787322998, "learning_rate": 4.451408541610812e-06, "loss": 0.2489, "step": 120550 }, { "epoch": 1.19, "grad_norm": 5.556176662445068, "learning_rate": 4.451284419156563e-06, "loss": 0.2139, "step": 120575 }, { "epoch": 1.19, "grad_norm": 11.12197208404541, "learning_rate": 4.451160296702315e-06, "loss": 0.2236, "step": 120600 }, { "epoch": 1.19, "grad_norm": 6.5132527351379395, "learning_rate": 4.451036174248067e-06, "loss": 0.225, "step": 120625 }, { "epoch": 1.19, "grad_norm": 14.116519927978516, "learning_rate": 4.450912051793818e-06, "loss": 0.2179, "step": 120650 }, { "epoch": 1.19, "grad_norm": 6.981400966644287, "learning_rate": 4.450787929339569e-06, "loss": 0.2128, "step": 120675 }, { "epoch": 1.19, "grad_norm": 13.659311294555664, "learning_rate": 4.450663806885321e-06, "loss": 0.2225, "step": 120700 }, { "epoch": 1.19, "grad_norm": 4.459853649139404, "learning_rate": 4.450539684431072e-06, "loss": 0.2408, "step": 120725 }, { "epoch": 1.19, "grad_norm": 13.263668060302734, "learning_rate": 4.4504155619768244e-06, "loss": 0.2266, "step": 120750 }, { "epoch": 1.19, "grad_norm": 3.3833649158477783, "learning_rate": 4.450291439522576e-06, "loss": 0.2479, "step": 120775 }, { "epoch": 1.19, "grad_norm": 14.678350448608398, "learning_rate": 4.450167317068328e-06, "loss": 0.2117, "step": 120800 }, { "epoch": 1.19, "grad_norm": 4.833949089050293, "learning_rate": 4.450043194614079e-06, "loss": 0.2086, "step": 120825 }, { "epoch": 1.19, "grad_norm": 12.72225570678711, "learning_rate": 4.44991907215983e-06, "loss": 0.2522, "step": 120850 }, { "epoch": 1.19, "grad_norm": 9.364036560058594, "learning_rate": 4.449794949705582e-06, "loss": 0.2222, "step": 120875 }, { "epoch": 1.19, "grad_norm": 10.776032447814941, "learning_rate": 4.449670827251333e-06, "loss": 0.2365, "step": 120900 }, { "epoch": 1.19, "grad_norm": 2.2048561573028564, "learning_rate": 4.449546704797085e-06, "loss": 0.2319, "step": 120925 }, { "epoch": 1.19, "grad_norm": 44.45309829711914, "learning_rate": 4.449422582342837e-06, "loss": 0.2776, "step": 120950 }, { "epoch": 1.19, "grad_norm": 2.529203176498413, "learning_rate": 4.449298459888588e-06, "loss": 0.2123, "step": 120975 }, { "epoch": 1.19, "grad_norm": 16.98739242553711, "learning_rate": 4.449174337434339e-06, "loss": 0.2409, "step": 121000 }, { "epoch": 1.19, "grad_norm": 4.517546653747559, "learning_rate": 4.449050214980091e-06, "loss": 0.2355, "step": 121025 }, { "epoch": 1.19, "grad_norm": 20.078184127807617, "learning_rate": 4.448926092525842e-06, "loss": 0.2481, "step": 121050 }, { "epoch": 1.19, "grad_norm": 5.992881774902344, "learning_rate": 4.448801970071594e-06, "loss": 0.2194, "step": 121075 }, { "epoch": 1.19, "grad_norm": 16.31248664855957, "learning_rate": 4.448677847617346e-06, "loss": 0.2281, "step": 121100 }, { "epoch": 1.19, "grad_norm": 6.3864946365356445, "learning_rate": 4.448553725163097e-06, "loss": 0.2302, "step": 121125 }, { "epoch": 1.19, "grad_norm": 26.467147827148438, "learning_rate": 4.448429602708849e-06, "loss": 0.264, "step": 121150 }, { "epoch": 1.19, "grad_norm": 34.64649963378906, "learning_rate": 4.4483054802546e-06, "loss": 0.2302, "step": 121175 }, { "epoch": 1.19, "grad_norm": 14.909754753112793, "learning_rate": 4.448181357800352e-06, "loss": 0.2642, "step": 121200 }, { "epoch": 1.19, "grad_norm": 9.206755638122559, "learning_rate": 4.448057235346103e-06, "loss": 0.225, "step": 121225 }, { "epoch": 1.19, "grad_norm": 11.352951049804688, "learning_rate": 4.4479331128918554e-06, "loss": 0.22, "step": 121250 }, { "epoch": 1.19, "grad_norm": 3.960634469985962, "learning_rate": 4.447808990437607e-06, "loss": 0.2209, "step": 121275 }, { "epoch": 1.19, "grad_norm": 12.723858833312988, "learning_rate": 4.447684867983358e-06, "loss": 0.2353, "step": 121300 }, { "epoch": 1.19, "grad_norm": 7.183352470397949, "learning_rate": 4.44756074552911e-06, "loss": 0.1866, "step": 121325 }, { "epoch": 1.19, "grad_norm": 14.915167808532715, "learning_rate": 4.447436623074861e-06, "loss": 0.2519, "step": 121350 }, { "epoch": 1.19, "grad_norm": 1.5000923871994019, "learning_rate": 4.447312500620612e-06, "loss": 0.2383, "step": 121375 }, { "epoch": 1.19, "grad_norm": 12.09439754486084, "learning_rate": 4.447188378166364e-06, "loss": 0.2558, "step": 121400 }, { "epoch": 1.19, "grad_norm": 10.132753372192383, "learning_rate": 4.447064255712116e-06, "loss": 0.2214, "step": 121425 }, { "epoch": 1.19, "grad_norm": 14.363073348999023, "learning_rate": 4.446940133257867e-06, "loss": 0.2285, "step": 121450 }, { "epoch": 1.19, "grad_norm": 6.665421485900879, "learning_rate": 4.446816010803619e-06, "loss": 0.2444, "step": 121475 }, { "epoch": 1.19, "grad_norm": 13.857609748840332, "learning_rate": 4.44669188834937e-06, "loss": 0.2477, "step": 121500 }, { "epoch": 1.19, "grad_norm": 7.380456924438477, "learning_rate": 4.446567765895121e-06, "loss": 0.2424, "step": 121525 }, { "epoch": 1.2, "grad_norm": 11.050479888916016, "learning_rate": 4.446443643440873e-06, "loss": 0.2895, "step": 121550 }, { "epoch": 1.2, "grad_norm": 1.8387794494628906, "learning_rate": 4.446319520986625e-06, "loss": 0.2791, "step": 121575 }, { "epoch": 1.2, "grad_norm": 14.592872619628906, "learning_rate": 4.446195398532377e-06, "loss": 0.2478, "step": 121600 }, { "epoch": 1.2, "grad_norm": 2.6139047145843506, "learning_rate": 4.446071276078128e-06, "loss": 0.2564, "step": 121625 }, { "epoch": 1.2, "grad_norm": 13.896299362182617, "learning_rate": 4.44594715362388e-06, "loss": 0.2267, "step": 121650 }, { "epoch": 1.2, "grad_norm": 4.889485836029053, "learning_rate": 4.445823031169631e-06, "loss": 0.2078, "step": 121675 }, { "epoch": 1.2, "grad_norm": 10.681735038757324, "learning_rate": 4.445698908715382e-06, "loss": 0.2142, "step": 121700 }, { "epoch": 1.2, "grad_norm": 6.133481979370117, "learning_rate": 4.445574786261134e-06, "loss": 0.2475, "step": 121725 }, { "epoch": 1.2, "grad_norm": 8.131498336791992, "learning_rate": 4.445450663806886e-06, "loss": 0.2534, "step": 121750 }, { "epoch": 1.2, "grad_norm": 6.314455509185791, "learning_rate": 4.445326541352637e-06, "loss": 0.2257, "step": 121775 }, { "epoch": 1.2, "grad_norm": 14.601545333862305, "learning_rate": 4.445202418898389e-06, "loss": 0.237, "step": 121800 }, { "epoch": 1.2, "grad_norm": 5.6620378494262695, "learning_rate": 4.44507829644414e-06, "loss": 0.196, "step": 121825 }, { "epoch": 1.2, "grad_norm": 15.0803804397583, "learning_rate": 4.444954173989891e-06, "loss": 0.2416, "step": 121850 }, { "epoch": 1.2, "grad_norm": 6.336686611175537, "learning_rate": 4.444830051535643e-06, "loss": 0.2221, "step": 121875 }, { "epoch": 1.2, "grad_norm": 14.823356628417969, "learning_rate": 4.4447059290813946e-06, "loss": 0.2174, "step": 121900 }, { "epoch": 1.2, "grad_norm": 7.497519493103027, "learning_rate": 4.444581806627147e-06, "loss": 0.2035, "step": 121925 }, { "epoch": 1.2, "grad_norm": 5.9116530418396, "learning_rate": 4.444457684172898e-06, "loss": 0.2096, "step": 121950 }, { "epoch": 1.2, "grad_norm": 8.34266185760498, "learning_rate": 4.44433356171865e-06, "loss": 0.2384, "step": 121975 }, { "epoch": 1.2, "grad_norm": 9.855315208435059, "learning_rate": 4.444209439264401e-06, "loss": 0.2382, "step": 122000 }, { "epoch": 1.2, "grad_norm": 2.7100830078125, "learning_rate": 4.444090281708323e-06, "loss": 0.2429, "step": 122025 }, { "epoch": 1.2, "grad_norm": 12.671279907226562, "learning_rate": 4.443966159254074e-06, "loss": 0.2394, "step": 122050 }, { "epoch": 1.2, "grad_norm": 5.980801105499268, "learning_rate": 4.443842036799826e-06, "loss": 0.2025, "step": 122075 }, { "epoch": 1.2, "grad_norm": 12.836297988891602, "learning_rate": 4.4437179143455775e-06, "loss": 0.2229, "step": 122100 }, { "epoch": 1.2, "grad_norm": 4.410719871520996, "learning_rate": 4.443593791891329e-06, "loss": 0.1962, "step": 122125 }, { "epoch": 1.2, "grad_norm": 23.57489585876465, "learning_rate": 4.443469669437081e-06, "loss": 0.2454, "step": 122150 }, { "epoch": 1.2, "grad_norm": 3.878519296646118, "learning_rate": 4.443345546982832e-06, "loss": 0.235, "step": 122175 }, { "epoch": 1.2, "grad_norm": 14.274882316589355, "learning_rate": 4.443221424528583e-06, "loss": 0.2181, "step": 122200 }, { "epoch": 1.2, "grad_norm": 5.783509254455566, "learning_rate": 4.443097302074335e-06, "loss": 0.224, "step": 122225 }, { "epoch": 1.2, "grad_norm": 9.473777770996094, "learning_rate": 4.4429731796200864e-06, "loss": 0.2434, "step": 122250 }, { "epoch": 1.2, "grad_norm": 4.074331283569336, "learning_rate": 4.442849057165838e-06, "loss": 0.1969, "step": 122275 }, { "epoch": 1.2, "grad_norm": 11.752099990844727, "learning_rate": 4.442724934711589e-06, "loss": 0.2589, "step": 122300 }, { "epoch": 1.2, "grad_norm": 8.711676597595215, "learning_rate": 4.442600812257341e-06, "loss": 0.2161, "step": 122325 }, { "epoch": 1.2, "grad_norm": 15.249004364013672, "learning_rate": 4.442476689803092e-06, "loss": 0.2709, "step": 122350 }, { "epoch": 1.2, "grad_norm": 4.045304298400879, "learning_rate": 4.442352567348844e-06, "loss": 0.2313, "step": 122375 }, { "epoch": 1.2, "grad_norm": 15.317008018493652, "learning_rate": 4.442228444894595e-06, "loss": 0.2354, "step": 122400 }, { "epoch": 1.2, "grad_norm": 8.733501434326172, "learning_rate": 4.4421043224403474e-06, "loss": 0.2305, "step": 122425 }, { "epoch": 1.2, "grad_norm": 11.666900634765625, "learning_rate": 4.441980199986099e-06, "loss": 0.2386, "step": 122450 }, { "epoch": 1.2, "grad_norm": 14.467906951904297, "learning_rate": 4.441856077531851e-06, "loss": 0.2761, "step": 122475 }, { "epoch": 1.2, "grad_norm": 3.4676859378814697, "learning_rate": 4.441731955077602e-06, "loss": 0.238, "step": 122500 }, { "epoch": 1.2, "grad_norm": 9.654719352722168, "learning_rate": 4.441607832623353e-06, "loss": 0.2198, "step": 122525 }, { "epoch": 1.2, "grad_norm": 16.13530158996582, "learning_rate": 4.441483710169105e-06, "loss": 0.2301, "step": 122550 }, { "epoch": 1.21, "grad_norm": 7.202239513397217, "learning_rate": 4.441359587714856e-06, "loss": 0.1889, "step": 122575 }, { "epoch": 1.21, "grad_norm": 17.096349716186523, "learning_rate": 4.441235465260608e-06, "loss": 0.2588, "step": 122600 }, { "epoch": 1.21, "grad_norm": 7.60046911239624, "learning_rate": 4.44111134280636e-06, "loss": 0.2484, "step": 122625 }, { "epoch": 1.21, "grad_norm": 16.833045959472656, "learning_rate": 4.440987220352111e-06, "loss": 0.2345, "step": 122650 }, { "epoch": 1.21, "grad_norm": 9.041800498962402, "learning_rate": 4.440863097897862e-06, "loss": 0.2127, "step": 122675 }, { "epoch": 1.21, "grad_norm": 13.336944580078125, "learning_rate": 4.440738975443614e-06, "loss": 0.2718, "step": 122700 }, { "epoch": 1.21, "grad_norm": 3.185966968536377, "learning_rate": 4.440614852989365e-06, "loss": 0.221, "step": 122725 }, { "epoch": 1.21, "grad_norm": 18.612585067749023, "learning_rate": 4.440490730535117e-06, "loss": 0.2118, "step": 122750 }, { "epoch": 1.21, "grad_norm": 6.528243541717529, "learning_rate": 4.440366608080869e-06, "loss": 0.1681, "step": 122775 }, { "epoch": 1.21, "grad_norm": 13.761297225952148, "learning_rate": 4.44024248562662e-06, "loss": 0.2714, "step": 122800 }, { "epoch": 1.21, "grad_norm": 7.598201274871826, "learning_rate": 4.440118363172372e-06, "loss": 0.2644, "step": 122825 }, { "epoch": 1.21, "grad_norm": 10.88911247253418, "learning_rate": 4.439994240718123e-06, "loss": 0.2464, "step": 122850 }, { "epoch": 1.21, "grad_norm": 5.367681980133057, "learning_rate": 4.439870118263875e-06, "loss": 0.2412, "step": 122875 }, { "epoch": 1.21, "grad_norm": 16.236692428588867, "learning_rate": 4.439745995809626e-06, "loss": 0.2466, "step": 122900 }, { "epoch": 1.21, "grad_norm": 6.564229965209961, "learning_rate": 4.4396218733553785e-06, "loss": 0.206, "step": 122925 }, { "epoch": 1.21, "grad_norm": 13.26955795288086, "learning_rate": 4.43949775090113e-06, "loss": 0.2524, "step": 122950 }, { "epoch": 1.21, "grad_norm": 3.2916202545166016, "learning_rate": 4.439373628446881e-06, "loss": 0.1724, "step": 122975 }, { "epoch": 1.21, "grad_norm": 12.423375129699707, "learning_rate": 4.439249505992633e-06, "loss": 0.2361, "step": 123000 }, { "epoch": 1.21, "grad_norm": 1.7153761386871338, "learning_rate": 4.439125383538384e-06, "loss": 0.2486, "step": 123025 }, { "epoch": 1.21, "grad_norm": 14.085512161254883, "learning_rate": 4.439001261084135e-06, "loss": 0.1981, "step": 123050 }, { "epoch": 1.21, "grad_norm": 2.069648027420044, "learning_rate": 4.438877138629887e-06, "loss": 0.1958, "step": 123075 }, { "epoch": 1.21, "grad_norm": 11.461297035217285, "learning_rate": 4.438753016175639e-06, "loss": 0.2491, "step": 123100 }, { "epoch": 1.21, "grad_norm": 7.948614597320557, "learning_rate": 4.43862889372139e-06, "loss": 0.2277, "step": 123125 }, { "epoch": 1.21, "grad_norm": 12.83610725402832, "learning_rate": 4.438504771267141e-06, "loss": 0.2356, "step": 123150 }, { "epoch": 1.21, "grad_norm": 1.7719745635986328, "learning_rate": 4.438380648812893e-06, "loss": 0.2156, "step": 123175 }, { "epoch": 1.21, "grad_norm": 18.233957290649414, "learning_rate": 4.438256526358644e-06, "loss": 0.2642, "step": 123200 }, { "epoch": 1.21, "grad_norm": 2.6317920684814453, "learning_rate": 4.438132403904396e-06, "loss": 0.2145, "step": 123225 }, { "epoch": 1.21, "grad_norm": 13.683466911315918, "learning_rate": 4.438008281450148e-06, "loss": 0.2516, "step": 123250 }, { "epoch": 1.21, "grad_norm": 6.213540077209473, "learning_rate": 4.4378841589959e-06, "loss": 0.2198, "step": 123275 }, { "epoch": 1.21, "grad_norm": 12.137665748596191, "learning_rate": 4.437760036541651e-06, "loss": 0.284, "step": 123300 }, { "epoch": 1.21, "grad_norm": 0.939301073551178, "learning_rate": 4.437635914087403e-06, "loss": 0.2282, "step": 123325 }, { "epoch": 1.21, "grad_norm": 13.554686546325684, "learning_rate": 4.437511791633154e-06, "loss": 0.2534, "step": 123350 }, { "epoch": 1.21, "grad_norm": 4.968864917755127, "learning_rate": 4.437387669178905e-06, "loss": 0.2145, "step": 123375 }, { "epoch": 1.21, "grad_norm": 12.91256046295166, "learning_rate": 4.437263546724657e-06, "loss": 0.2672, "step": 123400 }, { "epoch": 1.21, "grad_norm": 6.282828330993652, "learning_rate": 4.437139424270409e-06, "loss": 0.2099, "step": 123425 }, { "epoch": 1.21, "grad_norm": 11.610130310058594, "learning_rate": 4.43701530181616e-06, "loss": 0.2233, "step": 123450 }, { "epoch": 1.21, "grad_norm": 6.366002559661865, "learning_rate": 4.436891179361912e-06, "loss": 0.1943, "step": 123475 }, { "epoch": 1.21, "grad_norm": 21.30640983581543, "learning_rate": 4.436767056907663e-06, "loss": 0.2819, "step": 123500 }, { "epoch": 1.21, "grad_norm": 5.584377765655518, "learning_rate": 4.436642934453414e-06, "loss": 0.2454, "step": 123525 }, { "epoch": 1.21, "grad_norm": 14.501655578613281, "learning_rate": 4.436518811999166e-06, "loss": 0.2879, "step": 123550 }, { "epoch": 1.22, "grad_norm": 5.0395660400390625, "learning_rate": 4.4363946895449176e-06, "loss": 0.1954, "step": 123575 }, { "epoch": 1.22, "grad_norm": 18.911514282226562, "learning_rate": 4.436270567090669e-06, "loss": 0.232, "step": 123600 }, { "epoch": 1.22, "grad_norm": 4.613089084625244, "learning_rate": 4.436146444636421e-06, "loss": 0.2295, "step": 123625 }, { "epoch": 1.22, "grad_norm": 16.701566696166992, "learning_rate": 4.436022322182172e-06, "loss": 0.286, "step": 123650 }, { "epoch": 1.22, "grad_norm": 5.082584381103516, "learning_rate": 4.435898199727924e-06, "loss": 0.2052, "step": 123675 }, { "epoch": 1.22, "grad_norm": 10.566987991333008, "learning_rate": 4.435774077273675e-06, "loss": 0.214, "step": 123700 }, { "epoch": 1.22, "grad_norm": 7.5606818199157715, "learning_rate": 4.435649954819427e-06, "loss": 0.22, "step": 123725 }, { "epoch": 1.22, "grad_norm": 16.978214263916016, "learning_rate": 4.435525832365179e-06, "loss": 0.1853, "step": 123750 }, { "epoch": 1.22, "grad_norm": 8.025030136108398, "learning_rate": 4.435401709910931e-06, "loss": 0.1917, "step": 123775 }, { "epoch": 1.22, "grad_norm": 11.563206672668457, "learning_rate": 4.435277587456682e-06, "loss": 0.2109, "step": 123800 }, { "epoch": 1.22, "grad_norm": 2.0865094661712646, "learning_rate": 4.435153465002433e-06, "loss": 0.2368, "step": 123825 }, { "epoch": 1.22, "grad_norm": 18.783668518066406, "learning_rate": 4.435029342548185e-06, "loss": 0.2678, "step": 123850 }, { "epoch": 1.22, "grad_norm": 24.635581970214844, "learning_rate": 4.434905220093936e-06, "loss": 0.2064, "step": 123875 }, { "epoch": 1.22, "grad_norm": 8.546828269958496, "learning_rate": 4.4347810976396876e-06, "loss": 0.2216, "step": 123900 }, { "epoch": 1.22, "grad_norm": 6.847104072570801, "learning_rate": 4.43465697518544e-06, "loss": 0.2051, "step": 123925 }, { "epoch": 1.22, "grad_norm": 13.810502052307129, "learning_rate": 4.434532852731191e-06, "loss": 0.1827, "step": 123950 }, { "epoch": 1.22, "grad_norm": 5.561845779418945, "learning_rate": 4.434408730276942e-06, "loss": 0.2516, "step": 123975 }, { "epoch": 1.22, "grad_norm": 14.461308479309082, "learning_rate": 4.434284607822693e-06, "loss": 0.2076, "step": 124000 }, { "epoch": 1.22, "grad_norm": 13.28304672241211, "learning_rate": 4.434160485368445e-06, "loss": 0.1901, "step": 124025 }, { "epoch": 1.22, "grad_norm": 11.246415138244629, "learning_rate": 4.4340363629141965e-06, "loss": 0.2503, "step": 124050 }, { "epoch": 1.22, "grad_norm": 3.995095729827881, "learning_rate": 4.433917205358118e-06, "loss": 0.2048, "step": 124075 }, { "epoch": 1.22, "grad_norm": 14.454621315002441, "learning_rate": 4.43379308290387e-06, "loss": 0.269, "step": 124100 }, { "epoch": 1.22, "grad_norm": 7.738598823547363, "learning_rate": 4.433668960449622e-06, "loss": 0.2193, "step": 124125 }, { "epoch": 1.22, "grad_norm": 13.831655502319336, "learning_rate": 4.433544837995373e-06, "loss": 0.2865, "step": 124150 }, { "epoch": 1.22, "grad_norm": 9.435235977172852, "learning_rate": 4.433420715541125e-06, "loss": 0.2192, "step": 124175 }, { "epoch": 1.22, "grad_norm": 10.628461837768555, "learning_rate": 4.433296593086876e-06, "loss": 0.2212, "step": 124200 }, { "epoch": 1.22, "grad_norm": 8.571837425231934, "learning_rate": 4.433172470632628e-06, "loss": 0.2111, "step": 124225 }, { "epoch": 1.22, "grad_norm": 10.181750297546387, "learning_rate": 4.433048348178379e-06, "loss": 0.22, "step": 124250 }, { "epoch": 1.22, "grad_norm": 5.738417148590088, "learning_rate": 4.432924225724131e-06, "loss": 0.2131, "step": 124275 }, { "epoch": 1.22, "grad_norm": 13.979435920715332, "learning_rate": 4.432800103269883e-06, "loss": 0.2724, "step": 124300 }, { "epoch": 1.22, "grad_norm": 4.231959819793701, "learning_rate": 4.432675980815634e-06, "loss": 0.2193, "step": 124325 }, { "epoch": 1.22, "grad_norm": 19.830665588378906, "learning_rate": 4.432551858361385e-06, "loss": 0.2851, "step": 124350 }, { "epoch": 1.22, "grad_norm": 3.9441845417022705, "learning_rate": 4.432427735907137e-06, "loss": 0.1999, "step": 124375 }, { "epoch": 1.22, "grad_norm": 13.672311782836914, "learning_rate": 4.432303613452888e-06, "loss": 0.256, "step": 124400 }, { "epoch": 1.22, "grad_norm": 6.6357035636901855, "learning_rate": 4.43217949099864e-06, "loss": 0.2209, "step": 124425 }, { "epoch": 1.22, "grad_norm": 8.881914138793945, "learning_rate": 4.432055368544392e-06, "loss": 0.2579, "step": 124450 }, { "epoch": 1.22, "grad_norm": 5.933802127838135, "learning_rate": 4.431931246090143e-06, "loss": 0.2261, "step": 124475 }, { "epoch": 1.22, "grad_norm": 29.991477966308594, "learning_rate": 4.431807123635894e-06, "loss": 0.2284, "step": 124500 }, { "epoch": 1.22, "grad_norm": 1.002458095550537, "learning_rate": 4.431683001181646e-06, "loss": 0.2132, "step": 124525 }, { "epoch": 1.22, "grad_norm": 18.33172035217285, "learning_rate": 4.431558878727397e-06, "loss": 0.2076, "step": 124550 }, { "epoch": 1.22, "grad_norm": 11.278604507446289, "learning_rate": 4.431434756273149e-06, "loss": 0.2338, "step": 124575 }, { "epoch": 1.23, "grad_norm": 16.08371353149414, "learning_rate": 4.431310633818901e-06, "loss": 0.2584, "step": 124600 }, { "epoch": 1.23, "grad_norm": 4.029568672180176, "learning_rate": 4.431186511364653e-06, "loss": 0.2335, "step": 124625 }, { "epoch": 1.23, "grad_norm": 12.340340614318848, "learning_rate": 4.431062388910404e-06, "loss": 0.231, "step": 124650 }, { "epoch": 1.23, "grad_norm": 7.961022853851318, "learning_rate": 4.430938266456155e-06, "loss": 0.2203, "step": 124675 }, { "epoch": 1.23, "grad_norm": 15.597393035888672, "learning_rate": 4.430814144001907e-06, "loss": 0.2455, "step": 124700 }, { "epoch": 1.23, "grad_norm": 4.110496520996094, "learning_rate": 4.430690021547658e-06, "loss": 0.2079, "step": 124725 }, { "epoch": 1.23, "grad_norm": 14.03205394744873, "learning_rate": 4.43056589909341e-06, "loss": 0.2336, "step": 124750 }, { "epoch": 1.23, "grad_norm": 10.159258842468262, "learning_rate": 4.430441776639162e-06, "loss": 0.2219, "step": 124775 }, { "epoch": 1.23, "grad_norm": 11.261120796203613, "learning_rate": 4.430317654184913e-06, "loss": 0.2321, "step": 124800 }, { "epoch": 1.23, "grad_norm": 5.170065879821777, "learning_rate": 4.430193531730664e-06, "loss": 0.2451, "step": 124825 }, { "epoch": 1.23, "grad_norm": 12.67054557800293, "learning_rate": 4.430069409276416e-06, "loss": 0.2339, "step": 124850 }, { "epoch": 1.23, "grad_norm": 6.468502521514893, "learning_rate": 4.429945286822167e-06, "loss": 0.2267, "step": 124875 }, { "epoch": 1.23, "grad_norm": 17.195640563964844, "learning_rate": 4.429821164367919e-06, "loss": 0.235, "step": 124900 }, { "epoch": 1.23, "grad_norm": 8.451510429382324, "learning_rate": 4.429697041913671e-06, "loss": 0.2098, "step": 124925 }, { "epoch": 1.23, "grad_norm": 14.370872497558594, "learning_rate": 4.429572919459423e-06, "loss": 0.2279, "step": 124950 }, { "epoch": 1.23, "grad_norm": 5.830008506774902, "learning_rate": 4.429448797005174e-06, "loss": 0.2568, "step": 124975 }, { "epoch": 1.23, "grad_norm": 12.023262023925781, "learning_rate": 4.429324674550926e-06, "loss": 0.2934, "step": 125000 }, { "epoch": 1.23, "grad_norm": 7.86068058013916, "learning_rate": 4.429200552096677e-06, "loss": 0.1975, "step": 125025 }, { "epoch": 1.23, "grad_norm": 12.9217529296875, "learning_rate": 4.429076429642428e-06, "loss": 0.2298, "step": 125050 }, { "epoch": 1.23, "grad_norm": 6.587538719177246, "learning_rate": 4.42895230718818e-06, "loss": 0.2172, "step": 125075 }, { "epoch": 1.23, "grad_norm": 13.585149765014648, "learning_rate": 4.428828184733932e-06, "loss": 0.2489, "step": 125100 }, { "epoch": 1.23, "grad_norm": 5.063422679901123, "learning_rate": 4.428704062279683e-06, "loss": 0.23, "step": 125125 }, { "epoch": 1.23, "grad_norm": 15.566007614135742, "learning_rate": 4.428579939825435e-06, "loss": 0.2554, "step": 125150 }, { "epoch": 1.23, "grad_norm": 4.179235458374023, "learning_rate": 4.428455817371186e-06, "loss": 0.2147, "step": 125175 }, { "epoch": 1.23, "grad_norm": 15.91113567352295, "learning_rate": 4.428331694916937e-06, "loss": 0.1962, "step": 125200 }, { "epoch": 1.23, "grad_norm": 1.2262144088745117, "learning_rate": 4.428207572462689e-06, "loss": 0.2228, "step": 125225 }, { "epoch": 1.23, "grad_norm": 15.77612590789795, "learning_rate": 4.428083450008441e-06, "loss": 0.2673, "step": 125250 }, { "epoch": 1.23, "grad_norm": 7.0562238693237305, "learning_rate": 4.427959327554192e-06, "loss": 0.2418, "step": 125275 }, { "epoch": 1.23, "grad_norm": 13.213480949401855, "learning_rate": 4.427835205099944e-06, "loss": 0.2704, "step": 125300 }, { "epoch": 1.23, "grad_norm": 5.438790798187256, "learning_rate": 4.427711082645695e-06, "loss": 0.192, "step": 125325 }, { "epoch": 1.23, "grad_norm": 11.5697021484375, "learning_rate": 4.427586960191447e-06, "loss": 0.2309, "step": 125350 }, { "epoch": 1.23, "grad_norm": 5.906172275543213, "learning_rate": 4.427462837737198e-06, "loss": 0.2258, "step": 125375 }, { "epoch": 1.23, "grad_norm": 12.259110450744629, "learning_rate": 4.42733871528295e-06, "loss": 0.243, "step": 125400 }, { "epoch": 1.23, "grad_norm": 12.069473266601562, "learning_rate": 4.427214592828702e-06, "loss": 0.2007, "step": 125425 }, { "epoch": 1.23, "grad_norm": 13.372950553894043, "learning_rate": 4.427090470374453e-06, "loss": 0.2463, "step": 125450 }, { "epoch": 1.23, "grad_norm": 4.704540729522705, "learning_rate": 4.426966347920205e-06, "loss": 0.2077, "step": 125475 }, { "epoch": 1.23, "grad_norm": 7.517647743225098, "learning_rate": 4.426842225465956e-06, "loss": 0.2737, "step": 125500 }, { "epoch": 1.23, "grad_norm": 14.378791809082031, "learning_rate": 4.426718103011707e-06, "loss": 0.2107, "step": 125525 }, { "epoch": 1.23, "grad_norm": 15.275672912597656, "learning_rate": 4.426593980557459e-06, "loss": 0.2723, "step": 125550 }, { "epoch": 1.23, "grad_norm": 7.833057403564453, "learning_rate": 4.4264698581032106e-06, "loss": 0.2126, "step": 125575 }, { "epoch": 1.23, "grad_norm": 16.049959182739258, "learning_rate": 4.426345735648962e-06, "loss": 0.2464, "step": 125600 }, { "epoch": 1.24, "grad_norm": 9.47091007232666, "learning_rate": 4.426221613194714e-06, "loss": 0.2038, "step": 125625 }, { "epoch": 1.24, "grad_norm": 18.01631736755371, "learning_rate": 4.426097490740465e-06, "loss": 0.2281, "step": 125650 }, { "epoch": 1.24, "grad_norm": 5.3948564529418945, "learning_rate": 4.425973368286216e-06, "loss": 0.2193, "step": 125675 }, { "epoch": 1.24, "grad_norm": 12.264717102050781, "learning_rate": 4.425849245831968e-06, "loss": 0.2542, "step": 125700 }, { "epoch": 1.24, "grad_norm": 6.89351749420166, "learning_rate": 4.4257251233777195e-06, "loss": 0.2055, "step": 125725 }, { "epoch": 1.24, "grad_norm": 14.776298522949219, "learning_rate": 4.425601000923472e-06, "loss": 0.1862, "step": 125750 }, { "epoch": 1.24, "grad_norm": 3.9950649738311768, "learning_rate": 4.425476878469223e-06, "loss": 0.2229, "step": 125775 }, { "epoch": 1.24, "grad_norm": 8.456477165222168, "learning_rate": 4.425352756014975e-06, "loss": 0.2614, "step": 125800 }, { "epoch": 1.24, "grad_norm": 11.858365058898926, "learning_rate": 4.425228633560726e-06, "loss": 0.1751, "step": 125825 }, { "epoch": 1.24, "grad_norm": 21.534700393676758, "learning_rate": 4.425104511106478e-06, "loss": 0.2681, "step": 125850 }, { "epoch": 1.24, "grad_norm": 2.5291225910186768, "learning_rate": 4.424980388652229e-06, "loss": 0.2233, "step": 125875 }, { "epoch": 1.24, "grad_norm": 10.995214462280273, "learning_rate": 4.4248562661979806e-06, "loss": 0.229, "step": 125900 }, { "epoch": 1.24, "grad_norm": 9.66532039642334, "learning_rate": 4.424732143743733e-06, "loss": 0.2081, "step": 125925 }, { "epoch": 1.24, "grad_norm": 15.840290069580078, "learning_rate": 4.424608021289484e-06, "loss": 0.276, "step": 125950 }, { "epoch": 1.24, "grad_norm": 5.127369403839111, "learning_rate": 4.424483898835235e-06, "loss": 0.2118, "step": 125975 }, { "epoch": 1.24, "grad_norm": 15.569429397583008, "learning_rate": 4.424359776380987e-06, "loss": 0.2919, "step": 126000 }, { "epoch": 1.24, "grad_norm": 7.577507972717285, "learning_rate": 4.424235653926738e-06, "loss": 0.1957, "step": 126025 }, { "epoch": 1.24, "grad_norm": 10.56314468383789, "learning_rate": 4.4241115314724895e-06, "loss": 0.2703, "step": 126050 }, { "epoch": 1.24, "grad_norm": 8.762948989868164, "learning_rate": 4.4239874090182416e-06, "loss": 0.2308, "step": 126075 }, { "epoch": 1.24, "grad_norm": 15.478458404541016, "learning_rate": 4.423863286563993e-06, "loss": 0.2063, "step": 126100 }, { "epoch": 1.24, "grad_norm": 5.024847984313965, "learning_rate": 4.423739164109744e-06, "loss": 0.2533, "step": 126125 }, { "epoch": 1.24, "grad_norm": 19.737224578857422, "learning_rate": 4.423615041655496e-06, "loss": 0.2255, "step": 126150 }, { "epoch": 1.24, "grad_norm": 5.266118049621582, "learning_rate": 4.423490919201247e-06, "loss": 0.2254, "step": 126175 }, { "epoch": 1.24, "grad_norm": 14.344234466552734, "learning_rate": 4.423366796746999e-06, "loss": 0.2071, "step": 126200 }, { "epoch": 1.24, "grad_norm": 4.9406256675720215, "learning_rate": 4.4232426742927505e-06, "loss": 0.2217, "step": 126225 }, { "epoch": 1.24, "grad_norm": 18.043392181396484, "learning_rate": 4.423123516736672e-06, "loss": 0.2463, "step": 126250 }, { "epoch": 1.24, "grad_norm": 7.83861780166626, "learning_rate": 4.422999394282424e-06, "loss": 0.211, "step": 126275 }, { "epoch": 1.24, "grad_norm": 11.041086196899414, "learning_rate": 4.422875271828176e-06, "loss": 0.1808, "step": 126300 }, { "epoch": 1.24, "grad_norm": 8.440325736999512, "learning_rate": 4.422751149373927e-06, "loss": 0.2203, "step": 126325 }, { "epoch": 1.24, "grad_norm": 9.437719345092773, "learning_rate": 4.422627026919678e-06, "loss": 0.228, "step": 126350 }, { "epoch": 1.24, "grad_norm": 5.224446773529053, "learning_rate": 4.42250290446543e-06, "loss": 0.2183, "step": 126375 }, { "epoch": 1.24, "grad_norm": 17.18441390991211, "learning_rate": 4.422378782011181e-06, "loss": 0.2227, "step": 126400 }, { "epoch": 1.24, "grad_norm": 7.5179314613342285, "learning_rate": 4.422254659556933e-06, "loss": 0.2033, "step": 126425 }, { "epoch": 1.24, "grad_norm": 12.970259666442871, "learning_rate": 4.422130537102685e-06, "loss": 0.2212, "step": 126450 }, { "epoch": 1.24, "grad_norm": 26.07317543029785, "learning_rate": 4.422006414648436e-06, "loss": 0.2437, "step": 126475 }, { "epoch": 1.24, "grad_norm": 10.867015838623047, "learning_rate": 4.421882292194187e-06, "loss": 0.2437, "step": 126500 }, { "epoch": 1.24, "grad_norm": 1.8872735500335693, "learning_rate": 4.421758169739939e-06, "loss": 0.2068, "step": 126525 }, { "epoch": 1.24, "grad_norm": 10.633360862731934, "learning_rate": 4.42163404728569e-06, "loss": 0.2435, "step": 126550 }, { "epoch": 1.24, "grad_norm": 7.300833702087402, "learning_rate": 4.4215099248314415e-06, "loss": 0.2258, "step": 126575 }, { "epoch": 1.24, "grad_norm": 20.344741821289062, "learning_rate": 4.421385802377194e-06, "loss": 0.214, "step": 126600 }, { "epoch": 1.24, "grad_norm": 1.6527950763702393, "learning_rate": 4.421261679922945e-06, "loss": 0.2013, "step": 126625 }, { "epoch": 1.25, "grad_norm": 15.416077613830566, "learning_rate": 4.421137557468697e-06, "loss": 0.253, "step": 126650 }, { "epoch": 1.25, "grad_norm": 11.532496452331543, "learning_rate": 4.421013435014448e-06, "loss": 0.197, "step": 126675 }, { "epoch": 1.25, "grad_norm": 9.385797500610352, "learning_rate": 4.4208893125602e-06, "loss": 0.2136, "step": 126700 }, { "epoch": 1.25, "grad_norm": 3.8247430324554443, "learning_rate": 4.420765190105951e-06, "loss": 0.2274, "step": 126725 }, { "epoch": 1.25, "grad_norm": 14.150794982910156, "learning_rate": 4.420641067651703e-06, "loss": 0.285, "step": 126750 }, { "epoch": 1.25, "grad_norm": 3.6110846996307373, "learning_rate": 4.420516945197455e-06, "loss": 0.2247, "step": 126775 }, { "epoch": 1.25, "grad_norm": 14.984219551086426, "learning_rate": 4.420392822743206e-06, "loss": 0.2281, "step": 126800 }, { "epoch": 1.25, "grad_norm": 3.9460437297821045, "learning_rate": 4.420268700288957e-06, "loss": 0.2179, "step": 126825 }, { "epoch": 1.25, "grad_norm": 16.466205596923828, "learning_rate": 4.420144577834709e-06, "loss": 0.2381, "step": 126850 }, { "epoch": 1.25, "grad_norm": 5.043099880218506, "learning_rate": 4.42002045538046e-06, "loss": 0.1943, "step": 126875 }, { "epoch": 1.25, "grad_norm": 7.784548759460449, "learning_rate": 4.4198963329262115e-06, "loss": 0.3149, "step": 126900 }, { "epoch": 1.25, "grad_norm": 6.356399059295654, "learning_rate": 4.419772210471964e-06, "loss": 0.2375, "step": 126925 }, { "epoch": 1.25, "grad_norm": 13.241070747375488, "learning_rate": 4.419648088017715e-06, "loss": 0.274, "step": 126950 }, { "epoch": 1.25, "grad_norm": 4.437890529632568, "learning_rate": 4.419523965563466e-06, "loss": 0.2019, "step": 126975 }, { "epoch": 1.25, "grad_norm": 13.077924728393555, "learning_rate": 4.419399843109218e-06, "loss": 0.2016, "step": 127000 }, { "epoch": 1.25, "grad_norm": 7.119729518890381, "learning_rate": 4.419275720654969e-06, "loss": 0.2214, "step": 127025 }, { "epoch": 1.25, "grad_norm": 15.542913436889648, "learning_rate": 4.419151598200721e-06, "loss": 0.2818, "step": 127050 }, { "epoch": 1.25, "grad_norm": 7.05670166015625, "learning_rate": 4.4190274757464726e-06, "loss": 0.2177, "step": 127075 }, { "epoch": 1.25, "grad_norm": 11.374151229858398, "learning_rate": 4.418903353292225e-06, "loss": 0.2033, "step": 127100 }, { "epoch": 1.25, "grad_norm": 7.893677234649658, "learning_rate": 4.418779230837976e-06, "loss": 0.2129, "step": 127125 }, { "epoch": 1.25, "grad_norm": 11.0048189163208, "learning_rate": 4.418655108383728e-06, "loss": 0.2158, "step": 127150 }, { "epoch": 1.25, "grad_norm": 3.618361711502075, "learning_rate": 4.418530985929479e-06, "loss": 0.2328, "step": 127175 }, { "epoch": 1.25, "grad_norm": 26.809301376342773, "learning_rate": 4.41840686347523e-06, "loss": 0.218, "step": 127200 }, { "epoch": 1.25, "grad_norm": 0.7896062135696411, "learning_rate": 4.418282741020982e-06, "loss": 0.2388, "step": 127225 }, { "epoch": 1.25, "grad_norm": 16.371408462524414, "learning_rate": 4.4181586185667336e-06, "loss": 0.2072, "step": 127250 }, { "epoch": 1.25, "grad_norm": 4.066748142242432, "learning_rate": 4.418034496112485e-06, "loss": 0.2057, "step": 127275 }, { "epoch": 1.25, "grad_norm": 17.145540237426758, "learning_rate": 4.417910373658237e-06, "loss": 0.2703, "step": 127300 }, { "epoch": 1.25, "grad_norm": 7.14804220199585, "learning_rate": 4.417786251203988e-06, "loss": 0.2702, "step": 127325 }, { "epoch": 1.25, "grad_norm": 14.259772300720215, "learning_rate": 4.417662128749739e-06, "loss": 0.2281, "step": 127350 }, { "epoch": 1.25, "grad_norm": 6.631993770599365, "learning_rate": 4.417538006295491e-06, "loss": 0.2349, "step": 127375 }, { "epoch": 1.25, "grad_norm": 10.284134864807129, "learning_rate": 4.4174138838412425e-06, "loss": 0.2639, "step": 127400 }, { "epoch": 1.25, "grad_norm": 5.476398944854736, "learning_rate": 4.417289761386994e-06, "loss": 0.2622, "step": 127425 }, { "epoch": 1.25, "grad_norm": 15.235760688781738, "learning_rate": 4.417165638932746e-06, "loss": 0.23, "step": 127450 }, { "epoch": 1.25, "grad_norm": 5.2301859855651855, "learning_rate": 4.417041516478497e-06, "loss": 0.221, "step": 127475 }, { "epoch": 1.25, "grad_norm": 21.0159969329834, "learning_rate": 4.416917394024249e-06, "loss": 0.2184, "step": 127500 }, { "epoch": 1.25, "grad_norm": 8.10832405090332, "learning_rate": 4.41679327157e-06, "loss": 0.2539, "step": 127525 }, { "epoch": 1.25, "grad_norm": 9.817633628845215, "learning_rate": 4.416669149115752e-06, "loss": 0.2476, "step": 127550 }, { "epoch": 1.25, "grad_norm": 5.8725175857543945, "learning_rate": 4.4165450266615036e-06, "loss": 0.242, "step": 127575 }, { "epoch": 1.25, "grad_norm": 14.60090160369873, "learning_rate": 4.416420904207256e-06, "loss": 0.2543, "step": 127600 }, { "epoch": 1.25, "grad_norm": 8.572408676147461, "learning_rate": 4.416296781753007e-06, "loss": 0.2248, "step": 127625 }, { "epoch": 1.26, "grad_norm": 15.104433059692383, "learning_rate": 4.416172659298758e-06, "loss": 0.2096, "step": 127650 }, { "epoch": 1.26, "grad_norm": 4.423783779144287, "learning_rate": 4.416048536844509e-06, "loss": 0.2291, "step": 127675 }, { "epoch": 1.26, "grad_norm": 14.479549407958984, "learning_rate": 4.415924414390261e-06, "loss": 0.2296, "step": 127700 }, { "epoch": 1.26, "grad_norm": 3.6398348808288574, "learning_rate": 4.4158002919360125e-06, "loss": 0.2034, "step": 127725 }, { "epoch": 1.26, "grad_norm": 21.71814727783203, "learning_rate": 4.415676169481764e-06, "loss": 0.2465, "step": 127750 }, { "epoch": 1.26, "grad_norm": 10.790631294250488, "learning_rate": 4.415552047027516e-06, "loss": 0.2591, "step": 127775 }, { "epoch": 1.26, "grad_norm": 16.918930053710938, "learning_rate": 4.415427924573267e-06, "loss": 0.285, "step": 127800 }, { "epoch": 1.26, "grad_norm": 8.58591365814209, "learning_rate": 4.415303802119019e-06, "loss": 0.1971, "step": 127825 }, { "epoch": 1.26, "grad_norm": 11.993759155273438, "learning_rate": 4.41517967966477e-06, "loss": 0.2518, "step": 127850 }, { "epoch": 1.26, "grad_norm": 4.143641471862793, "learning_rate": 4.415055557210522e-06, "loss": 0.1989, "step": 127875 }, { "epoch": 1.26, "grad_norm": 14.50139045715332, "learning_rate": 4.4149314347562735e-06, "loss": 0.2737, "step": 127900 }, { "epoch": 1.26, "grad_norm": 2.8803224563598633, "learning_rate": 4.414807312302026e-06, "loss": 0.2539, "step": 127925 }, { "epoch": 1.26, "grad_norm": 18.632678985595703, "learning_rate": 4.414683189847777e-06, "loss": 0.3135, "step": 127950 }, { "epoch": 1.26, "grad_norm": 7.616642475128174, "learning_rate": 4.414559067393528e-06, "loss": 0.2252, "step": 127975 }, { "epoch": 1.26, "grad_norm": 15.770037651062012, "learning_rate": 4.41443494493928e-06, "loss": 0.2885, "step": 128000 }, { "epoch": 1.26, "grad_norm": 5.454052925109863, "learning_rate": 4.414310822485031e-06, "loss": 0.2412, "step": 128025 }, { "epoch": 1.26, "grad_norm": 13.542280197143555, "learning_rate": 4.4141867000307825e-06, "loss": 0.2325, "step": 128050 }, { "epoch": 1.26, "grad_norm": 1.2782902717590332, "learning_rate": 4.4140625775765346e-06, "loss": 0.2087, "step": 128075 }, { "epoch": 1.26, "grad_norm": 16.9116153717041, "learning_rate": 4.413938455122286e-06, "loss": 0.2504, "step": 128100 }, { "epoch": 1.26, "grad_norm": 6.886627674102783, "learning_rate": 4.413814332668037e-06, "loss": 0.2495, "step": 128125 }, { "epoch": 1.26, "grad_norm": 15.606660842895508, "learning_rate": 4.413690210213789e-06, "loss": 0.291, "step": 128150 }, { "epoch": 1.26, "grad_norm": 5.2402238845825195, "learning_rate": 4.41356608775954e-06, "loss": 0.2376, "step": 128175 }, { "epoch": 1.26, "grad_norm": 11.094438552856445, "learning_rate": 4.4134419653052915e-06, "loss": 0.2523, "step": 128200 }, { "epoch": 1.26, "grad_norm": 5.26094388961792, "learning_rate": 4.4133178428510435e-06, "loss": 0.2171, "step": 128225 }, { "epoch": 1.26, "grad_norm": 25.931333541870117, "learning_rate": 4.413193720396795e-06, "loss": 0.2861, "step": 128250 }, { "epoch": 1.26, "grad_norm": 5.145885944366455, "learning_rate": 4.413069597942547e-06, "loss": 0.225, "step": 128275 }, { "epoch": 1.26, "grad_norm": 16.04336166381836, "learning_rate": 4.412945475488298e-06, "loss": 0.2269, "step": 128300 }, { "epoch": 1.26, "grad_norm": 3.8505420684814453, "learning_rate": 4.41282135303405e-06, "loss": 0.2597, "step": 128325 }, { "epoch": 1.26, "grad_norm": 16.328353881835938, "learning_rate": 4.412697230579801e-06, "loss": 0.2115, "step": 128350 }, { "epoch": 1.26, "grad_norm": 5.396733283996582, "learning_rate": 4.412573108125553e-06, "loss": 0.2602, "step": 128375 }, { "epoch": 1.26, "grad_norm": 10.98437213897705, "learning_rate": 4.4124489856713045e-06, "loss": 0.2479, "step": 128400 }, { "epoch": 1.26, "grad_norm": 2.1070852279663086, "learning_rate": 4.412324863217056e-06, "loss": 0.2345, "step": 128425 }, { "epoch": 1.26, "grad_norm": 13.039994239807129, "learning_rate": 4.412200740762808e-06, "loss": 0.2111, "step": 128450 }, { "epoch": 1.26, "grad_norm": 5.324425220489502, "learning_rate": 4.412081583206729e-06, "loss": 0.2421, "step": 128475 }, { "epoch": 1.26, "grad_norm": 12.275501251220703, "learning_rate": 4.41195746075248e-06, "loss": 0.2201, "step": 128500 }, { "epoch": 1.26, "grad_norm": 5.2273173332214355, "learning_rate": 4.411833338298232e-06, "loss": 0.267, "step": 128525 }, { "epoch": 1.26, "grad_norm": 17.6877384185791, "learning_rate": 4.411709215843983e-06, "loss": 0.2638, "step": 128550 }, { "epoch": 1.26, "grad_norm": 2.3605380058288574, "learning_rate": 4.4115850933897345e-06, "loss": 0.2325, "step": 128575 }, { "epoch": 1.26, "grad_norm": 18.940845489501953, "learning_rate": 4.411460970935487e-06, "loss": 0.2738, "step": 128600 }, { "epoch": 1.26, "grad_norm": 12.317648887634277, "learning_rate": 4.411336848481238e-06, "loss": 0.2173, "step": 128625 }, { "epoch": 1.26, "grad_norm": 15.876907348632812, "learning_rate": 4.411212726026989e-06, "loss": 0.2647, "step": 128650 }, { "epoch": 1.27, "grad_norm": 7.4306182861328125, "learning_rate": 4.411088603572741e-06, "loss": 0.2274, "step": 128675 }, { "epoch": 1.27, "grad_norm": 6.803994178771973, "learning_rate": 4.410964481118492e-06, "loss": 0.2175, "step": 128700 }, { "epoch": 1.27, "grad_norm": 7.448451042175293, "learning_rate": 4.410840358664244e-06, "loss": 0.2262, "step": 128725 }, { "epoch": 1.27, "grad_norm": 14.427845001220703, "learning_rate": 4.4107162362099956e-06, "loss": 0.2465, "step": 128750 }, { "epoch": 1.27, "grad_norm": 4.688682556152344, "learning_rate": 4.410592113755748e-06, "loss": 0.1982, "step": 128775 }, { "epoch": 1.27, "grad_norm": 20.97072982788086, "learning_rate": 4.410467991301499e-06, "loss": 0.2967, "step": 128800 }, { "epoch": 1.27, "grad_norm": 5.616243362426758, "learning_rate": 4.410343868847251e-06, "loss": 0.2631, "step": 128825 }, { "epoch": 1.27, "grad_norm": 16.318689346313477, "learning_rate": 4.410219746393002e-06, "loss": 0.1868, "step": 128850 }, { "epoch": 1.27, "grad_norm": 9.52254581451416, "learning_rate": 4.410095623938753e-06, "loss": 0.2805, "step": 128875 }, { "epoch": 1.27, "grad_norm": 15.986954689025879, "learning_rate": 4.409971501484505e-06, "loss": 0.2427, "step": 128900 }, { "epoch": 1.27, "grad_norm": 9.370651245117188, "learning_rate": 4.409847379030257e-06, "loss": 0.218, "step": 128925 }, { "epoch": 1.27, "grad_norm": 12.233316421508789, "learning_rate": 4.409723256576008e-06, "loss": 0.2348, "step": 128950 }, { "epoch": 1.27, "grad_norm": 2.2039427757263184, "learning_rate": 4.40959913412176e-06, "loss": 0.1832, "step": 128975 }, { "epoch": 1.27, "grad_norm": 11.432027816772461, "learning_rate": 4.409475011667511e-06, "loss": 0.2562, "step": 129000 }, { "epoch": 1.27, "grad_norm": 13.640759468078613, "learning_rate": 4.409350889213262e-06, "loss": 0.2212, "step": 129025 }, { "epoch": 1.27, "grad_norm": 19.09003257751465, "learning_rate": 4.409226766759014e-06, "loss": 0.2951, "step": 129050 }, { "epoch": 1.27, "grad_norm": 6.452193737030029, "learning_rate": 4.4091026443047655e-06, "loss": 0.2317, "step": 129075 }, { "epoch": 1.27, "grad_norm": 15.168025970458984, "learning_rate": 4.408978521850517e-06, "loss": 0.2923, "step": 129100 }, { "epoch": 1.27, "grad_norm": 8.55765438079834, "learning_rate": 4.408854399396269e-06, "loss": 0.2468, "step": 129125 }, { "epoch": 1.27, "grad_norm": 12.929678916931152, "learning_rate": 4.40873027694202e-06, "loss": 0.2601, "step": 129150 }, { "epoch": 1.27, "grad_norm": 3.7044801712036133, "learning_rate": 4.408606154487772e-06, "loss": 0.1622, "step": 129175 }, { "epoch": 1.27, "grad_norm": 14.374327659606934, "learning_rate": 4.408482032033523e-06, "loss": 0.247, "step": 129200 }, { "epoch": 1.27, "grad_norm": 4.0043206214904785, "learning_rate": 4.408357909579275e-06, "loss": 0.2822, "step": 129225 }, { "epoch": 1.27, "grad_norm": 10.071701049804688, "learning_rate": 4.4082337871250266e-06, "loss": 0.2469, "step": 129250 }, { "epoch": 1.27, "grad_norm": 0.1159510537981987, "learning_rate": 4.408109664670778e-06, "loss": 0.2335, "step": 129275 }, { "epoch": 1.27, "grad_norm": 14.6112642288208, "learning_rate": 4.40798554221653e-06, "loss": 0.2329, "step": 129300 }, { "epoch": 1.27, "grad_norm": 10.050820350646973, "learning_rate": 4.407861419762281e-06, "loss": 0.2504, "step": 129325 }, { "epoch": 1.27, "grad_norm": 13.741178512573242, "learning_rate": 4.407737297308032e-06, "loss": 0.2448, "step": 129350 }, { "epoch": 1.27, "grad_norm": 6.379864692687988, "learning_rate": 4.407613174853784e-06, "loss": 0.257, "step": 129375 }, { "epoch": 1.27, "grad_norm": 8.520694732666016, "learning_rate": 4.4074890523995355e-06, "loss": 0.2322, "step": 129400 }, { "epoch": 1.27, "grad_norm": 5.418385982513428, "learning_rate": 4.407364929945287e-06, "loss": 0.2484, "step": 129425 }, { "epoch": 1.27, "grad_norm": 5.35260009765625, "learning_rate": 4.407240807491039e-06, "loss": 0.222, "step": 129450 }, { "epoch": 1.27, "grad_norm": 1.9630018472671509, "learning_rate": 4.40711668503679e-06, "loss": 0.2425, "step": 129475 }, { "epoch": 1.27, "grad_norm": 13.033796310424805, "learning_rate": 4.406992562582541e-06, "loss": 0.2279, "step": 129500 }, { "epoch": 1.27, "grad_norm": 4.0491557121276855, "learning_rate": 4.406868440128293e-06, "loss": 0.221, "step": 129525 }, { "epoch": 1.27, "grad_norm": 15.327168464660645, "learning_rate": 4.4067443176740445e-06, "loss": 0.2068, "step": 129550 }, { "epoch": 1.27, "grad_norm": 3.6086483001708984, "learning_rate": 4.4066201952197965e-06, "loss": 0.2248, "step": 129575 }, { "epoch": 1.27, "grad_norm": 15.272269248962402, "learning_rate": 4.406496072765548e-06, "loss": 0.2163, "step": 129600 }, { "epoch": 1.27, "grad_norm": 7.470543384552002, "learning_rate": 4.4063719503113e-06, "loss": 0.2555, "step": 129625 }, { "epoch": 1.27, "grad_norm": 17.278432846069336, "learning_rate": 4.406247827857051e-06, "loss": 0.2434, "step": 129650 }, { "epoch": 1.27, "grad_norm": 2.8381590843200684, "learning_rate": 4.406123705402803e-06, "loss": 0.1939, "step": 129675 }, { "epoch": 1.28, "grad_norm": 15.951441764831543, "learning_rate": 4.405999582948554e-06, "loss": 0.2653, "step": 129700 }, { "epoch": 1.28, "grad_norm": 4.978048324584961, "learning_rate": 4.4058754604943055e-06, "loss": 0.1912, "step": 129725 }, { "epoch": 1.28, "grad_norm": 21.273405075073242, "learning_rate": 4.4057513380400576e-06, "loss": 0.2975, "step": 129750 }, { "epoch": 1.28, "grad_norm": 5.560883522033691, "learning_rate": 4.405627215585809e-06, "loss": 0.2538, "step": 129775 }, { "epoch": 1.28, "grad_norm": 18.85722541809082, "learning_rate": 4.40550309313156e-06, "loss": 0.2002, "step": 129800 }, { "epoch": 1.28, "grad_norm": 2.128870725631714, "learning_rate": 4.405378970677312e-06, "loss": 0.2225, "step": 129825 }, { "epoch": 1.28, "grad_norm": 9.942117691040039, "learning_rate": 4.405254848223063e-06, "loss": 0.2063, "step": 129850 }, { "epoch": 1.28, "grad_norm": 0.19985108077526093, "learning_rate": 4.4051307257688145e-06, "loss": 0.2251, "step": 129875 }, { "epoch": 1.28, "grad_norm": 8.096346855163574, "learning_rate": 4.4050066033145665e-06, "loss": 0.2352, "step": 129900 }, { "epoch": 1.28, "grad_norm": 7.682011604309082, "learning_rate": 4.404882480860318e-06, "loss": 0.2085, "step": 129925 }, { "epoch": 1.28, "grad_norm": 12.050222396850586, "learning_rate": 4.404758358406069e-06, "loss": 0.2614, "step": 129950 }, { "epoch": 1.28, "grad_norm": 7.513065338134766, "learning_rate": 4.404634235951821e-06, "loss": 0.2446, "step": 129975 }, { "epoch": 1.28, "grad_norm": 13.849555015563965, "learning_rate": 4.404510113497572e-06, "loss": 0.221, "step": 130000 }, { "epoch": 1.28, "grad_norm": 4.793423175811768, "learning_rate": 4.404385991043324e-06, "loss": 0.2082, "step": 130025 }, { "epoch": 1.28, "grad_norm": 11.600775718688965, "learning_rate": 4.4042618685890755e-06, "loss": 0.2525, "step": 130050 }, { "epoch": 1.28, "grad_norm": 8.373879432678223, "learning_rate": 4.4041377461348276e-06, "loss": 0.2428, "step": 130075 }, { "epoch": 1.28, "grad_norm": 11.980966567993164, "learning_rate": 4.404013623680579e-06, "loss": 0.2423, "step": 130100 }, { "epoch": 1.28, "grad_norm": 6.117869853973389, "learning_rate": 4.40388950122633e-06, "loss": 0.2056, "step": 130125 }, { "epoch": 1.28, "grad_norm": 20.009719848632812, "learning_rate": 4.403765378772082e-06, "loss": 0.2856, "step": 130150 }, { "epoch": 1.28, "grad_norm": 6.836498260498047, "learning_rate": 4.403641256317833e-06, "loss": 0.1972, "step": 130175 }, { "epoch": 1.28, "grad_norm": 15.026937484741211, "learning_rate": 4.4035171338635845e-06, "loss": 0.2198, "step": 130200 }, { "epoch": 1.28, "grad_norm": 4.209665775299072, "learning_rate": 4.4033930114093365e-06, "loss": 0.2377, "step": 130225 }, { "epoch": 1.28, "grad_norm": 9.52161693572998, "learning_rate": 4.403268888955088e-06, "loss": 0.2368, "step": 130250 }, { "epoch": 1.28, "grad_norm": 6.235169887542725, "learning_rate": 4.403144766500839e-06, "loss": 0.2181, "step": 130275 }, { "epoch": 1.28, "grad_norm": 14.005574226379395, "learning_rate": 4.403020644046591e-06, "loss": 0.2501, "step": 130300 }, { "epoch": 1.28, "grad_norm": 8.413484573364258, "learning_rate": 4.402896521592342e-06, "loss": 0.2025, "step": 130325 }, { "epoch": 1.28, "grad_norm": 14.341064453125, "learning_rate": 4.402772399138094e-06, "loss": 0.246, "step": 130350 }, { "epoch": 1.28, "grad_norm": 5.248907566070557, "learning_rate": 4.4026482766838455e-06, "loss": 0.2606, "step": 130375 }, { "epoch": 1.28, "grad_norm": 13.826528549194336, "learning_rate": 4.4025241542295975e-06, "loss": 0.21, "step": 130400 }, { "epoch": 1.28, "grad_norm": 2.160001039505005, "learning_rate": 4.402400031775349e-06, "loss": 0.2608, "step": 130425 }, { "epoch": 1.28, "grad_norm": 14.837275505065918, "learning_rate": 4.4022759093211e-06, "loss": 0.2592, "step": 130450 }, { "epoch": 1.28, "grad_norm": 6.652617931365967, "learning_rate": 4.402151786866852e-06, "loss": 0.1848, "step": 130475 }, { "epoch": 1.28, "grad_norm": 10.542926788330078, "learning_rate": 4.402027664412603e-06, "loss": 0.2495, "step": 130500 }, { "epoch": 1.28, "grad_norm": 2.591198205947876, "learning_rate": 4.401903541958355e-06, "loss": 0.1724, "step": 130525 }, { "epoch": 1.28, "grad_norm": 12.59205150604248, "learning_rate": 4.4017794195041065e-06, "loss": 0.2273, "step": 130550 }, { "epoch": 1.28, "grad_norm": 7.004802227020264, "learning_rate": 4.4016602619480275e-06, "loss": 0.2687, "step": 130575 }, { "epoch": 1.28, "grad_norm": 13.98893928527832, "learning_rate": 4.40153613949378e-06, "loss": 0.2443, "step": 130600 }, { "epoch": 1.28, "grad_norm": 4.593717575073242, "learning_rate": 4.401412017039531e-06, "loss": 0.2044, "step": 130625 }, { "epoch": 1.28, "grad_norm": 12.23222541809082, "learning_rate": 4.401287894585282e-06, "loss": 0.1629, "step": 130650 }, { "epoch": 1.28, "grad_norm": 7.667115211486816, "learning_rate": 4.401163772131034e-06, "loss": 0.2206, "step": 130675 }, { "epoch": 1.29, "grad_norm": 20.160131454467773, "learning_rate": 4.401039649676785e-06, "loss": 0.2487, "step": 130700 }, { "epoch": 1.29, "grad_norm": 3.9499285221099854, "learning_rate": 4.4009155272225365e-06, "loss": 0.2015, "step": 130725 }, { "epoch": 1.29, "grad_norm": 12.215291023254395, "learning_rate": 4.4007914047682885e-06, "loss": 0.187, "step": 130750 }, { "epoch": 1.29, "grad_norm": 7.451835632324219, "learning_rate": 4.40066728231404e-06, "loss": 0.2149, "step": 130775 }, { "epoch": 1.29, "grad_norm": 18.475744247436523, "learning_rate": 4.400543159859792e-06, "loss": 0.2414, "step": 130800 }, { "epoch": 1.29, "grad_norm": 8.148404121398926, "learning_rate": 4.400419037405543e-06, "loss": 0.1989, "step": 130825 }, { "epoch": 1.29, "grad_norm": 14.44548511505127, "learning_rate": 4.400294914951295e-06, "loss": 0.2649, "step": 130850 }, { "epoch": 1.29, "grad_norm": 8.454995155334473, "learning_rate": 4.400170792497046e-06, "loss": 0.2028, "step": 130875 }, { "epoch": 1.29, "grad_norm": 6.48099946975708, "learning_rate": 4.400046670042798e-06, "loss": 0.2293, "step": 130900 }, { "epoch": 1.29, "grad_norm": 6.956261157989502, "learning_rate": 4.3999225475885496e-06, "loss": 0.2093, "step": 130925 }, { "epoch": 1.29, "grad_norm": 14.270389556884766, "learning_rate": 4.399798425134301e-06, "loss": 0.2671, "step": 130950 }, { "epoch": 1.29, "grad_norm": 4.89428186416626, "learning_rate": 4.399674302680053e-06, "loss": 0.2173, "step": 130975 }, { "epoch": 1.29, "grad_norm": 5.869399070739746, "learning_rate": 4.399550180225804e-06, "loss": 0.2083, "step": 131000 }, { "epoch": 1.29, "grad_norm": 6.926473617553711, "learning_rate": 4.399426057771555e-06, "loss": 0.2253, "step": 131025 }, { "epoch": 1.29, "grad_norm": 16.040180206298828, "learning_rate": 4.399301935317307e-06, "loss": 0.292, "step": 131050 }, { "epoch": 1.29, "grad_norm": 4.282521724700928, "learning_rate": 4.3991778128630585e-06, "loss": 0.2525, "step": 131075 }, { "epoch": 1.29, "grad_norm": 25.249250411987305, "learning_rate": 4.39905369040881e-06, "loss": 0.3141, "step": 131100 }, { "epoch": 1.29, "grad_norm": 3.9234442710876465, "learning_rate": 4.398929567954562e-06, "loss": 0.2319, "step": 131125 }, { "epoch": 1.29, "grad_norm": 16.65121841430664, "learning_rate": 4.398805445500313e-06, "loss": 0.2558, "step": 131150 }, { "epoch": 1.29, "grad_norm": 3.9184834957122803, "learning_rate": 4.398681323046064e-06, "loss": 0.1918, "step": 131175 }, { "epoch": 1.29, "grad_norm": 12.915156364440918, "learning_rate": 4.398557200591816e-06, "loss": 0.2223, "step": 131200 }, { "epoch": 1.29, "grad_norm": 3.225156545639038, "learning_rate": 4.3984330781375675e-06, "loss": 0.2013, "step": 131225 }, { "epoch": 1.29, "grad_norm": 10.160148620605469, "learning_rate": 4.3983089556833196e-06, "loss": 0.2773, "step": 131250 }, { "epoch": 1.29, "grad_norm": 4.155196189880371, "learning_rate": 4.398184833229071e-06, "loss": 0.1962, "step": 131275 }, { "epoch": 1.29, "grad_norm": 11.399569511413574, "learning_rate": 4.398060710774823e-06, "loss": 0.2722, "step": 131300 }, { "epoch": 1.29, "grad_norm": 4.60548734664917, "learning_rate": 4.397936588320574e-06, "loss": 0.2084, "step": 131325 }, { "epoch": 1.29, "grad_norm": 11.719099044799805, "learning_rate": 4.397812465866326e-06, "loss": 0.2192, "step": 131350 }, { "epoch": 1.29, "grad_norm": 3.1600189208984375, "learning_rate": 4.397688343412077e-06, "loss": 0.1919, "step": 131375 }, { "epoch": 1.29, "grad_norm": 14.868276596069336, "learning_rate": 4.3975642209578285e-06, "loss": 0.214, "step": 131400 }, { "epoch": 1.29, "grad_norm": 0.49190714955329895, "learning_rate": 4.39744009850358e-06, "loss": 0.2197, "step": 131425 }, { "epoch": 1.29, "grad_norm": 15.2496976852417, "learning_rate": 4.397315976049332e-06, "loss": 0.2257, "step": 131450 }, { "epoch": 1.29, "grad_norm": 2.700468063354492, "learning_rate": 4.397191853595083e-06, "loss": 0.2253, "step": 131475 }, { "epoch": 1.29, "grad_norm": 13.460205078125, "learning_rate": 4.397067731140834e-06, "loss": 0.242, "step": 131500 }, { "epoch": 1.29, "grad_norm": 8.699013710021973, "learning_rate": 4.396943608686586e-06, "loss": 0.2107, "step": 131525 }, { "epoch": 1.29, "grad_norm": 11.420828819274902, "learning_rate": 4.3968194862323375e-06, "loss": 0.2394, "step": 131550 }, { "epoch": 1.29, "grad_norm": 11.675894737243652, "learning_rate": 4.396695363778089e-06, "loss": 0.2455, "step": 131575 }, { "epoch": 1.29, "grad_norm": 17.47262191772461, "learning_rate": 4.396571241323841e-06, "loss": 0.2444, "step": 131600 }, { "epoch": 1.29, "grad_norm": 6.769543647766113, "learning_rate": 4.396447118869592e-06, "loss": 0.207, "step": 131625 }, { "epoch": 1.29, "grad_norm": 18.544458389282227, "learning_rate": 4.396322996415344e-06, "loss": 0.2363, "step": 131650 }, { "epoch": 1.29, "grad_norm": 7.601781845092773, "learning_rate": 4.396198873961095e-06, "loss": 0.2002, "step": 131675 }, { "epoch": 1.29, "grad_norm": 13.319855690002441, "learning_rate": 4.396074751506847e-06, "loss": 0.2877, "step": 131700 }, { "epoch": 1.3, "grad_norm": 3.4305121898651123, "learning_rate": 4.3959506290525985e-06, "loss": 0.1766, "step": 131725 }, { "epoch": 1.3, "grad_norm": 14.259912490844727, "learning_rate": 4.3958265065983506e-06, "loss": 0.239, "step": 131750 }, { "epoch": 1.3, "grad_norm": 7.244787693023682, "learning_rate": 4.395702384144102e-06, "loss": 0.1911, "step": 131775 }, { "epoch": 1.3, "grad_norm": 22.956972122192383, "learning_rate": 4.395578261689853e-06, "loss": 0.2624, "step": 131800 }, { "epoch": 1.3, "grad_norm": 4.647058963775635, "learning_rate": 4.395454139235605e-06, "loss": 0.2239, "step": 131825 }, { "epoch": 1.3, "grad_norm": 16.986295700073242, "learning_rate": 4.395330016781356e-06, "loss": 0.2689, "step": 131850 }, { "epoch": 1.3, "grad_norm": 9.106695175170898, "learning_rate": 4.3952058943271075e-06, "loss": 0.2328, "step": 131875 }, { "epoch": 1.3, "grad_norm": 24.177406311035156, "learning_rate": 4.3950817718728595e-06, "loss": 0.2258, "step": 131900 }, { "epoch": 1.3, "grad_norm": 5.392459392547607, "learning_rate": 4.394957649418611e-06, "loss": 0.2083, "step": 131925 }, { "epoch": 1.3, "grad_norm": 14.158039093017578, "learning_rate": 4.394833526964362e-06, "loss": 0.2161, "step": 131950 }, { "epoch": 1.3, "grad_norm": 7.116237163543701, "learning_rate": 4.394709404510114e-06, "loss": 0.2891, "step": 131975 }, { "epoch": 1.3, "grad_norm": 14.759591102600098, "learning_rate": 4.394585282055865e-06, "loss": 0.2187, "step": 132000 }, { "epoch": 1.3, "grad_norm": 3.6556906700134277, "learning_rate": 4.3944611596016164e-06, "loss": 0.1698, "step": 132025 }, { "epoch": 1.3, "grad_norm": 9.832568168640137, "learning_rate": 4.3943370371473685e-06, "loss": 0.2297, "step": 132050 }, { "epoch": 1.3, "grad_norm": 8.024086952209473, "learning_rate": 4.39421291469312e-06, "loss": 0.2123, "step": 132075 }, { "epoch": 1.3, "grad_norm": 12.09119987487793, "learning_rate": 4.394088792238872e-06, "loss": 0.2134, "step": 132100 }, { "epoch": 1.3, "grad_norm": 9.184427261352539, "learning_rate": 4.393964669784623e-06, "loss": 0.2203, "step": 132125 }, { "epoch": 1.3, "grad_norm": 14.770176887512207, "learning_rate": 4.393840547330375e-06, "loss": 0.2817, "step": 132150 }, { "epoch": 1.3, "grad_norm": 6.668329238891602, "learning_rate": 4.393716424876126e-06, "loss": 0.254, "step": 132175 }, { "epoch": 1.3, "grad_norm": 22.514328002929688, "learning_rate": 4.393592302421878e-06, "loss": 0.2963, "step": 132200 }, { "epoch": 1.3, "grad_norm": 8.47273063659668, "learning_rate": 4.3934681799676295e-06, "loss": 0.2792, "step": 132225 }, { "epoch": 1.3, "grad_norm": 15.112652778625488, "learning_rate": 4.393344057513381e-06, "loss": 0.2553, "step": 132250 }, { "epoch": 1.3, "grad_norm": 4.625419616699219, "learning_rate": 4.393219935059132e-06, "loss": 0.203, "step": 132275 }, { "epoch": 1.3, "grad_norm": 13.242040634155273, "learning_rate": 4.393095812604884e-06, "loss": 0.2259, "step": 132300 }, { "epoch": 1.3, "grad_norm": 14.068391799926758, "learning_rate": 4.392971690150635e-06, "loss": 0.2807, "step": 132325 }, { "epoch": 1.3, "grad_norm": 12.434771537780762, "learning_rate": 4.392847567696386e-06, "loss": 0.259, "step": 132350 }, { "epoch": 1.3, "grad_norm": 0.9074265360832214, "learning_rate": 4.3927234452421385e-06, "loss": 0.2018, "step": 132375 }, { "epoch": 1.3, "grad_norm": 13.277691841125488, "learning_rate": 4.39259932278789e-06, "loss": 0.2643, "step": 132400 }, { "epoch": 1.3, "grad_norm": 5.43746280670166, "learning_rate": 4.392475200333641e-06, "loss": 0.1926, "step": 132425 }, { "epoch": 1.3, "grad_norm": 13.237055778503418, "learning_rate": 4.392351077879393e-06, "loss": 0.2039, "step": 132450 }, { "epoch": 1.3, "grad_norm": 4.717223167419434, "learning_rate": 4.392226955425144e-06, "loss": 0.1899, "step": 132475 }, { "epoch": 1.3, "grad_norm": 14.688694953918457, "learning_rate": 4.392102832970896e-06, "loss": 0.231, "step": 132500 }, { "epoch": 1.3, "grad_norm": 6.062863826751709, "learning_rate": 4.3919787105166474e-06, "loss": 0.2479, "step": 132525 }, { "epoch": 1.3, "grad_norm": 9.518253326416016, "learning_rate": 4.3918545880623995e-06, "loss": 0.2053, "step": 132550 }, { "epoch": 1.3, "grad_norm": 8.588841438293457, "learning_rate": 4.391730465608151e-06, "loss": 0.2305, "step": 132575 }, { "epoch": 1.3, "grad_norm": 16.03582000732422, "learning_rate": 4.391606343153903e-06, "loss": 0.3186, "step": 132600 }, { "epoch": 1.3, "grad_norm": 2.2862565517425537, "learning_rate": 4.391482220699654e-06, "loss": 0.2554, "step": 132625 }, { "epoch": 1.3, "grad_norm": 12.058419227600098, "learning_rate": 4.391358098245405e-06, "loss": 0.2694, "step": 132650 }, { "epoch": 1.3, "grad_norm": 1.3860520124435425, "learning_rate": 4.391233975791157e-06, "loss": 0.2348, "step": 132675 }, { "epoch": 1.3, "grad_norm": 14.057984352111816, "learning_rate": 4.3911098533369085e-06, "loss": 0.2078, "step": 132700 }, { "epoch": 1.3, "grad_norm": 7.637643337249756, "learning_rate": 4.39098573088266e-06, "loss": 0.2003, "step": 132725 }, { "epoch": 1.31, "grad_norm": 15.100687026977539, "learning_rate": 4.390861608428412e-06, "loss": 0.2546, "step": 132750 }, { "epoch": 1.31, "grad_norm": 9.353574752807617, "learning_rate": 4.390737485974163e-06, "loss": 0.1972, "step": 132775 }, { "epoch": 1.31, "grad_norm": 8.756891250610352, "learning_rate": 4.390613363519914e-06, "loss": 0.2349, "step": 132800 }, { "epoch": 1.31, "grad_norm": 4.390960693359375, "learning_rate": 4.390489241065666e-06, "loss": 0.2456, "step": 132825 }, { "epoch": 1.31, "grad_norm": 14.16718864440918, "learning_rate": 4.390365118611417e-06, "loss": 0.2468, "step": 132850 }, { "epoch": 1.31, "grad_norm": 3.9595210552215576, "learning_rate": 4.3902459610553384e-06, "loss": 0.1814, "step": 132875 }, { "epoch": 1.31, "grad_norm": 16.609188079833984, "learning_rate": 4.3901218386010905e-06, "loss": 0.2608, "step": 132900 }, { "epoch": 1.31, "grad_norm": 2.3250701427459717, "learning_rate": 4.389997716146842e-06, "loss": 0.2899, "step": 132925 }, { "epoch": 1.31, "grad_norm": 14.171709060668945, "learning_rate": 4.389873593692594e-06, "loss": 0.225, "step": 132950 }, { "epoch": 1.31, "grad_norm": 6.870080471038818, "learning_rate": 4.389749471238345e-06, "loss": 0.204, "step": 132975 }, { "epoch": 1.31, "grad_norm": 22.43378448486328, "learning_rate": 4.389625348784097e-06, "loss": 0.2886, "step": 133000 }, { "epoch": 1.31, "grad_norm": 5.840813636779785, "learning_rate": 4.389501226329848e-06, "loss": 0.2312, "step": 133025 }, { "epoch": 1.31, "grad_norm": 15.97537899017334, "learning_rate": 4.3893771038756e-06, "loss": 0.2343, "step": 133050 }, { "epoch": 1.31, "grad_norm": 7.075302600860596, "learning_rate": 4.3892529814213515e-06, "loss": 0.2417, "step": 133075 }, { "epoch": 1.31, "grad_norm": 16.025774002075195, "learning_rate": 4.389128858967103e-06, "loss": 0.2453, "step": 133100 }, { "epoch": 1.31, "grad_norm": 6.897862911224365, "learning_rate": 4.389004736512855e-06, "loss": 0.2387, "step": 133125 }, { "epoch": 1.31, "grad_norm": 14.377415657043457, "learning_rate": 4.388880614058606e-06, "loss": 0.215, "step": 133150 }, { "epoch": 1.31, "grad_norm": 5.38767147064209, "learning_rate": 4.388756491604357e-06, "loss": 0.2341, "step": 133175 }, { "epoch": 1.31, "grad_norm": 7.564655780792236, "learning_rate": 4.388632369150109e-06, "loss": 0.2242, "step": 133200 }, { "epoch": 1.31, "grad_norm": 5.011399269104004, "learning_rate": 4.3885082466958605e-06, "loss": 0.2261, "step": 133225 }, { "epoch": 1.31, "grad_norm": 13.630640983581543, "learning_rate": 4.388384124241612e-06, "loss": 0.3147, "step": 133250 }, { "epoch": 1.31, "grad_norm": 9.194714546203613, "learning_rate": 4.388260001787364e-06, "loss": 0.2483, "step": 133275 }, { "epoch": 1.31, "grad_norm": 15.49658489227295, "learning_rate": 4.388135879333115e-06, "loss": 0.2628, "step": 133300 }, { "epoch": 1.31, "grad_norm": 0.9957780838012695, "learning_rate": 4.388011756878866e-06, "loss": 0.2816, "step": 133325 }, { "epoch": 1.31, "grad_norm": 8.74513053894043, "learning_rate": 4.387887634424618e-06, "loss": 0.2175, "step": 133350 }, { "epoch": 1.31, "grad_norm": 3.886078357696533, "learning_rate": 4.3877635119703694e-06, "loss": 0.177, "step": 133375 }, { "epoch": 1.31, "grad_norm": 20.10130500793457, "learning_rate": 4.3876393895161215e-06, "loss": 0.2207, "step": 133400 }, { "epoch": 1.31, "grad_norm": 8.689667701721191, "learning_rate": 4.387515267061873e-06, "loss": 0.2137, "step": 133425 }, { "epoch": 1.31, "grad_norm": 18.434967041015625, "learning_rate": 4.387391144607625e-06, "loss": 0.2544, "step": 133450 }, { "epoch": 1.31, "grad_norm": 4.435853481292725, "learning_rate": 4.387267022153376e-06, "loss": 0.1932, "step": 133475 }, { "epoch": 1.31, "grad_norm": 13.957906723022461, "learning_rate": 4.387142899699128e-06, "loss": 0.2191, "step": 133500 }, { "epoch": 1.31, "grad_norm": 1.6208000183105469, "learning_rate": 4.387018777244879e-06, "loss": 0.2164, "step": 133525 }, { "epoch": 1.31, "grad_norm": 15.386796951293945, "learning_rate": 4.3868946547906305e-06, "loss": 0.2298, "step": 133550 }, { "epoch": 1.31, "grad_norm": 2.50093412399292, "learning_rate": 4.3867705323363825e-06, "loss": 0.2572, "step": 133575 }, { "epoch": 1.31, "grad_norm": 8.521809577941895, "learning_rate": 4.386646409882134e-06, "loss": 0.2416, "step": 133600 }, { "epoch": 1.31, "grad_norm": 6.29603910446167, "learning_rate": 4.386522287427885e-06, "loss": 0.1942, "step": 133625 }, { "epoch": 1.31, "grad_norm": 15.680240631103516, "learning_rate": 4.386398164973637e-06, "loss": 0.2611, "step": 133650 }, { "epoch": 1.31, "grad_norm": 4.386072635650635, "learning_rate": 4.386274042519388e-06, "loss": 0.2114, "step": 133675 }, { "epoch": 1.31, "grad_norm": 10.868165016174316, "learning_rate": 4.3861499200651394e-06, "loss": 0.2479, "step": 133700 }, { "epoch": 1.31, "grad_norm": 4.770082950592041, "learning_rate": 4.3860257976108915e-06, "loss": 0.2253, "step": 133725 }, { "epoch": 1.32, "grad_norm": 14.621954917907715, "learning_rate": 4.385901675156643e-06, "loss": 0.2298, "step": 133750 }, { "epoch": 1.32, "grad_norm": 3.902562379837036, "learning_rate": 4.385777552702395e-06, "loss": 0.2068, "step": 133775 }, { "epoch": 1.32, "grad_norm": 12.162396430969238, "learning_rate": 4.385653430248146e-06, "loss": 0.2623, "step": 133800 }, { "epoch": 1.32, "grad_norm": 5.772877216339111, "learning_rate": 4.385529307793898e-06, "loss": 0.2474, "step": 133825 }, { "epoch": 1.32, "grad_norm": 14.397503852844238, "learning_rate": 4.385405185339649e-06, "loss": 0.2114, "step": 133850 }, { "epoch": 1.32, "grad_norm": 5.1899919509887695, "learning_rate": 4.3852810628854005e-06, "loss": 0.2032, "step": 133875 }, { "epoch": 1.32, "grad_norm": 13.400806427001953, "learning_rate": 4.3851569404311525e-06, "loss": 0.2527, "step": 133900 }, { "epoch": 1.32, "grad_norm": 9.6448392868042, "learning_rate": 4.385032817976904e-06, "loss": 0.2249, "step": 133925 }, { "epoch": 1.32, "grad_norm": 11.564506530761719, "learning_rate": 4.384908695522655e-06, "loss": 0.2278, "step": 133950 }, { "epoch": 1.32, "grad_norm": 10.183361053466797, "learning_rate": 4.384784573068407e-06, "loss": 0.2116, "step": 133975 }, { "epoch": 1.32, "grad_norm": 13.049195289611816, "learning_rate": 4.384660450614158e-06, "loss": 0.2268, "step": 134000 }, { "epoch": 1.32, "grad_norm": 5.990405559539795, "learning_rate": 4.384536328159909e-06, "loss": 0.2163, "step": 134025 }, { "epoch": 1.32, "grad_norm": 20.161531448364258, "learning_rate": 4.3844122057056615e-06, "loss": 0.2262, "step": 134050 }, { "epoch": 1.32, "grad_norm": 5.240345478057861, "learning_rate": 4.384288083251413e-06, "loss": 0.2304, "step": 134075 }, { "epoch": 1.32, "grad_norm": 12.578239440917969, "learning_rate": 4.384163960797164e-06, "loss": 0.2351, "step": 134100 }, { "epoch": 1.32, "grad_norm": 6.356546401977539, "learning_rate": 4.384039838342916e-06, "loss": 0.2012, "step": 134125 }, { "epoch": 1.32, "grad_norm": 13.822425842285156, "learning_rate": 4.383915715888667e-06, "loss": 0.2149, "step": 134150 }, { "epoch": 1.32, "grad_norm": 5.209866046905518, "learning_rate": 4.383791593434419e-06, "loss": 0.2209, "step": 134175 }, { "epoch": 1.32, "grad_norm": 13.889056205749512, "learning_rate": 4.3836674709801704e-06, "loss": 0.23, "step": 134200 }, { "epoch": 1.32, "grad_norm": 5.98256778717041, "learning_rate": 4.3835433485259225e-06, "loss": 0.2319, "step": 134225 }, { "epoch": 1.32, "grad_norm": 19.56329345703125, "learning_rate": 4.383419226071674e-06, "loss": 0.2784, "step": 134250 }, { "epoch": 1.32, "grad_norm": 0.5864983201026917, "learning_rate": 4.383295103617426e-06, "loss": 0.2148, "step": 134275 }, { "epoch": 1.32, "grad_norm": 13.969012260437012, "learning_rate": 4.383170981163177e-06, "loss": 0.235, "step": 134300 }, { "epoch": 1.32, "grad_norm": 0.6568583846092224, "learning_rate": 4.383046858708928e-06, "loss": 0.2325, "step": 134325 }, { "epoch": 1.32, "grad_norm": 11.918615341186523, "learning_rate": 4.38292273625468e-06, "loss": 0.2345, "step": 134350 }, { "epoch": 1.32, "grad_norm": 2.767050266265869, "learning_rate": 4.3827986138004315e-06, "loss": 0.2373, "step": 134375 }, { "epoch": 1.32, "grad_norm": 11.039642333984375, "learning_rate": 4.382674491346183e-06, "loss": 0.2346, "step": 134400 }, { "epoch": 1.32, "grad_norm": 8.047327041625977, "learning_rate": 4.382550368891935e-06, "loss": 0.2045, "step": 134425 }, { "epoch": 1.32, "grad_norm": 7.87130069732666, "learning_rate": 4.382426246437686e-06, "loss": 0.1834, "step": 134450 }, { "epoch": 1.32, "grad_norm": 6.545335292816162, "learning_rate": 4.382302123983437e-06, "loss": 0.2508, "step": 134475 }, { "epoch": 1.32, "grad_norm": 13.530345916748047, "learning_rate": 4.382178001529189e-06, "loss": 0.2354, "step": 134500 }, { "epoch": 1.32, "grad_norm": 7.7714104652404785, "learning_rate": 4.38205387907494e-06, "loss": 0.1802, "step": 134525 }, { "epoch": 1.32, "grad_norm": 14.950579643249512, "learning_rate": 4.381929756620692e-06, "loss": 0.2332, "step": 134550 }, { "epoch": 1.32, "grad_norm": 2.9633116722106934, "learning_rate": 4.381805634166444e-06, "loss": 0.2326, "step": 134575 }, { "epoch": 1.32, "grad_norm": 18.411678314208984, "learning_rate": 4.381681511712195e-06, "loss": 0.2355, "step": 134600 }, { "epoch": 1.32, "grad_norm": 3.3499553203582764, "learning_rate": 4.381557389257947e-06, "loss": 0.2287, "step": 134625 }, { "epoch": 1.32, "grad_norm": 10.14608383178711, "learning_rate": 4.381433266803698e-06, "loss": 0.2156, "step": 134650 }, { "epoch": 1.32, "grad_norm": 8.358572006225586, "learning_rate": 4.38130914434945e-06, "loss": 0.2507, "step": 134675 }, { "epoch": 1.32, "grad_norm": 14.838190078735352, "learning_rate": 4.3811850218952014e-06, "loss": 0.2064, "step": 134700 }, { "epoch": 1.32, "grad_norm": 6.018238544464111, "learning_rate": 4.381060899440953e-06, "loss": 0.2247, "step": 134725 }, { "epoch": 1.32, "grad_norm": 10.37923526763916, "learning_rate": 4.380936776986705e-06, "loss": 0.2125, "step": 134750 }, { "epoch": 1.33, "grad_norm": 3.1636221408843994, "learning_rate": 4.380812654532456e-06, "loss": 0.216, "step": 134775 }, { "epoch": 1.33, "grad_norm": 8.342116355895996, "learning_rate": 4.380688532078207e-06, "loss": 0.2581, "step": 134800 }, { "epoch": 1.33, "grad_norm": 7.00844669342041, "learning_rate": 4.380564409623959e-06, "loss": 0.2514, "step": 134825 }, { "epoch": 1.33, "grad_norm": 17.903188705444336, "learning_rate": 4.38044028716971e-06, "loss": 0.3042, "step": 134850 }, { "epoch": 1.33, "grad_norm": 4.139457702636719, "learning_rate": 4.380316164715462e-06, "loss": 0.2073, "step": 134875 }, { "epoch": 1.33, "grad_norm": 15.251808166503906, "learning_rate": 4.380192042261214e-06, "loss": 0.2898, "step": 134900 }, { "epoch": 1.33, "grad_norm": 9.017580032348633, "learning_rate": 4.380067919806965e-06, "loss": 0.2174, "step": 134925 }, { "epoch": 1.33, "grad_norm": 20.092620849609375, "learning_rate": 4.379943797352716e-06, "loss": 0.255, "step": 134950 }, { "epoch": 1.33, "grad_norm": 11.469436645507812, "learning_rate": 4.379819674898468e-06, "loss": 0.2601, "step": 134975 }, { "epoch": 1.33, "grad_norm": 11.55288314819336, "learning_rate": 4.379695552444219e-06, "loss": 0.2406, "step": 135000 }, { "epoch": 1.33, "grad_norm": 6.047355651855469, "learning_rate": 4.3795714299899714e-06, "loss": 0.2031, "step": 135025 }, { "epoch": 1.33, "grad_norm": 8.768141746520996, "learning_rate": 4.379447307535723e-06, "loss": 0.2205, "step": 135050 }, { "epoch": 1.33, "grad_norm": 8.408604621887207, "learning_rate": 4.379323185081475e-06, "loss": 0.2387, "step": 135075 }, { "epoch": 1.33, "grad_norm": 16.9825496673584, "learning_rate": 4.379204027525396e-06, "loss": 0.238, "step": 135100 }, { "epoch": 1.33, "grad_norm": 3.6977322101593018, "learning_rate": 4.379079905071148e-06, "loss": 0.2691, "step": 135125 }, { "epoch": 1.33, "grad_norm": 18.796632766723633, "learning_rate": 4.378955782616899e-06, "loss": 0.242, "step": 135150 }, { "epoch": 1.33, "grad_norm": 2.636837959289551, "learning_rate": 4.37883166016265e-06, "loss": 0.2182, "step": 135175 }, { "epoch": 1.33, "grad_norm": 14.63265323638916, "learning_rate": 4.378707537708402e-06, "loss": 0.2766, "step": 135200 }, { "epoch": 1.33, "grad_norm": 7.675684452056885, "learning_rate": 4.3785834152541535e-06, "loss": 0.2463, "step": 135225 }, { "epoch": 1.33, "grad_norm": 8.090023040771484, "learning_rate": 4.378459292799905e-06, "loss": 0.2199, "step": 135250 }, { "epoch": 1.33, "grad_norm": 4.67706298828125, "learning_rate": 4.378335170345657e-06, "loss": 0.1729, "step": 135275 }, { "epoch": 1.33, "grad_norm": 12.770975112915039, "learning_rate": 4.378211047891408e-06, "loss": 0.227, "step": 135300 }, { "epoch": 1.33, "grad_norm": 5.522381782531738, "learning_rate": 4.378086925437159e-06, "loss": 0.2221, "step": 135325 }, { "epoch": 1.33, "grad_norm": 16.02486801147461, "learning_rate": 4.377962802982911e-06, "loss": 0.2518, "step": 135350 }, { "epoch": 1.33, "grad_norm": 6.284611225128174, "learning_rate": 4.3778386805286624e-06, "loss": 0.1721, "step": 135375 }, { "epoch": 1.33, "grad_norm": 14.617850303649902, "learning_rate": 4.377714558074414e-06, "loss": 0.1885, "step": 135400 }, { "epoch": 1.33, "grad_norm": 5.1038126945495605, "learning_rate": 4.377590435620166e-06, "loss": 0.2161, "step": 135425 }, { "epoch": 1.33, "grad_norm": 12.858475685119629, "learning_rate": 4.377466313165917e-06, "loss": 0.255, "step": 135450 }, { "epoch": 1.33, "grad_norm": 8.597003936767578, "learning_rate": 4.377342190711669e-06, "loss": 0.2137, "step": 135475 }, { "epoch": 1.33, "grad_norm": 14.142426490783691, "learning_rate": 4.37721806825742e-06, "loss": 0.2206, "step": 135500 }, { "epoch": 1.33, "grad_norm": 5.256147384643555, "learning_rate": 4.377093945803172e-06, "loss": 0.2213, "step": 135525 }, { "epoch": 1.33, "grad_norm": 11.69968032836914, "learning_rate": 4.3769698233489235e-06, "loss": 0.2514, "step": 135550 }, { "epoch": 1.33, "grad_norm": 5.483299255371094, "learning_rate": 4.3768457008946755e-06, "loss": 0.2419, "step": 135575 }, { "epoch": 1.33, "grad_norm": 14.50622844696045, "learning_rate": 4.376721578440427e-06, "loss": 0.2673, "step": 135600 }, { "epoch": 1.33, "grad_norm": 8.224284172058105, "learning_rate": 4.376597455986178e-06, "loss": 0.1944, "step": 135625 }, { "epoch": 1.33, "grad_norm": 15.361298561096191, "learning_rate": 4.37647333353193e-06, "loss": 0.2212, "step": 135650 }, { "epoch": 1.33, "grad_norm": 6.183015823364258, "learning_rate": 4.376349211077681e-06, "loss": 0.1995, "step": 135675 }, { "epoch": 1.33, "grad_norm": 8.773083686828613, "learning_rate": 4.3762250886234324e-06, "loss": 0.2404, "step": 135700 }, { "epoch": 1.33, "grad_norm": 2.6869304180145264, "learning_rate": 4.3761009661691845e-06, "loss": 0.2568, "step": 135725 }, { "epoch": 1.33, "grad_norm": 15.997157096862793, "learning_rate": 4.375976843714936e-06, "loss": 0.2401, "step": 135750 }, { "epoch": 1.33, "grad_norm": 5.667858123779297, "learning_rate": 4.375852721260687e-06, "loss": 0.2014, "step": 135775 }, { "epoch": 1.34, "grad_norm": 15.43194580078125, "learning_rate": 4.375728598806439e-06, "loss": 0.2212, "step": 135800 }, { "epoch": 1.34, "grad_norm": 5.354394912719727, "learning_rate": 4.37560447635219e-06, "loss": 0.2054, "step": 135825 }, { "epoch": 1.34, "grad_norm": 4.723624229431152, "learning_rate": 4.375480353897941e-06, "loss": 0.1917, "step": 135850 }, { "epoch": 1.34, "grad_norm": 3.0471854209899902, "learning_rate": 4.3753562314436934e-06, "loss": 0.2653, "step": 135875 }, { "epoch": 1.34, "grad_norm": 13.25086784362793, "learning_rate": 4.375232108989445e-06, "loss": 0.2531, "step": 135900 }, { "epoch": 1.34, "grad_norm": 6.335779190063477, "learning_rate": 4.375107986535197e-06, "loss": 0.2556, "step": 135925 }, { "epoch": 1.34, "grad_norm": 15.736489295959473, "learning_rate": 4.374983864080948e-06, "loss": 0.2316, "step": 135950 }, { "epoch": 1.34, "grad_norm": 9.108832359313965, "learning_rate": 4.3748597416267e-06, "loss": 0.1986, "step": 135975 }, { "epoch": 1.34, "grad_norm": 15.253175735473633, "learning_rate": 4.374735619172451e-06, "loss": 0.2199, "step": 136000 }, { "epoch": 1.34, "grad_norm": 4.877544403076172, "learning_rate": 4.374611496718202e-06, "loss": 0.236, "step": 136025 }, { "epoch": 1.34, "grad_norm": 15.260011672973633, "learning_rate": 4.3744873742639545e-06, "loss": 0.1869, "step": 136050 }, { "epoch": 1.34, "grad_norm": 4.393813610076904, "learning_rate": 4.374363251809706e-06, "loss": 0.2344, "step": 136075 }, { "epoch": 1.34, "grad_norm": 19.046142578125, "learning_rate": 4.374239129355457e-06, "loss": 0.2626, "step": 136100 }, { "epoch": 1.34, "grad_norm": 9.8099365234375, "learning_rate": 4.374115006901209e-06, "loss": 0.2269, "step": 136125 }, { "epoch": 1.34, "grad_norm": 15.00665283203125, "learning_rate": 4.37399088444696e-06, "loss": 0.2086, "step": 136150 }, { "epoch": 1.34, "grad_norm": 3.305863857269287, "learning_rate": 4.373866761992711e-06, "loss": 0.2393, "step": 136175 }, { "epoch": 1.34, "grad_norm": 15.101840019226074, "learning_rate": 4.3737426395384634e-06, "loss": 0.2037, "step": 136200 }, { "epoch": 1.34, "grad_norm": 8.360380172729492, "learning_rate": 4.373618517084215e-06, "loss": 0.2462, "step": 136225 }, { "epoch": 1.34, "grad_norm": 9.271402359008789, "learning_rate": 4.373494394629967e-06, "loss": 0.2075, "step": 136250 }, { "epoch": 1.34, "grad_norm": 9.371285438537598, "learning_rate": 4.373370272175718e-06, "loss": 0.2131, "step": 136275 }, { "epoch": 1.34, "grad_norm": 11.155731201171875, "learning_rate": 4.37324614972147e-06, "loss": 0.2593, "step": 136300 }, { "epoch": 1.34, "grad_norm": 7.007360458374023, "learning_rate": 4.373122027267221e-06, "loss": 0.1759, "step": 136325 }, { "epoch": 1.34, "grad_norm": 18.154041290283203, "learning_rate": 4.372997904812973e-06, "loss": 0.2767, "step": 136350 }, { "epoch": 1.34, "grad_norm": 6.030117511749268, "learning_rate": 4.3728737823587245e-06, "loss": 0.232, "step": 136375 }, { "epoch": 1.34, "grad_norm": 15.91983413696289, "learning_rate": 4.372749659904476e-06, "loss": 0.2185, "step": 136400 }, { "epoch": 1.34, "grad_norm": 8.368706703186035, "learning_rate": 4.372625537450228e-06, "loss": 0.2264, "step": 136425 }, { "epoch": 1.34, "grad_norm": 13.042561531066895, "learning_rate": 4.372501414995979e-06, "loss": 0.3013, "step": 136450 }, { "epoch": 1.34, "grad_norm": 4.343691825866699, "learning_rate": 4.37237729254173e-06, "loss": 0.1653, "step": 136475 }, { "epoch": 1.34, "grad_norm": 7.3859148025512695, "learning_rate": 4.372253170087482e-06, "loss": 0.2674, "step": 136500 }, { "epoch": 1.34, "grad_norm": 1.5962241888046265, "learning_rate": 4.372129047633233e-06, "loss": 0.22, "step": 136525 }, { "epoch": 1.34, "grad_norm": 13.461058616638184, "learning_rate": 4.372004925178985e-06, "loss": 0.2184, "step": 136550 }, { "epoch": 1.34, "grad_norm": 5.191440105438232, "learning_rate": 4.371880802724737e-06, "loss": 0.1962, "step": 136575 }, { "epoch": 1.34, "grad_norm": 11.496091842651367, "learning_rate": 4.371756680270488e-06, "loss": 0.287, "step": 136600 }, { "epoch": 1.34, "grad_norm": 4.884710788726807, "learning_rate": 4.371632557816239e-06, "loss": 0.2678, "step": 136625 }, { "epoch": 1.34, "grad_norm": 6.973674774169922, "learning_rate": 4.371508435361991e-06, "loss": 0.1841, "step": 136650 }, { "epoch": 1.34, "grad_norm": 7.672122001647949, "learning_rate": 4.371384312907742e-06, "loss": 0.2354, "step": 136675 }, { "epoch": 1.34, "grad_norm": 16.436033248901367, "learning_rate": 4.3712601904534944e-06, "loss": 0.3111, "step": 136700 }, { "epoch": 1.34, "grad_norm": 6.157918453216553, "learning_rate": 4.371136067999246e-06, "loss": 0.1775, "step": 136725 }, { "epoch": 1.34, "grad_norm": 14.71729850769043, "learning_rate": 4.371011945544998e-06, "loss": 0.2032, "step": 136750 }, { "epoch": 1.34, "grad_norm": 4.8241729736328125, "learning_rate": 4.370887823090749e-06, "loss": 0.2151, "step": 136775 }, { "epoch": 1.35, "grad_norm": 19.908126831054688, "learning_rate": 4.370763700636501e-06, "loss": 0.2315, "step": 136800 }, { "epoch": 1.35, "grad_norm": 7.814438819885254, "learning_rate": 4.370639578182252e-06, "loss": 0.2427, "step": 136825 }, { "epoch": 1.35, "grad_norm": 14.876625061035156, "learning_rate": 4.370515455728003e-06, "loss": 0.2238, "step": 136850 }, { "epoch": 1.35, "grad_norm": 4.745203018188477, "learning_rate": 4.370391333273755e-06, "loss": 0.2278, "step": 136875 }, { "epoch": 1.35, "grad_norm": 11.212532043457031, "learning_rate": 4.370267210819507e-06, "loss": 0.2386, "step": 136900 }, { "epoch": 1.35, "grad_norm": 4.290109634399414, "learning_rate": 4.370143088365258e-06, "loss": 0.2501, "step": 136925 }, { "epoch": 1.35, "grad_norm": 11.665539741516113, "learning_rate": 4.370018965911009e-06, "loss": 0.2162, "step": 136950 }, { "epoch": 1.35, "grad_norm": 9.33578109741211, "learning_rate": 4.369894843456761e-06, "loss": 0.2248, "step": 136975 }, { "epoch": 1.35, "grad_norm": 14.600189208984375, "learning_rate": 4.369770721002512e-06, "loss": 0.1939, "step": 137000 }, { "epoch": 1.35, "grad_norm": 2.492662191390991, "learning_rate": 4.3696465985482636e-06, "loss": 0.1807, "step": 137025 }, { "epoch": 1.35, "grad_norm": 12.79051685333252, "learning_rate": 4.369522476094016e-06, "loss": 0.3292, "step": 137050 }, { "epoch": 1.35, "grad_norm": 5.241563320159912, "learning_rate": 4.369398353639767e-06, "loss": 0.2098, "step": 137075 }, { "epoch": 1.35, "grad_norm": 14.974899291992188, "learning_rate": 4.369274231185519e-06, "loss": 0.2433, "step": 137100 }, { "epoch": 1.35, "grad_norm": 2.2730462551116943, "learning_rate": 4.36915010873127e-06, "loss": 0.2168, "step": 137125 }, { "epoch": 1.35, "grad_norm": 9.539167404174805, "learning_rate": 4.369025986277022e-06, "loss": 0.2139, "step": 137150 }, { "epoch": 1.35, "grad_norm": 4.206154823303223, "learning_rate": 4.368901863822773e-06, "loss": 0.1954, "step": 137175 }, { "epoch": 1.35, "grad_norm": 7.274220943450928, "learning_rate": 4.3687777413685254e-06, "loss": 0.2077, "step": 137200 }, { "epoch": 1.35, "grad_norm": 4.098337650299072, "learning_rate": 4.368653618914277e-06, "loss": 0.2294, "step": 137225 }, { "epoch": 1.35, "grad_norm": 14.536261558532715, "learning_rate": 4.368529496460028e-06, "loss": 0.2614, "step": 137250 }, { "epoch": 1.35, "grad_norm": 6.853871822357178, "learning_rate": 4.36841033890395e-06, "loss": 0.215, "step": 137275 }, { "epoch": 1.35, "grad_norm": 9.726449012756348, "learning_rate": 4.368286216449701e-06, "loss": 0.1929, "step": 137300 }, { "epoch": 1.35, "grad_norm": 5.4687652587890625, "learning_rate": 4.368162093995453e-06, "loss": 0.1812, "step": 137325 }, { "epoch": 1.35, "grad_norm": 12.734696388244629, "learning_rate": 4.368037971541204e-06, "loss": 0.2188, "step": 137350 }, { "epoch": 1.35, "grad_norm": 2.1657536029815674, "learning_rate": 4.3679138490869554e-06, "loss": 0.2497, "step": 137375 }, { "epoch": 1.35, "grad_norm": 15.826961517333984, "learning_rate": 4.3677897266327075e-06, "loss": 0.2692, "step": 137400 }, { "epoch": 1.35, "grad_norm": 3.0168240070343018, "learning_rate": 4.367665604178459e-06, "loss": 0.2186, "step": 137425 }, { "epoch": 1.35, "grad_norm": 13.067567825317383, "learning_rate": 4.36754148172421e-06, "loss": 0.2485, "step": 137450 }, { "epoch": 1.35, "grad_norm": 2.775078535079956, "learning_rate": 4.367417359269961e-06, "loss": 0.2004, "step": 137475 }, { "epoch": 1.35, "grad_norm": 11.519488334655762, "learning_rate": 4.367293236815713e-06, "loss": 0.2184, "step": 137500 }, { "epoch": 1.35, "grad_norm": 4.342960834503174, "learning_rate": 4.367169114361464e-06, "loss": 0.2135, "step": 137525 }, { "epoch": 1.35, "grad_norm": 19.659435272216797, "learning_rate": 4.3670449919072165e-06, "loss": 0.2586, "step": 137550 }, { "epoch": 1.35, "grad_norm": 6.0836076736450195, "learning_rate": 4.366920869452968e-06, "loss": 0.2442, "step": 137575 }, { "epoch": 1.35, "grad_norm": 16.548160552978516, "learning_rate": 4.36679674699872e-06, "loss": 0.252, "step": 137600 }, { "epoch": 1.35, "grad_norm": 6.411392688751221, "learning_rate": 4.366672624544471e-06, "loss": 0.2683, "step": 137625 }, { "epoch": 1.35, "grad_norm": 11.101428031921387, "learning_rate": 4.366548502090223e-06, "loss": 0.2536, "step": 137650 }, { "epoch": 1.35, "grad_norm": 6.775595664978027, "learning_rate": 4.366424379635974e-06, "loss": 0.1994, "step": 137675 }, { "epoch": 1.35, "grad_norm": 14.608951568603516, "learning_rate": 4.366300257181725e-06, "loss": 0.2836, "step": 137700 }, { "epoch": 1.35, "grad_norm": 5.7470293045043945, "learning_rate": 4.3661761347274775e-06, "loss": 0.2005, "step": 137725 }, { "epoch": 1.35, "grad_norm": 10.834452629089355, "learning_rate": 4.366052012273229e-06, "loss": 0.2747, "step": 137750 }, { "epoch": 1.35, "grad_norm": 3.504976987838745, "learning_rate": 4.36592788981898e-06, "loss": 0.233, "step": 137775 }, { "epoch": 1.35, "grad_norm": 15.641592025756836, "learning_rate": 4.365803767364732e-06, "loss": 0.2401, "step": 137800 }, { "epoch": 1.36, "grad_norm": 6.620126724243164, "learning_rate": 4.365679644910483e-06, "loss": 0.216, "step": 137825 }, { "epoch": 1.36, "grad_norm": 10.896647453308105, "learning_rate": 4.365555522456234e-06, "loss": 0.2064, "step": 137850 }, { "epoch": 1.36, "grad_norm": 7.3477325439453125, "learning_rate": 4.3654314000019864e-06, "loss": 0.2339, "step": 137875 }, { "epoch": 1.36, "grad_norm": 24.121625900268555, "learning_rate": 4.365307277547738e-06, "loss": 0.2576, "step": 137900 }, { "epoch": 1.36, "grad_norm": 6.080133438110352, "learning_rate": 4.365183155093489e-06, "loss": 0.2596, "step": 137925 }, { "epoch": 1.36, "grad_norm": 13.66464900970459, "learning_rate": 4.365059032639241e-06, "loss": 0.2715, "step": 137950 }, { "epoch": 1.36, "grad_norm": 9.067763328552246, "learning_rate": 4.364934910184992e-06, "loss": 0.2225, "step": 137975 }, { "epoch": 1.36, "grad_norm": 12.577164649963379, "learning_rate": 4.364810787730744e-06, "loss": 0.2344, "step": 138000 }, { "epoch": 1.36, "grad_norm": 6.506419658660889, "learning_rate": 4.364686665276495e-06, "loss": 0.2232, "step": 138025 }, { "epoch": 1.36, "grad_norm": 13.287131309509277, "learning_rate": 4.3645625428222475e-06, "loss": 0.2444, "step": 138050 }, { "epoch": 1.36, "grad_norm": 8.1784029006958, "learning_rate": 4.364438420367999e-06, "loss": 0.1991, "step": 138075 }, { "epoch": 1.36, "grad_norm": 14.636743545532227, "learning_rate": 4.364314297913751e-06, "loss": 0.2783, "step": 138100 }, { "epoch": 1.36, "grad_norm": 3.313403367996216, "learning_rate": 4.364190175459502e-06, "loss": 0.1874, "step": 138125 }, { "epoch": 1.36, "grad_norm": 12.037093162536621, "learning_rate": 4.364066053005253e-06, "loss": 0.2419, "step": 138150 }, { "epoch": 1.36, "grad_norm": 3.8423144817352295, "learning_rate": 4.363941930551005e-06, "loss": 0.1937, "step": 138175 }, { "epoch": 1.36, "grad_norm": 18.462509155273438, "learning_rate": 4.363817808096756e-06, "loss": 0.2839, "step": 138200 }, { "epoch": 1.36, "grad_norm": 19.38832664489746, "learning_rate": 4.363693685642508e-06, "loss": 0.2325, "step": 138225 }, { "epoch": 1.36, "grad_norm": 12.441375732421875, "learning_rate": 4.36356956318826e-06, "loss": 0.2551, "step": 138250 }, { "epoch": 1.36, "grad_norm": 6.268680095672607, "learning_rate": 4.363445440734011e-06, "loss": 0.2109, "step": 138275 }, { "epoch": 1.36, "grad_norm": 13.66144847869873, "learning_rate": 4.363321318279762e-06, "loss": 0.1977, "step": 138300 }, { "epoch": 1.36, "grad_norm": 3.166494846343994, "learning_rate": 4.363197195825513e-06, "loss": 0.1623, "step": 138325 }, { "epoch": 1.36, "grad_norm": 17.541221618652344, "learning_rate": 4.363073073371265e-06, "loss": 0.2185, "step": 138350 }, { "epoch": 1.36, "grad_norm": 5.642696380615234, "learning_rate": 4.362948950917017e-06, "loss": 0.2338, "step": 138375 }, { "epoch": 1.36, "grad_norm": 14.9486665725708, "learning_rate": 4.362824828462769e-06, "loss": 0.2348, "step": 138400 }, { "epoch": 1.36, "grad_norm": 4.05312442779541, "learning_rate": 4.36270070600852e-06, "loss": 0.2724, "step": 138425 }, { "epoch": 1.36, "grad_norm": 12.626708030700684, "learning_rate": 4.362576583554272e-06, "loss": 0.227, "step": 138450 }, { "epoch": 1.36, "grad_norm": 5.140681266784668, "learning_rate": 4.362452461100023e-06, "loss": 0.2391, "step": 138475 }, { "epoch": 1.36, "grad_norm": 14.043414115905762, "learning_rate": 4.362328338645775e-06, "loss": 0.1823, "step": 138500 }, { "epoch": 1.36, "grad_norm": 2.8765645027160645, "learning_rate": 4.362204216191526e-06, "loss": 0.2082, "step": 138525 }, { "epoch": 1.36, "grad_norm": 14.02043628692627, "learning_rate": 4.362080093737278e-06, "loss": 0.2401, "step": 138550 }, { "epoch": 1.36, "grad_norm": 0.6256030201911926, "learning_rate": 4.36195597128303e-06, "loss": 0.1928, "step": 138575 }, { "epoch": 1.36, "grad_norm": 12.301492691040039, "learning_rate": 4.361831848828781e-06, "loss": 0.2209, "step": 138600 }, { "epoch": 1.36, "grad_norm": 3.029737710952759, "learning_rate": 4.361707726374532e-06, "loss": 0.2169, "step": 138625 }, { "epoch": 1.36, "grad_norm": 10.256930351257324, "learning_rate": 4.361583603920284e-06, "loss": 0.2065, "step": 138650 }, { "epoch": 1.36, "grad_norm": 4.297562122344971, "learning_rate": 4.361459481466035e-06, "loss": 0.1962, "step": 138675 }, { "epoch": 1.36, "grad_norm": 13.732364654541016, "learning_rate": 4.361335359011787e-06, "loss": 0.1492, "step": 138700 }, { "epoch": 1.36, "grad_norm": 8.697270393371582, "learning_rate": 4.361211236557539e-06, "loss": 0.1944, "step": 138725 }, { "epoch": 1.36, "grad_norm": 11.177449226379395, "learning_rate": 4.36108711410329e-06, "loss": 0.2692, "step": 138750 }, { "epoch": 1.36, "grad_norm": 4.046298503875732, "learning_rate": 4.360962991649041e-06, "loss": 0.2688, "step": 138775 }, { "epoch": 1.36, "grad_norm": 10.320836067199707, "learning_rate": 4.360838869194793e-06, "loss": 0.2419, "step": 138800 }, { "epoch": 1.36, "grad_norm": 5.32994270324707, "learning_rate": 4.360714746740544e-06, "loss": 0.215, "step": 138825 }, { "epoch": 1.37, "grad_norm": 13.018478393554688, "learning_rate": 4.360590624286296e-06, "loss": 0.2899, "step": 138850 }, { "epoch": 1.37, "grad_norm": 7.873075008392334, "learning_rate": 4.360466501832048e-06, "loss": 0.2231, "step": 138875 }, { "epoch": 1.37, "grad_norm": 15.512901306152344, "learning_rate": 4.3603423793778e-06, "loss": 0.2227, "step": 138900 }, { "epoch": 1.37, "grad_norm": 7.1876749992370605, "learning_rate": 4.360218256923551e-06, "loss": 0.2035, "step": 138925 }, { "epoch": 1.37, "grad_norm": 14.107749938964844, "learning_rate": 4.360094134469303e-06, "loss": 0.2382, "step": 138950 }, { "epoch": 1.37, "grad_norm": 1.371035099029541, "learning_rate": 4.359970012015054e-06, "loss": 0.2314, "step": 138975 }, { "epoch": 1.37, "grad_norm": 8.777149200439453, "learning_rate": 4.359845889560805e-06, "loss": 0.2635, "step": 139000 }, { "epoch": 1.37, "grad_norm": 7.616693496704102, "learning_rate": 4.359721767106557e-06, "loss": 0.2197, "step": 139025 }, { "epoch": 1.37, "grad_norm": 14.974286079406738, "learning_rate": 4.359597644652309e-06, "loss": 0.2354, "step": 139050 }, { "epoch": 1.37, "grad_norm": 0.2655041515827179, "learning_rate": 4.35947352219806e-06, "loss": 0.2001, "step": 139075 }, { "epoch": 1.37, "grad_norm": 13.96699333190918, "learning_rate": 4.359349399743812e-06, "loss": 0.2515, "step": 139100 }, { "epoch": 1.37, "grad_norm": 7.392407417297363, "learning_rate": 4.359225277289563e-06, "loss": 0.2315, "step": 139125 }, { "epoch": 1.37, "grad_norm": 12.962549209594727, "learning_rate": 4.359101154835314e-06, "loss": 0.2954, "step": 139150 }, { "epoch": 1.37, "grad_norm": 4.798254013061523, "learning_rate": 4.358977032381066e-06, "loss": 0.2396, "step": 139175 }, { "epoch": 1.37, "grad_norm": 16.545848846435547, "learning_rate": 4.358852909926818e-06, "loss": 0.2479, "step": 139200 }, { "epoch": 1.37, "grad_norm": 5.83384895324707, "learning_rate": 4.35872878747257e-06, "loss": 0.2401, "step": 139225 }, { "epoch": 1.37, "grad_norm": 21.958330154418945, "learning_rate": 4.358604665018321e-06, "loss": 0.2235, "step": 139250 }, { "epoch": 1.37, "grad_norm": 1.6733479499816895, "learning_rate": 4.358480542564073e-06, "loss": 0.2108, "step": 139275 }, { "epoch": 1.37, "grad_norm": 14.370040893554688, "learning_rate": 4.358356420109824e-06, "loss": 0.2497, "step": 139300 }, { "epoch": 1.37, "grad_norm": 3.6894173622131348, "learning_rate": 4.358232297655575e-06, "loss": 0.1856, "step": 139325 }, { "epoch": 1.37, "grad_norm": 8.955307006835938, "learning_rate": 4.358108175201327e-06, "loss": 0.2594, "step": 139350 }, { "epoch": 1.37, "grad_norm": 4.93232536315918, "learning_rate": 4.357984052747079e-06, "loss": 0.2053, "step": 139375 }, { "epoch": 1.37, "grad_norm": 11.3510103225708, "learning_rate": 4.35785993029283e-06, "loss": 0.2554, "step": 139400 }, { "epoch": 1.37, "grad_norm": 0.6710118055343628, "learning_rate": 4.357735807838582e-06, "loss": 0.227, "step": 139425 }, { "epoch": 1.37, "grad_norm": 16.83525276184082, "learning_rate": 4.357611685384333e-06, "loss": 0.283, "step": 139450 }, { "epoch": 1.37, "grad_norm": 4.395604610443115, "learning_rate": 4.357487562930084e-06, "loss": 0.2166, "step": 139475 }, { "epoch": 1.37, "grad_norm": 12.131575584411621, "learning_rate": 4.357363440475836e-06, "loss": 0.2604, "step": 139500 }, { "epoch": 1.37, "grad_norm": 4.511504173278809, "learning_rate": 4.3572393180215876e-06, "loss": 0.2266, "step": 139525 }, { "epoch": 1.37, "grad_norm": 11.738913536071777, "learning_rate": 4.357115195567339e-06, "loss": 0.2696, "step": 139550 }, { "epoch": 1.37, "grad_norm": 11.440153121948242, "learning_rate": 4.356991073113091e-06, "loss": 0.2283, "step": 139575 }, { "epoch": 1.37, "grad_norm": 17.581768035888672, "learning_rate": 4.356866950658842e-06, "loss": 0.2539, "step": 139600 }, { "epoch": 1.37, "grad_norm": 6.399583339691162, "learning_rate": 4.356747793102764e-06, "loss": 0.2275, "step": 139625 }, { "epoch": 1.37, "grad_norm": 16.528160095214844, "learning_rate": 4.356623670648515e-06, "loss": 0.2832, "step": 139650 }, { "epoch": 1.37, "grad_norm": 8.172945976257324, "learning_rate": 4.356499548194267e-06, "loss": 0.1882, "step": 139675 }, { "epoch": 1.37, "grad_norm": 14.86141586303711, "learning_rate": 4.356375425740018e-06, "loss": 0.2234, "step": 139700 }, { "epoch": 1.37, "grad_norm": 2.8841400146484375, "learning_rate": 4.3562513032857705e-06, "loss": 0.1943, "step": 139725 }, { "epoch": 1.37, "grad_norm": 14.837106704711914, "learning_rate": 4.356127180831522e-06, "loss": 0.2283, "step": 139750 }, { "epoch": 1.37, "grad_norm": 5.343163013458252, "learning_rate": 4.356003058377273e-06, "loss": 0.2094, "step": 139775 }, { "epoch": 1.37, "grad_norm": 16.066457748413086, "learning_rate": 4.355878935923025e-06, "loss": 0.2547, "step": 139800 }, { "epoch": 1.37, "grad_norm": 5.871826171875, "learning_rate": 4.355754813468776e-06, "loss": 0.2573, "step": 139825 }, { "epoch": 1.38, "grad_norm": 16.0415096282959, "learning_rate": 4.355630691014527e-06, "loss": 0.2333, "step": 139850 }, { "epoch": 1.38, "grad_norm": 2.2067644596099854, "learning_rate": 4.3555065685602794e-06, "loss": 0.2133, "step": 139875 }, { "epoch": 1.38, "grad_norm": 19.23496437072754, "learning_rate": 4.355382446106031e-06, "loss": 0.238, "step": 139900 }, { "epoch": 1.38, "grad_norm": 4.125644683837891, "learning_rate": 4.355258323651782e-06, "loss": 0.2255, "step": 139925 }, { "epoch": 1.38, "grad_norm": 16.152536392211914, "learning_rate": 4.355134201197534e-06, "loss": 0.2415, "step": 139950 }, { "epoch": 1.38, "grad_norm": 6.6542253494262695, "learning_rate": 4.355010078743285e-06, "loss": 0.2251, "step": 139975 }, { "epoch": 1.38, "grad_norm": 10.71972370147705, "learning_rate": 4.354885956289036e-06, "loss": 0.227, "step": 140000 }, { "epoch": 1.38, "eval_loss": 0.5758389234542847, "eval_runtime": 6059.1277, "eval_samples_per_second": 1.562, "eval_steps_per_second": 0.195, "eval_wer": 0.13226678684091933, "step": 140000 }, { "epoch": 1.38, "grad_norm": 8.102038383483887, "learning_rate": 4.354761833834788e-06, "loss": 0.1962, "step": 140025 }, { "epoch": 1.38, "grad_norm": 14.48742961883545, "learning_rate": 4.35463771138054e-06, "loss": 0.2319, "step": 140050 }, { "epoch": 1.38, "grad_norm": 8.061699867248535, "learning_rate": 4.354513588926292e-06, "loss": 0.2196, "step": 140075 }, { "epoch": 1.38, "grad_norm": 20.502670288085938, "learning_rate": 4.354389466472043e-06, "loss": 0.217, "step": 140100 }, { "epoch": 1.38, "grad_norm": 4.423060417175293, "learning_rate": 4.354265344017795e-06, "loss": 0.2109, "step": 140125 }, { "epoch": 1.38, "grad_norm": 18.2462215423584, "learning_rate": 4.354141221563546e-06, "loss": 0.233, "step": 140150 }, { "epoch": 1.38, "grad_norm": 6.366488456726074, "learning_rate": 4.354017099109298e-06, "loss": 0.2071, "step": 140175 }, { "epoch": 1.38, "grad_norm": 11.992898941040039, "learning_rate": 4.353892976655049e-06, "loss": 0.1985, "step": 140200 }, { "epoch": 1.38, "grad_norm": 9.359869956970215, "learning_rate": 4.353768854200801e-06, "loss": 0.2165, "step": 140225 }, { "epoch": 1.38, "grad_norm": 12.768844604492188, "learning_rate": 4.353644731746553e-06, "loss": 0.258, "step": 140250 }, { "epoch": 1.38, "grad_norm": 7.198148727416992, "learning_rate": 4.353520609292304e-06, "loss": 0.2206, "step": 140275 }, { "epoch": 1.38, "grad_norm": 21.543703079223633, "learning_rate": 4.353396486838055e-06, "loss": 0.284, "step": 140300 }, { "epoch": 1.38, "grad_norm": 6.036532402038574, "learning_rate": 4.353272364383807e-06, "loss": 0.2486, "step": 140325 }, { "epoch": 1.38, "grad_norm": 19.647117614746094, "learning_rate": 4.353148241929558e-06, "loss": 0.2286, "step": 140350 }, { "epoch": 1.38, "grad_norm": 10.27474594116211, "learning_rate": 4.35302411947531e-06, "loss": 0.1992, "step": 140375 }, { "epoch": 1.38, "grad_norm": 12.88915729522705, "learning_rate": 4.352899997021062e-06, "loss": 0.2892, "step": 140400 }, { "epoch": 1.38, "grad_norm": 9.977450370788574, "learning_rate": 4.352775874566813e-06, "loss": 0.2382, "step": 140425 }, { "epoch": 1.38, "grad_norm": 13.189664840698242, "learning_rate": 4.352651752112564e-06, "loss": 0.2469, "step": 140450 }, { "epoch": 1.38, "grad_norm": 0.14499250054359436, "learning_rate": 4.352527629658316e-06, "loss": 0.2305, "step": 140475 }, { "epoch": 1.38, "grad_norm": 16.69020652770996, "learning_rate": 4.352403507204067e-06, "loss": 0.2478, "step": 140500 }, { "epoch": 1.38, "grad_norm": 6.607022762298584, "learning_rate": 4.352279384749819e-06, "loss": 0.2217, "step": 140525 }, { "epoch": 1.38, "grad_norm": 12.532285690307617, "learning_rate": 4.352155262295571e-06, "loss": 0.1851, "step": 140550 }, { "epoch": 1.38, "grad_norm": 6.980555057525635, "learning_rate": 4.352031139841323e-06, "loss": 0.2164, "step": 140575 }, { "epoch": 1.38, "grad_norm": 19.98702621459961, "learning_rate": 4.351907017387074e-06, "loss": 0.2059, "step": 140600 }, { "epoch": 1.38, "grad_norm": 3.2996768951416016, "learning_rate": 4.351782894932825e-06, "loss": 0.2265, "step": 140625 }, { "epoch": 1.38, "grad_norm": 18.995319366455078, "learning_rate": 4.351658772478577e-06, "loss": 0.3014, "step": 140650 }, { "epoch": 1.38, "grad_norm": 6.19497013092041, "learning_rate": 4.351534650024328e-06, "loss": 0.2099, "step": 140675 }, { "epoch": 1.38, "grad_norm": 21.65911102294922, "learning_rate": 4.3514105275700796e-06, "loss": 0.2608, "step": 140700 }, { "epoch": 1.38, "grad_norm": 6.9946136474609375, "learning_rate": 4.351286405115832e-06, "loss": 0.1763, "step": 140725 }, { "epoch": 1.38, "grad_norm": 11.35628890991211, "learning_rate": 4.351162282661583e-06, "loss": 0.2484, "step": 140750 }, { "epoch": 1.38, "grad_norm": 5.923892498016357, "learning_rate": 4.351038160207334e-06, "loss": 0.2291, "step": 140775 }, { "epoch": 1.38, "grad_norm": 10.855462074279785, "learning_rate": 4.350914037753086e-06, "loss": 0.3004, "step": 140800 }, { "epoch": 1.38, "grad_norm": 1.8798078298568726, "learning_rate": 4.350789915298837e-06, "loss": 0.1944, "step": 140825 }, { "epoch": 1.38, "grad_norm": 12.744999885559082, "learning_rate": 4.3506657928445885e-06, "loss": 0.2546, "step": 140850 }, { "epoch": 1.39, "grad_norm": 6.449207305908203, "learning_rate": 4.350541670390341e-06, "loss": 0.2591, "step": 140875 }, { "epoch": 1.39, "grad_norm": 16.693750381469727, "learning_rate": 4.350417547936092e-06, "loss": 0.2343, "step": 140900 }, { "epoch": 1.39, "grad_norm": 3.9859812259674072, "learning_rate": 4.350293425481844e-06, "loss": 0.1973, "step": 140925 }, { "epoch": 1.39, "grad_norm": 18.262773513793945, "learning_rate": 4.350169303027595e-06, "loss": 0.2174, "step": 140950 }, { "epoch": 1.39, "grad_norm": 4.4140143394470215, "learning_rate": 4.350045180573347e-06, "loss": 0.2458, "step": 140975 }, { "epoch": 1.39, "grad_norm": 16.87392807006836, "learning_rate": 4.349921058119098e-06, "loss": 0.2533, "step": 141000 }, { "epoch": 1.39, "grad_norm": 9.172922134399414, "learning_rate": 4.34979693566485e-06, "loss": 0.2066, "step": 141025 }, { "epoch": 1.39, "grad_norm": 11.211629867553711, "learning_rate": 4.349672813210602e-06, "loss": 0.1942, "step": 141050 }, { "epoch": 1.39, "grad_norm": 6.461820602416992, "learning_rate": 4.349548690756353e-06, "loss": 0.2014, "step": 141075 }, { "epoch": 1.39, "grad_norm": 9.480567932128906, "learning_rate": 4.349424568302105e-06, "loss": 0.2419, "step": 141100 }, { "epoch": 1.39, "grad_norm": 5.883343696594238, "learning_rate": 4.349300445847856e-06, "loss": 0.1972, "step": 141125 }, { "epoch": 1.39, "grad_norm": 11.07579517364502, "learning_rate": 4.349176323393607e-06, "loss": 0.1928, "step": 141150 }, { "epoch": 1.39, "grad_norm": 2.3828952312469482, "learning_rate": 4.349052200939359e-06, "loss": 0.2439, "step": 141175 }, { "epoch": 1.39, "grad_norm": 23.874292373657227, "learning_rate": 4.3489280784851106e-06, "loss": 0.2608, "step": 141200 }, { "epoch": 1.39, "grad_norm": 5.581405162811279, "learning_rate": 4.348803956030862e-06, "loss": 0.1966, "step": 141225 }, { "epoch": 1.39, "grad_norm": 20.07847023010254, "learning_rate": 4.348679833576614e-06, "loss": 0.2636, "step": 141250 }, { "epoch": 1.39, "grad_norm": 3.8860812187194824, "learning_rate": 4.348555711122365e-06, "loss": 0.196, "step": 141275 }, { "epoch": 1.39, "grad_norm": 13.918455123901367, "learning_rate": 4.348431588668116e-06, "loss": 0.2638, "step": 141300 }, { "epoch": 1.39, "grad_norm": 4.96771764755249, "learning_rate": 4.348307466213868e-06, "loss": 0.2423, "step": 141325 }, { "epoch": 1.39, "grad_norm": 17.87027931213379, "learning_rate": 4.3481833437596195e-06, "loss": 0.2669, "step": 141350 }, { "epoch": 1.39, "grad_norm": 4.561116695404053, "learning_rate": 4.348059221305372e-06, "loss": 0.227, "step": 141375 }, { "epoch": 1.39, "grad_norm": 17.255111694335938, "learning_rate": 4.347935098851123e-06, "loss": 0.254, "step": 141400 }, { "epoch": 1.39, "grad_norm": 6.647614002227783, "learning_rate": 4.347810976396875e-06, "loss": 0.2044, "step": 141425 }, { "epoch": 1.39, "grad_norm": 5.962655544281006, "learning_rate": 4.347686853942626e-06, "loss": 0.185, "step": 141450 }, { "epoch": 1.39, "grad_norm": 11.69825553894043, "learning_rate": 4.347562731488377e-06, "loss": 0.205, "step": 141475 }, { "epoch": 1.39, "grad_norm": 10.43247127532959, "learning_rate": 4.347438609034129e-06, "loss": 0.2428, "step": 141500 }, { "epoch": 1.39, "grad_norm": 7.0827155113220215, "learning_rate": 4.3473144865798806e-06, "loss": 0.2248, "step": 141525 }, { "epoch": 1.39, "grad_norm": 8.6065092086792, "learning_rate": 4.347190364125632e-06, "loss": 0.261, "step": 141550 }, { "epoch": 1.39, "grad_norm": 7.777624607086182, "learning_rate": 4.347066241671384e-06, "loss": 0.2285, "step": 141575 }, { "epoch": 1.39, "grad_norm": 11.015341758728027, "learning_rate": 4.346942119217135e-06, "loss": 0.2619, "step": 141600 }, { "epoch": 1.39, "grad_norm": 7.840792655944824, "learning_rate": 4.346817996762886e-06, "loss": 0.2044, "step": 141625 }, { "epoch": 1.39, "grad_norm": 14.604777336120605, "learning_rate": 4.346693874308638e-06, "loss": 0.2425, "step": 141650 }, { "epoch": 1.39, "grad_norm": 7.4383544921875, "learning_rate": 4.3465697518543895e-06, "loss": 0.2821, "step": 141675 }, { "epoch": 1.39, "grad_norm": 10.69802474975586, "learning_rate": 4.346445629400141e-06, "loss": 0.2215, "step": 141700 }, { "epoch": 1.39, "grad_norm": 4.215821743011475, "learning_rate": 4.346321506945893e-06, "loss": 0.2208, "step": 141725 }, { "epoch": 1.39, "grad_norm": 11.624191284179688, "learning_rate": 4.346197384491644e-06, "loss": 0.2589, "step": 141750 }, { "epoch": 1.39, "grad_norm": 8.461258888244629, "learning_rate": 4.346073262037396e-06, "loss": 0.2789, "step": 141775 }, { "epoch": 1.39, "grad_norm": 16.915040969848633, "learning_rate": 4.345949139583147e-06, "loss": 0.2296, "step": 141800 }, { "epoch": 1.39, "grad_norm": 3.332216739654541, "learning_rate": 4.345829982027069e-06, "loss": 0.2674, "step": 141825 }, { "epoch": 1.39, "grad_norm": 12.39121150970459, "learning_rate": 4.34570585957282e-06, "loss": 0.2495, "step": 141850 }, { "epoch": 1.39, "grad_norm": 10.013053894042969, "learning_rate": 4.345581737118572e-06, "loss": 0.2167, "step": 141875 }, { "epoch": 1.4, "grad_norm": 13.55473518371582, "learning_rate": 4.345457614664324e-06, "loss": 0.2482, "step": 141900 }, { "epoch": 1.4, "grad_norm": 5.366178035736084, "learning_rate": 4.345333492210076e-06, "loss": 0.2319, "step": 141925 }, { "epoch": 1.4, "grad_norm": 16.212934494018555, "learning_rate": 4.345209369755827e-06, "loss": 0.2257, "step": 141950 }, { "epoch": 1.4, "grad_norm": 4.35817289352417, "learning_rate": 4.345085247301578e-06, "loss": 0.2569, "step": 141975 }, { "epoch": 1.4, "grad_norm": 11.948685646057129, "learning_rate": 4.34496112484733e-06, "loss": 0.2155, "step": 142000 }, { "epoch": 1.4, "grad_norm": 3.7735447883605957, "learning_rate": 4.344837002393081e-06, "loss": 0.2496, "step": 142025 }, { "epoch": 1.4, "grad_norm": 17.07169532775879, "learning_rate": 4.344712879938833e-06, "loss": 0.2568, "step": 142050 }, { "epoch": 1.4, "grad_norm": 9.099164009094238, "learning_rate": 4.344588757484584e-06, "loss": 0.1743, "step": 142075 }, { "epoch": 1.4, "grad_norm": 15.492310523986816, "learning_rate": 4.344464635030336e-06, "loss": 0.2807, "step": 142100 }, { "epoch": 1.4, "grad_norm": 8.316987991333008, "learning_rate": 4.344340512576087e-06, "loss": 0.1945, "step": 142125 }, { "epoch": 1.4, "grad_norm": 17.074426651000977, "learning_rate": 4.344216390121839e-06, "loss": 0.2337, "step": 142150 }, { "epoch": 1.4, "grad_norm": 3.2330305576324463, "learning_rate": 4.34409226766759e-06, "loss": 0.2174, "step": 142175 }, { "epoch": 1.4, "grad_norm": 10.485437393188477, "learning_rate": 4.343968145213342e-06, "loss": 0.2376, "step": 142200 }, { "epoch": 1.4, "grad_norm": 7.547940731048584, "learning_rate": 4.343844022759094e-06, "loss": 0.224, "step": 142225 }, { "epoch": 1.4, "grad_norm": 12.909192085266113, "learning_rate": 4.343719900304846e-06, "loss": 0.2761, "step": 142250 }, { "epoch": 1.4, "grad_norm": 2.8257715702056885, "learning_rate": 4.343595777850597e-06, "loss": 0.2871, "step": 142275 }, { "epoch": 1.4, "grad_norm": 9.257452011108398, "learning_rate": 4.343471655396348e-06, "loss": 0.2372, "step": 142300 }, { "epoch": 1.4, "grad_norm": 12.29336929321289, "learning_rate": 4.3433475329421e-06, "loss": 0.2334, "step": 142325 }, { "epoch": 1.4, "grad_norm": 11.291097640991211, "learning_rate": 4.343223410487851e-06, "loss": 0.2697, "step": 142350 }, { "epoch": 1.4, "grad_norm": 4.964902877807617, "learning_rate": 4.3430992880336026e-06, "loss": 0.2229, "step": 142375 }, { "epoch": 1.4, "grad_norm": 14.87120246887207, "learning_rate": 4.342975165579355e-06, "loss": 0.2373, "step": 142400 }, { "epoch": 1.4, "grad_norm": 4.957470893859863, "learning_rate": 4.342851043125106e-06, "loss": 0.2484, "step": 142425 }, { "epoch": 1.4, "grad_norm": 17.50591468811035, "learning_rate": 4.342726920670857e-06, "loss": 0.2434, "step": 142450 }, { "epoch": 1.4, "grad_norm": 8.199772834777832, "learning_rate": 4.342602798216609e-06, "loss": 0.2443, "step": 142475 }, { "epoch": 1.4, "grad_norm": 16.167264938354492, "learning_rate": 4.34247867576236e-06, "loss": 0.2423, "step": 142500 }, { "epoch": 1.4, "grad_norm": 1.6324162483215332, "learning_rate": 4.3423545533081115e-06, "loss": 0.2136, "step": 142525 }, { "epoch": 1.4, "grad_norm": 9.627168655395508, "learning_rate": 4.342230430853864e-06, "loss": 0.2472, "step": 142550 }, { "epoch": 1.4, "grad_norm": 2.6758334636688232, "learning_rate": 4.342106308399615e-06, "loss": 0.2441, "step": 142575 }, { "epoch": 1.4, "grad_norm": 17.67582893371582, "learning_rate": 4.341982185945367e-06, "loss": 0.2607, "step": 142600 }, { "epoch": 1.4, "grad_norm": 9.12712287902832, "learning_rate": 4.341858063491118e-06, "loss": 0.2197, "step": 142625 }, { "epoch": 1.4, "grad_norm": 18.905519485473633, "learning_rate": 4.34173394103687e-06, "loss": 0.2498, "step": 142650 }, { "epoch": 1.4, "grad_norm": 3.754486083984375, "learning_rate": 4.341609818582621e-06, "loss": 0.2832, "step": 142675 }, { "epoch": 1.4, "grad_norm": 20.14773941040039, "learning_rate": 4.341485696128373e-06, "loss": 0.241, "step": 142700 }, { "epoch": 1.4, "grad_norm": 6.508164405822754, "learning_rate": 4.341361573674125e-06, "loss": 0.216, "step": 142725 }, { "epoch": 1.4, "grad_norm": 15.22961711883545, "learning_rate": 4.341237451219876e-06, "loss": 0.247, "step": 142750 }, { "epoch": 1.4, "grad_norm": 7.914768695831299, "learning_rate": 4.341113328765628e-06, "loss": 0.2081, "step": 142775 }, { "epoch": 1.4, "grad_norm": 12.37458610534668, "learning_rate": 4.340989206311379e-06, "loss": 0.2283, "step": 142800 }, { "epoch": 1.4, "grad_norm": 5.07891321182251, "learning_rate": 4.34086508385713e-06, "loss": 0.2037, "step": 142825 }, { "epoch": 1.4, "grad_norm": 11.805991172790527, "learning_rate": 4.340740961402882e-06, "loss": 0.2853, "step": 142850 }, { "epoch": 1.4, "grad_norm": 12.848814964294434, "learning_rate": 4.340616838948634e-06, "loss": 0.2315, "step": 142875 }, { "epoch": 1.41, "grad_norm": 23.069013595581055, "learning_rate": 4.340492716494385e-06, "loss": 0.2381, "step": 142900 }, { "epoch": 1.41, "grad_norm": 0.34509843587875366, "learning_rate": 4.340368594040136e-06, "loss": 0.2147, "step": 142925 }, { "epoch": 1.41, "grad_norm": 15.154476165771484, "learning_rate": 4.340244471585888e-06, "loss": 0.247, "step": 142950 }, { "epoch": 1.41, "grad_norm": 8.410543441772461, "learning_rate": 4.340120349131639e-06, "loss": 0.2516, "step": 142975 }, { "epoch": 1.41, "grad_norm": 18.036022186279297, "learning_rate": 4.339996226677391e-06, "loss": 0.2528, "step": 143000 }, { "epoch": 1.41, "grad_norm": 5.310205459594727, "learning_rate": 4.3398721042231425e-06, "loss": 0.2141, "step": 143025 }, { "epoch": 1.41, "grad_norm": 6.143832683563232, "learning_rate": 4.339747981768895e-06, "loss": 0.2019, "step": 143050 }, { "epoch": 1.41, "grad_norm": 1.990419626235962, "learning_rate": 4.339623859314646e-06, "loss": 0.2268, "step": 143075 }, { "epoch": 1.41, "grad_norm": 16.8817081451416, "learning_rate": 4.339499736860398e-06, "loss": 0.21, "step": 143100 }, { "epoch": 1.41, "grad_norm": 5.573775768280029, "learning_rate": 4.339375614406149e-06, "loss": 0.2083, "step": 143125 }, { "epoch": 1.41, "grad_norm": 11.309297561645508, "learning_rate": 4.3392514919519e-06, "loss": 0.2293, "step": 143150 }, { "epoch": 1.41, "grad_norm": 7.178645610809326, "learning_rate": 4.339127369497652e-06, "loss": 0.2404, "step": 143175 }, { "epoch": 1.41, "grad_norm": 16.89045524597168, "learning_rate": 4.3390032470434036e-06, "loss": 0.2563, "step": 143200 }, { "epoch": 1.41, "grad_norm": 4.631725311279297, "learning_rate": 4.338879124589155e-06, "loss": 0.1698, "step": 143225 }, { "epoch": 1.41, "grad_norm": 14.906959533691406, "learning_rate": 4.338755002134907e-06, "loss": 0.2503, "step": 143250 }, { "epoch": 1.41, "grad_norm": 5.171718597412109, "learning_rate": 4.338630879680658e-06, "loss": 0.227, "step": 143275 }, { "epoch": 1.41, "grad_norm": 15.036201477050781, "learning_rate": 4.338506757226409e-06, "loss": 0.2631, "step": 143300 }, { "epoch": 1.41, "grad_norm": 4.804661273956299, "learning_rate": 4.338382634772161e-06, "loss": 0.2035, "step": 143325 }, { "epoch": 1.41, "grad_norm": 16.029735565185547, "learning_rate": 4.3382585123179125e-06, "loss": 0.1985, "step": 143350 }, { "epoch": 1.41, "grad_norm": 3.485459804534912, "learning_rate": 4.338134389863664e-06, "loss": 0.2131, "step": 143375 }, { "epoch": 1.41, "grad_norm": 11.557448387145996, "learning_rate": 4.338010267409416e-06, "loss": 0.2484, "step": 143400 }, { "epoch": 1.41, "grad_norm": 4.931928634643555, "learning_rate": 4.337886144955167e-06, "loss": 0.1938, "step": 143425 }, { "epoch": 1.41, "grad_norm": 14.392374038696289, "learning_rate": 4.337762022500919e-06, "loss": 0.2438, "step": 143450 }, { "epoch": 1.41, "grad_norm": 6.633610725402832, "learning_rate": 4.33763790004667e-06, "loss": 0.2183, "step": 143475 }, { "epoch": 1.41, "grad_norm": 14.58227252960205, "learning_rate": 4.337513777592422e-06, "loss": 0.2604, "step": 143500 }, { "epoch": 1.41, "grad_norm": 3.637272834777832, "learning_rate": 4.3373896551381735e-06, "loss": 0.2629, "step": 143525 }, { "epoch": 1.41, "grad_norm": 12.901302337646484, "learning_rate": 4.337265532683926e-06, "loss": 0.2305, "step": 143550 }, { "epoch": 1.41, "grad_norm": 4.609577178955078, "learning_rate": 4.337141410229677e-06, "loss": 0.1839, "step": 143575 }, { "epoch": 1.41, "grad_norm": 14.543405532836914, "learning_rate": 4.337017287775428e-06, "loss": 0.2803, "step": 143600 }, { "epoch": 1.41, "grad_norm": 6.2489495277404785, "learning_rate": 4.33689316532118e-06, "loss": 0.2181, "step": 143625 }, { "epoch": 1.41, "grad_norm": 12.784219741821289, "learning_rate": 4.336769042866931e-06, "loss": 0.2841, "step": 143650 }, { "epoch": 1.41, "grad_norm": 4.860693454742432, "learning_rate": 4.3366449204126825e-06, "loss": 0.2549, "step": 143675 }, { "epoch": 1.41, "grad_norm": 13.981861114501953, "learning_rate": 4.3365207979584346e-06, "loss": 0.2328, "step": 143700 }, { "epoch": 1.41, "grad_norm": 2.145892858505249, "learning_rate": 4.336396675504186e-06, "loss": 0.2323, "step": 143725 }, { "epoch": 1.41, "grad_norm": 14.058755874633789, "learning_rate": 4.336272553049937e-06, "loss": 0.2182, "step": 143750 }, { "epoch": 1.41, "grad_norm": 6.81107234954834, "learning_rate": 4.336148430595688e-06, "loss": 0.2231, "step": 143775 }, { "epoch": 1.41, "grad_norm": 16.00098419189453, "learning_rate": 4.33602430814144e-06, "loss": 0.2141, "step": 143800 }, { "epoch": 1.41, "grad_norm": 2.6257851123809814, "learning_rate": 4.3359001856871915e-06, "loss": 0.2011, "step": 143825 }, { "epoch": 1.41, "grad_norm": Infinity, "learning_rate": 4.335781028131113e-06, "loss": 0.216, "step": 143850 }, { "epoch": 1.41, "grad_norm": 6.154589653015137, "learning_rate": 4.3356569056768646e-06, "loss": 0.2062, "step": 143875 }, { "epoch": 1.41, "grad_norm": 19.767736434936523, "learning_rate": 4.335532783222617e-06, "loss": 0.2616, "step": 143900 }, { "epoch": 1.42, "grad_norm": 2.9582550525665283, "learning_rate": 4.335408660768368e-06, "loss": 0.2253, "step": 143925 }, { "epoch": 1.42, "grad_norm": 9.85506534576416, "learning_rate": 4.33528453831412e-06, "loss": 0.2278, "step": 143950 }, { "epoch": 1.42, "grad_norm": 7.086476802825928, "learning_rate": 4.335160415859871e-06, "loss": 0.2009, "step": 143975 }, { "epoch": 1.42, "grad_norm": 12.941642761230469, "learning_rate": 4.335036293405623e-06, "loss": 0.2406, "step": 144000 }, { "epoch": 1.42, "grad_norm": 6.668257236480713, "learning_rate": 4.334912170951374e-06, "loss": 0.2161, "step": 144025 }, { "epoch": 1.42, "grad_norm": 11.544346809387207, "learning_rate": 4.334788048497126e-06, "loss": 0.2296, "step": 144050 }, { "epoch": 1.42, "grad_norm": 3.452702045440674, "learning_rate": 4.334663926042878e-06, "loss": 0.2427, "step": 144075 }, { "epoch": 1.42, "grad_norm": 18.572004318237305, "learning_rate": 4.334539803588629e-06, "loss": 0.226, "step": 144100 }, { "epoch": 1.42, "grad_norm": 6.22231912612915, "learning_rate": 4.33441568113438e-06, "loss": 0.2155, "step": 144125 }, { "epoch": 1.42, "grad_norm": 11.64397144317627, "learning_rate": 4.334291558680132e-06, "loss": 0.2401, "step": 144150 }, { "epoch": 1.42, "grad_norm": 5.783189296722412, "learning_rate": 4.334167436225883e-06, "loss": 0.2073, "step": 144175 }, { "epoch": 1.42, "grad_norm": 16.32879066467285, "learning_rate": 4.3340433137716345e-06, "loss": 0.2497, "step": 144200 }, { "epoch": 1.42, "grad_norm": 6.060708045959473, "learning_rate": 4.333919191317387e-06, "loss": 0.2288, "step": 144225 }, { "epoch": 1.42, "grad_norm": 12.81336498260498, "learning_rate": 4.333795068863138e-06, "loss": 0.2758, "step": 144250 }, { "epoch": 1.42, "grad_norm": 4.551025867462158, "learning_rate": 4.333670946408889e-06, "loss": 0.2103, "step": 144275 }, { "epoch": 1.42, "grad_norm": 20.466299057006836, "learning_rate": 4.333546823954641e-06, "loss": 0.2389, "step": 144300 }, { "epoch": 1.42, "grad_norm": 5.169661521911621, "learning_rate": 4.333422701500392e-06, "loss": 0.2459, "step": 144325 }, { "epoch": 1.42, "grad_norm": 10.2069673538208, "learning_rate": 4.333298579046144e-06, "loss": 0.2325, "step": 144350 }, { "epoch": 1.42, "grad_norm": 7.146702766418457, "learning_rate": 4.3331744565918956e-06, "loss": 0.2137, "step": 144375 }, { "epoch": 1.42, "grad_norm": 19.033504486083984, "learning_rate": 4.333050334137648e-06, "loss": 0.248, "step": 144400 }, { "epoch": 1.42, "grad_norm": 5.401582717895508, "learning_rate": 4.332926211683399e-06, "loss": 0.2356, "step": 144425 }, { "epoch": 1.42, "grad_norm": 18.51287841796875, "learning_rate": 4.33280208922915e-06, "loss": 0.2712, "step": 144450 }, { "epoch": 1.42, "grad_norm": 6.267446517944336, "learning_rate": 4.332677966774902e-06, "loss": 0.2199, "step": 144475 }, { "epoch": 1.42, "grad_norm": 16.395965576171875, "learning_rate": 4.332553844320653e-06, "loss": 0.2633, "step": 144500 }, { "epoch": 1.42, "grad_norm": 4.296096324920654, "learning_rate": 4.3324297218664045e-06, "loss": 0.2551, "step": 144525 }, { "epoch": 1.42, "grad_norm": 18.44954490661621, "learning_rate": 4.332305599412157e-06, "loss": 0.2492, "step": 144550 }, { "epoch": 1.42, "grad_norm": 0.13641105592250824, "learning_rate": 4.332181476957908e-06, "loss": 0.1433, "step": 144575 }, { "epoch": 1.42, "grad_norm": 18.42377281188965, "learning_rate": 4.332057354503659e-06, "loss": 0.2696, "step": 144600 }, { "epoch": 1.42, "grad_norm": 1.975969672203064, "learning_rate": 4.331933232049411e-06, "loss": 0.2594, "step": 144625 }, { "epoch": 1.42, "grad_norm": 19.563352584838867, "learning_rate": 4.331809109595162e-06, "loss": 0.234, "step": 144650 }, { "epoch": 1.42, "grad_norm": 4.282959461212158, "learning_rate": 4.3316849871409135e-06, "loss": 0.2141, "step": 144675 }, { "epoch": 1.42, "grad_norm": 14.553921699523926, "learning_rate": 4.3315608646866656e-06, "loss": 0.2735, "step": 144700 }, { "epoch": 1.42, "grad_norm": 0.06845761090517044, "learning_rate": 4.331436742232417e-06, "loss": 0.2403, "step": 144725 }, { "epoch": 1.42, "grad_norm": 20.002538681030273, "learning_rate": 4.331312619778169e-06, "loss": 0.3398, "step": 144750 }, { "epoch": 1.42, "grad_norm": 11.970146179199219, "learning_rate": 4.33118849732392e-06, "loss": 0.2253, "step": 144775 }, { "epoch": 1.42, "grad_norm": 20.42653465270996, "learning_rate": 4.331064374869672e-06, "loss": 0.2571, "step": 144800 }, { "epoch": 1.42, "grad_norm": 12.878397941589355, "learning_rate": 4.330940252415423e-06, "loss": 0.1946, "step": 144825 }, { "epoch": 1.42, "grad_norm": 17.473596572875977, "learning_rate": 4.330816129961175e-06, "loss": 0.236, "step": 144850 }, { "epoch": 1.42, "grad_norm": 5.904590129852295, "learning_rate": 4.3306920075069266e-06, "loss": 0.2063, "step": 144875 }, { "epoch": 1.42, "grad_norm": 8.579952239990234, "learning_rate": 4.330567885052678e-06, "loss": 0.1835, "step": 144900 }, { "epoch": 1.42, "grad_norm": 11.106547355651855, "learning_rate": 4.33044376259843e-06, "loss": 0.1993, "step": 144925 }, { "epoch": 1.43, "grad_norm": 15.159419059753418, "learning_rate": 4.330319640144181e-06, "loss": 0.2023, "step": 144950 }, { "epoch": 1.43, "grad_norm": 5.980818271636963, "learning_rate": 4.330195517689932e-06, "loss": 0.2328, "step": 144975 }, { "epoch": 1.43, "grad_norm": 16.581493377685547, "learning_rate": 4.330071395235684e-06, "loss": 0.2592, "step": 145000 }, { "epoch": 1.43, "grad_norm": 4.130502700805664, "learning_rate": 4.3299472727814355e-06, "loss": 0.2078, "step": 145025 }, { "epoch": 1.43, "grad_norm": 19.472082138061523, "learning_rate": 4.329823150327187e-06, "loss": 0.2333, "step": 145050 }, { "epoch": 1.43, "grad_norm": 7.490078926086426, "learning_rate": 4.329699027872939e-06, "loss": 0.2527, "step": 145075 }, { "epoch": 1.43, "grad_norm": 11.643948554992676, "learning_rate": 4.32957490541869e-06, "loss": 0.2576, "step": 145100 }, { "epoch": 1.43, "grad_norm": 2.6721630096435547, "learning_rate": 4.329450782964442e-06, "loss": 0.2256, "step": 145125 }, { "epoch": 1.43, "grad_norm": 20.4184627532959, "learning_rate": 4.329326660510193e-06, "loss": 0.2595, "step": 145150 }, { "epoch": 1.43, "grad_norm": 5.300870895385742, "learning_rate": 4.329202538055945e-06, "loss": 0.2131, "step": 145175 }, { "epoch": 1.43, "grad_norm": 13.147759437561035, "learning_rate": 4.3290784156016966e-06, "loss": 0.2889, "step": 145200 }, { "epoch": 1.43, "grad_norm": 4.187758445739746, "learning_rate": 4.328954293147448e-06, "loss": 0.2698, "step": 145225 }, { "epoch": 1.43, "grad_norm": 12.103971481323242, "learning_rate": 4.3288301706932e-06, "loss": 0.2215, "step": 145250 }, { "epoch": 1.43, "grad_norm": 0.8769383430480957, "learning_rate": 4.328706048238951e-06, "loss": 0.2237, "step": 145275 }, { "epoch": 1.43, "grad_norm": 14.06377124786377, "learning_rate": 4.328581925784702e-06, "loss": 0.2459, "step": 145300 }, { "epoch": 1.43, "grad_norm": 6.807493209838867, "learning_rate": 4.328457803330454e-06, "loss": 0.2004, "step": 145325 }, { "epoch": 1.43, "grad_norm": 12.208006858825684, "learning_rate": 4.3283336808762055e-06, "loss": 0.2445, "step": 145350 }, { "epoch": 1.43, "grad_norm": 8.421276092529297, "learning_rate": 4.328209558421957e-06, "loss": 0.2094, "step": 145375 }, { "epoch": 1.43, "grad_norm": 14.980817794799805, "learning_rate": 4.328085435967709e-06, "loss": 0.2577, "step": 145400 }, { "epoch": 1.43, "grad_norm": 4.396658897399902, "learning_rate": 4.32796131351346e-06, "loss": 0.2006, "step": 145425 }, { "epoch": 1.43, "grad_norm": 12.210081100463867, "learning_rate": 4.327837191059211e-06, "loss": 0.2751, "step": 145450 }, { "epoch": 1.43, "grad_norm": 4.9264116287231445, "learning_rate": 4.327713068604963e-06, "loss": 0.234, "step": 145475 }, { "epoch": 1.43, "grad_norm": 21.633804321289062, "learning_rate": 4.3275889461507145e-06, "loss": 0.2131, "step": 145500 }, { "epoch": 1.43, "grad_norm": 5.339575290679932, "learning_rate": 4.3274648236964665e-06, "loss": 0.2163, "step": 145525 }, { "epoch": 1.43, "grad_norm": 10.835432052612305, "learning_rate": 4.327340701242218e-06, "loss": 0.2399, "step": 145550 }, { "epoch": 1.43, "grad_norm": 8.591144561767578, "learning_rate": 4.32721657878797e-06, "loss": 0.2413, "step": 145575 }, { "epoch": 1.43, "grad_norm": 11.674116134643555, "learning_rate": 4.327092456333721e-06, "loss": 0.2358, "step": 145600 }, { "epoch": 1.43, "grad_norm": 9.648324012756348, "learning_rate": 4.326968333879473e-06, "loss": 0.2272, "step": 145625 }, { "epoch": 1.43, "grad_norm": 14.08393669128418, "learning_rate": 4.326844211425224e-06, "loss": 0.2238, "step": 145650 }, { "epoch": 1.43, "grad_norm": 7.648350715637207, "learning_rate": 4.3267200889709755e-06, "loss": 0.255, "step": 145675 }, { "epoch": 1.43, "grad_norm": 14.334254264831543, "learning_rate": 4.3265959665167276e-06, "loss": 0.2787, "step": 145700 }, { "epoch": 1.43, "grad_norm": 5.7783894538879395, "learning_rate": 4.326471844062479e-06, "loss": 0.2429, "step": 145725 }, { "epoch": 1.43, "grad_norm": 13.474733352661133, "learning_rate": 4.32634772160823e-06, "loss": 0.2402, "step": 145750 }, { "epoch": 1.43, "grad_norm": 2.48893404006958, "learning_rate": 4.326223599153982e-06, "loss": 0.2202, "step": 145775 }, { "epoch": 1.43, "grad_norm": 14.761602401733398, "learning_rate": 4.326099476699733e-06, "loss": 0.2568, "step": 145800 }, { "epoch": 1.43, "grad_norm": 5.210699558258057, "learning_rate": 4.3259753542454845e-06, "loss": 0.2305, "step": 145825 }, { "epoch": 1.43, "grad_norm": 14.904062271118164, "learning_rate": 4.3258512317912365e-06, "loss": 0.2164, "step": 145850 }, { "epoch": 1.43, "grad_norm": 5.722714424133301, "learning_rate": 4.3257320742351576e-06, "loss": 0.1908, "step": 145875 }, { "epoch": 1.43, "grad_norm": 17.13109016418457, "learning_rate": 4.325607951780909e-06, "loss": 0.2482, "step": 145900 }, { "epoch": 1.43, "grad_norm": 5.297187328338623, "learning_rate": 4.325483829326661e-06, "loss": 0.201, "step": 145925 }, { "epoch": 1.44, "grad_norm": 12.220352172851562, "learning_rate": 4.325359706872412e-06, "loss": 0.2377, "step": 145950 }, { "epoch": 1.44, "grad_norm": 10.494322776794434, "learning_rate": 4.325235584418164e-06, "loss": 0.2102, "step": 145975 }, { "epoch": 1.44, "grad_norm": 9.79920482635498, "learning_rate": 4.325111461963915e-06, "loss": 0.2541, "step": 146000 }, { "epoch": 1.44, "grad_norm": 0.4643121361732483, "learning_rate": 4.324987339509667e-06, "loss": 0.2196, "step": 146025 }, { "epoch": 1.44, "grad_norm": 17.100109100341797, "learning_rate": 4.3248632170554186e-06, "loss": 0.2317, "step": 146050 }, { "epoch": 1.44, "grad_norm": 6.528397083282471, "learning_rate": 4.324739094601171e-06, "loss": 0.202, "step": 146075 }, { "epoch": 1.44, "grad_norm": 15.734901428222656, "learning_rate": 4.324614972146922e-06, "loss": 0.2409, "step": 146100 }, { "epoch": 1.44, "grad_norm": 10.23405933380127, "learning_rate": 4.324490849692673e-06, "loss": 0.1961, "step": 146125 }, { "epoch": 1.44, "grad_norm": 20.294437408447266, "learning_rate": 4.324366727238425e-06, "loss": 0.2634, "step": 146150 }, { "epoch": 1.44, "grad_norm": 4.778573989868164, "learning_rate": 4.324242604784176e-06, "loss": 0.2325, "step": 146175 }, { "epoch": 1.44, "grad_norm": 22.624990463256836, "learning_rate": 4.3241184823299275e-06, "loss": 0.2351, "step": 146200 }, { "epoch": 1.44, "grad_norm": 13.040140151977539, "learning_rate": 4.32399435987568e-06, "loss": 0.2136, "step": 146225 }, { "epoch": 1.44, "grad_norm": 13.119342803955078, "learning_rate": 4.323870237421431e-06, "loss": 0.2729, "step": 146250 }, { "epoch": 1.44, "grad_norm": 4.533980846405029, "learning_rate": 4.323746114967182e-06, "loss": 0.2396, "step": 146275 }, { "epoch": 1.44, "grad_norm": 15.584789276123047, "learning_rate": 4.323621992512934e-06, "loss": 0.3091, "step": 146300 }, { "epoch": 1.44, "grad_norm": 6.468618869781494, "learning_rate": 4.323497870058685e-06, "loss": 0.2, "step": 146325 }, { "epoch": 1.44, "grad_norm": 16.723751068115234, "learning_rate": 4.3233737476044365e-06, "loss": 0.2397, "step": 146350 }, { "epoch": 1.44, "grad_norm": 6.292730331420898, "learning_rate": 4.3232496251501886e-06, "loss": 0.2164, "step": 146375 }, { "epoch": 1.44, "grad_norm": 24.004026412963867, "learning_rate": 4.32312550269594e-06, "loss": 0.2331, "step": 146400 }, { "epoch": 1.44, "grad_norm": 3.069237232208252, "learning_rate": 4.323001380241692e-06, "loss": 0.2123, "step": 146425 }, { "epoch": 1.44, "grad_norm": 14.235736846923828, "learning_rate": 4.322877257787443e-06, "loss": 0.2691, "step": 146450 }, { "epoch": 1.44, "grad_norm": 8.635126113891602, "learning_rate": 4.322753135333195e-06, "loss": 0.25, "step": 146475 }, { "epoch": 1.44, "grad_norm": 13.328489303588867, "learning_rate": 4.322629012878946e-06, "loss": 0.2696, "step": 146500 }, { "epoch": 1.44, "grad_norm": 4.4523491859436035, "learning_rate": 4.322504890424698e-06, "loss": 0.2463, "step": 146525 }, { "epoch": 1.44, "grad_norm": 15.289034843444824, "learning_rate": 4.32238076797045e-06, "loss": 0.2082, "step": 146550 }, { "epoch": 1.44, "grad_norm": 7.674623966217041, "learning_rate": 4.322256645516201e-06, "loss": 0.2013, "step": 146575 }, { "epoch": 1.44, "grad_norm": 12.631522178649902, "learning_rate": 4.322132523061952e-06, "loss": 0.2002, "step": 146600 }, { "epoch": 1.44, "grad_norm": 5.513891220092773, "learning_rate": 4.322008400607704e-06, "loss": 0.2527, "step": 146625 }, { "epoch": 1.44, "grad_norm": 14.658256530761719, "learning_rate": 4.321884278153455e-06, "loss": 0.2108, "step": 146650 }, { "epoch": 1.44, "grad_norm": 6.209190368652344, "learning_rate": 4.3217601556992065e-06, "loss": 0.2398, "step": 146675 }, { "epoch": 1.44, "grad_norm": 7.678194522857666, "learning_rate": 4.3216360332449585e-06, "loss": 0.2275, "step": 146700 }, { "epoch": 1.44, "grad_norm": 3.169947862625122, "learning_rate": 4.32151191079071e-06, "loss": 0.2216, "step": 146725 }, { "epoch": 1.44, "grad_norm": 13.139549255371094, "learning_rate": 4.321387788336461e-06, "loss": 0.261, "step": 146750 }, { "epoch": 1.44, "grad_norm": 4.268974781036377, "learning_rate": 4.321263665882213e-06, "loss": 0.2694, "step": 146775 }, { "epoch": 1.44, "grad_norm": 11.076166152954102, "learning_rate": 4.321139543427964e-06, "loss": 0.2625, "step": 146800 }, { "epoch": 1.44, "grad_norm": 3.5660386085510254, "learning_rate": 4.321015420973716e-06, "loss": 0.2147, "step": 146825 }, { "epoch": 1.44, "grad_norm": 18.799715042114258, "learning_rate": 4.3208912985194675e-06, "loss": 0.2963, "step": 146850 }, { "epoch": 1.44, "grad_norm": 8.715949058532715, "learning_rate": 4.3207671760652196e-06, "loss": 0.1731, "step": 146875 }, { "epoch": 1.44, "grad_norm": 11.686707496643066, "learning_rate": 4.320643053610971e-06, "loss": 0.2742, "step": 146900 }, { "epoch": 1.44, "grad_norm": 25.251895904541016, "learning_rate": 4.320518931156723e-06, "loss": 0.2553, "step": 146925 }, { "epoch": 1.44, "grad_norm": 8.368796348571777, "learning_rate": 4.320394808702474e-06, "loss": 0.2614, "step": 146950 }, { "epoch": 1.45, "grad_norm": 21.028717041015625, "learning_rate": 4.320270686248225e-06, "loss": 0.2924, "step": 146975 }, { "epoch": 1.45, "grad_norm": 13.96383285522461, "learning_rate": 4.320146563793977e-06, "loss": 0.2348, "step": 147000 }, { "epoch": 1.45, "grad_norm": 4.92943000793457, "learning_rate": 4.3200224413397285e-06, "loss": 0.2186, "step": 147025 }, { "epoch": 1.45, "grad_norm": 11.302582740783691, "learning_rate": 4.31989831888548e-06, "loss": 0.2815, "step": 147050 }, { "epoch": 1.45, "grad_norm": 4.37209939956665, "learning_rate": 4.319774196431232e-06, "loss": 0.2376, "step": 147075 }, { "epoch": 1.45, "grad_norm": 11.145710945129395, "learning_rate": 4.319650073976983e-06, "loss": 0.2382, "step": 147100 }, { "epoch": 1.45, "grad_norm": 7.610663890838623, "learning_rate": 4.319525951522734e-06, "loss": 0.2553, "step": 147125 }, { "epoch": 1.45, "grad_norm": 14.637029647827148, "learning_rate": 4.319401829068486e-06, "loss": 0.2132, "step": 147150 }, { "epoch": 1.45, "grad_norm": 4.30596399307251, "learning_rate": 4.3192777066142375e-06, "loss": 0.1781, "step": 147175 }, { "epoch": 1.45, "grad_norm": 8.21519947052002, "learning_rate": 4.319153584159989e-06, "loss": 0.2057, "step": 147200 }, { "epoch": 1.45, "grad_norm": 6.790311813354492, "learning_rate": 4.319029461705741e-06, "loss": 0.2454, "step": 147225 }, { "epoch": 1.45, "grad_norm": 16.14727020263672, "learning_rate": 4.318905339251492e-06, "loss": 0.2347, "step": 147250 }, { "epoch": 1.45, "grad_norm": 3.7276644706726074, "learning_rate": 4.318781216797244e-06, "loss": 0.2677, "step": 147275 }, { "epoch": 1.45, "grad_norm": 18.837072372436523, "learning_rate": 4.318657094342995e-06, "loss": 0.2069, "step": 147300 }, { "epoch": 1.45, "grad_norm": 2.2871453762054443, "learning_rate": 4.318532971888747e-06, "loss": 0.2208, "step": 147325 }, { "epoch": 1.45, "grad_norm": 11.54699420928955, "learning_rate": 4.3184088494344985e-06, "loss": 0.2674, "step": 147350 }, { "epoch": 1.45, "grad_norm": 4.4239702224731445, "learning_rate": 4.3182847269802506e-06, "loss": 0.2349, "step": 147375 }, { "epoch": 1.45, "grad_norm": 15.98571491241455, "learning_rate": 4.318160604526002e-06, "loss": 0.2298, "step": 147400 }, { "epoch": 1.45, "grad_norm": 5.2100138664245605, "learning_rate": 4.318036482071753e-06, "loss": 0.2599, "step": 147425 }, { "epoch": 1.45, "grad_norm": 14.410724639892578, "learning_rate": 4.317912359617504e-06, "loss": 0.2272, "step": 147450 }, { "epoch": 1.45, "grad_norm": 5.752781867980957, "learning_rate": 4.317788237163256e-06, "loss": 0.2367, "step": 147475 }, { "epoch": 1.45, "grad_norm": 15.507495880126953, "learning_rate": 4.3176641147090075e-06, "loss": 0.2383, "step": 147500 }, { "epoch": 1.45, "grad_norm": 7.414737701416016, "learning_rate": 4.317539992254759e-06, "loss": 0.2863, "step": 147525 }, { "epoch": 1.45, "grad_norm": 28.253482818603516, "learning_rate": 4.317415869800511e-06, "loss": 0.257, "step": 147550 }, { "epoch": 1.45, "grad_norm": 4.6133527755737305, "learning_rate": 4.317291747346262e-06, "loss": 0.2422, "step": 147575 }, { "epoch": 1.45, "grad_norm": 11.149394989013672, "learning_rate": 4.317167624892013e-06, "loss": 0.2279, "step": 147600 }, { "epoch": 1.45, "grad_norm": 14.655561447143555, "learning_rate": 4.317043502437765e-06, "loss": 0.2576, "step": 147625 }, { "epoch": 1.45, "grad_norm": 16.01178741455078, "learning_rate": 4.3169193799835164e-06, "loss": 0.2458, "step": 147650 }, { "epoch": 1.45, "grad_norm": 6.866621971130371, "learning_rate": 4.3167952575292685e-06, "loss": 0.208, "step": 147675 }, { "epoch": 1.45, "grad_norm": 8.770730972290039, "learning_rate": 4.31667113507502e-06, "loss": 0.2336, "step": 147700 }, { "epoch": 1.45, "grad_norm": 0.563687264919281, "learning_rate": 4.316547012620772e-06, "loss": 0.2163, "step": 147725 }, { "epoch": 1.45, "grad_norm": 15.022807121276855, "learning_rate": 4.316422890166523e-06, "loss": 0.255, "step": 147750 }, { "epoch": 1.45, "grad_norm": 4.809386730194092, "learning_rate": 4.316298767712275e-06, "loss": 0.2453, "step": 147775 }, { "epoch": 1.45, "grad_norm": 14.71146297454834, "learning_rate": 4.316174645258026e-06, "loss": 0.2766, "step": 147800 }, { "epoch": 1.45, "grad_norm": 8.917072296142578, "learning_rate": 4.3160505228037775e-06, "loss": 0.2381, "step": 147825 }, { "epoch": 1.45, "grad_norm": 16.028610229492188, "learning_rate": 4.3159264003495295e-06, "loss": 0.2255, "step": 147850 }, { "epoch": 1.45, "grad_norm": 6.33590030670166, "learning_rate": 4.315802277895281e-06, "loss": 0.2073, "step": 147875 }, { "epoch": 1.45, "grad_norm": 13.81419849395752, "learning_rate": 4.315678155441032e-06, "loss": 0.2114, "step": 147900 }, { "epoch": 1.45, "grad_norm": 4.389368534088135, "learning_rate": 4.315554032986784e-06, "loss": 0.1947, "step": 147925 }, { "epoch": 1.45, "grad_norm": 9.535700798034668, "learning_rate": 4.315429910532535e-06, "loss": 0.2827, "step": 147950 }, { "epoch": 1.45, "grad_norm": 5.1373419761657715, "learning_rate": 4.315310752976457e-06, "loss": 0.2416, "step": 147975 }, { "epoch": 1.46, "grad_norm": 20.639707565307617, "learning_rate": 4.315186630522208e-06, "loss": 0.1952, "step": 148000 }, { "epoch": 1.46, "grad_norm": 5.243012428283691, "learning_rate": 4.3150625080679595e-06, "loss": 0.1744, "step": 148025 }, { "epoch": 1.46, "grad_norm": 12.821410179138184, "learning_rate": 4.3149383856137116e-06, "loss": 0.2453, "step": 148050 }, { "epoch": 1.46, "grad_norm": 3.1317436695098877, "learning_rate": 4.314814263159463e-06, "loss": 0.1996, "step": 148075 }, { "epoch": 1.46, "grad_norm": 13.760690689086914, "learning_rate": 4.314690140705215e-06, "loss": 0.2603, "step": 148100 }, { "epoch": 1.46, "grad_norm": 1.5734474658966064, "learning_rate": 4.314566018250966e-06, "loss": 0.2042, "step": 148125 }, { "epoch": 1.46, "grad_norm": 15.20046329498291, "learning_rate": 4.314441895796718e-06, "loss": 0.269, "step": 148150 }, { "epoch": 1.46, "grad_norm": 6.508508682250977, "learning_rate": 4.314317773342469e-06, "loss": 0.2359, "step": 148175 }, { "epoch": 1.46, "grad_norm": 8.105216979980469, "learning_rate": 4.3141936508882205e-06, "loss": 0.204, "step": 148200 }, { "epoch": 1.46, "grad_norm": 8.596090316772461, "learning_rate": 4.314069528433973e-06, "loss": 0.2219, "step": 148225 }, { "epoch": 1.46, "grad_norm": 16.58452796936035, "learning_rate": 4.313945405979724e-06, "loss": 0.2482, "step": 148250 }, { "epoch": 1.46, "grad_norm": 5.382399559020996, "learning_rate": 4.313821283525475e-06, "loss": 0.2293, "step": 148275 }, { "epoch": 1.46, "grad_norm": 16.71819305419922, "learning_rate": 4.313697161071227e-06, "loss": 0.25, "step": 148300 }, { "epoch": 1.46, "grad_norm": 7.418943881988525, "learning_rate": 4.313573038616978e-06, "loss": 0.2293, "step": 148325 }, { "epoch": 1.46, "grad_norm": 9.033605575561523, "learning_rate": 4.3134489161627295e-06, "loss": 0.2381, "step": 148350 }, { "epoch": 1.46, "grad_norm": 7.501589298248291, "learning_rate": 4.3133247937084815e-06, "loss": 0.2289, "step": 148375 }, { "epoch": 1.46, "grad_norm": 14.53371810913086, "learning_rate": 4.313200671254233e-06, "loss": 0.2369, "step": 148400 }, { "epoch": 1.46, "grad_norm": 7.261002063751221, "learning_rate": 4.313076548799984e-06, "loss": 0.2363, "step": 148425 }, { "epoch": 1.46, "grad_norm": 16.809539794921875, "learning_rate": 4.312952426345736e-06, "loss": 0.2303, "step": 148450 }, { "epoch": 1.46, "grad_norm": 7.186700344085693, "learning_rate": 4.312828303891487e-06, "loss": 0.2461, "step": 148475 }, { "epoch": 1.46, "grad_norm": 15.690713882446289, "learning_rate": 4.312704181437239e-06, "loss": 0.2614, "step": 148500 }, { "epoch": 1.46, "grad_norm": 3.27734375, "learning_rate": 4.3125800589829905e-06, "loss": 0.3061, "step": 148525 }, { "epoch": 1.46, "grad_norm": 9.572608947753906, "learning_rate": 4.3124559365287426e-06, "loss": 0.2264, "step": 148550 }, { "epoch": 1.46, "grad_norm": 9.507758140563965, "learning_rate": 4.312331814074494e-06, "loss": 0.2208, "step": 148575 }, { "epoch": 1.46, "grad_norm": 13.12308120727539, "learning_rate": 4.312207691620246e-06, "loss": 0.2251, "step": 148600 }, { "epoch": 1.46, "grad_norm": 6.488513946533203, "learning_rate": 4.312083569165997e-06, "loss": 0.2065, "step": 148625 }, { "epoch": 1.46, "grad_norm": 13.133325576782227, "learning_rate": 4.311959446711748e-06, "loss": 0.256, "step": 148650 }, { "epoch": 1.46, "grad_norm": 4.897013187408447, "learning_rate": 4.3118353242575e-06, "loss": 0.2297, "step": 148675 }, { "epoch": 1.46, "grad_norm": 10.46588134765625, "learning_rate": 4.3117112018032515e-06, "loss": 0.2086, "step": 148700 }, { "epoch": 1.46, "grad_norm": 7.19920015335083, "learning_rate": 4.311587079349003e-06, "loss": 0.2105, "step": 148725 }, { "epoch": 1.46, "grad_norm": 17.675548553466797, "learning_rate": 4.311462956894755e-06, "loss": 0.2646, "step": 148750 }, { "epoch": 1.46, "grad_norm": 5.336732387542725, "learning_rate": 4.311338834440506e-06, "loss": 0.2365, "step": 148775 }, { "epoch": 1.46, "grad_norm": 18.458778381347656, "learning_rate": 4.311214711986257e-06, "loss": 0.2313, "step": 148800 }, { "epoch": 1.46, "grad_norm": 6.11721658706665, "learning_rate": 4.311090589532009e-06, "loss": 0.2138, "step": 148825 }, { "epoch": 1.46, "grad_norm": 9.042875289916992, "learning_rate": 4.3109664670777605e-06, "loss": 0.2585, "step": 148850 }, { "epoch": 1.46, "grad_norm": 7.276871681213379, "learning_rate": 4.310842344623512e-06, "loss": 0.2286, "step": 148875 }, { "epoch": 1.46, "grad_norm": 15.389832496643066, "learning_rate": 4.310718222169264e-06, "loss": 0.2578, "step": 148900 }, { "epoch": 1.46, "grad_norm": 4.477808952331543, "learning_rate": 4.310594099715015e-06, "loss": 0.2136, "step": 148925 }, { "epoch": 1.46, "grad_norm": 12.27019214630127, "learning_rate": 4.310469977260767e-06, "loss": 0.2389, "step": 148950 }, { "epoch": 1.46, "grad_norm": 2.7694177627563477, "learning_rate": 4.310345854806518e-06, "loss": 0.247, "step": 148975 }, { "epoch": 1.46, "grad_norm": 12.823637962341309, "learning_rate": 4.31022173235227e-06, "loss": 0.3023, "step": 149000 }, { "epoch": 1.47, "grad_norm": 5.550445079803467, "learning_rate": 4.3100976098980215e-06, "loss": 0.1905, "step": 149025 }, { "epoch": 1.47, "grad_norm": 13.144067764282227, "learning_rate": 4.309973487443773e-06, "loss": 0.2542, "step": 149050 }, { "epoch": 1.47, "grad_norm": 4.4242939949035645, "learning_rate": 4.309849364989525e-06, "loss": 0.2412, "step": 149075 }, { "epoch": 1.47, "grad_norm": 18.49184799194336, "learning_rate": 4.309725242535276e-06, "loss": 0.2384, "step": 149100 }, { "epoch": 1.47, "grad_norm": 4.087723255157471, "learning_rate": 4.309601120081027e-06, "loss": 0.2371, "step": 149125 }, { "epoch": 1.47, "grad_norm": 15.999985694885254, "learning_rate": 4.309476997626779e-06, "loss": 0.2786, "step": 149150 }, { "epoch": 1.47, "grad_norm": 6.089789867401123, "learning_rate": 4.3093528751725305e-06, "loss": 0.1883, "step": 149175 }, { "epoch": 1.47, "grad_norm": 9.61337661743164, "learning_rate": 4.309228752718282e-06, "loss": 0.2555, "step": 149200 }, { "epoch": 1.47, "grad_norm": 7.300487995147705, "learning_rate": 4.309104630264034e-06, "loss": 0.205, "step": 149225 }, { "epoch": 1.47, "grad_norm": 11.639514923095703, "learning_rate": 4.308980507809785e-06, "loss": 0.281, "step": 149250 }, { "epoch": 1.47, "grad_norm": 9.780967712402344, "learning_rate": 4.308856385355536e-06, "loss": 0.2371, "step": 149275 }, { "epoch": 1.47, "grad_norm": 14.638956069946289, "learning_rate": 4.308732262901288e-06, "loss": 0.2487, "step": 149300 }, { "epoch": 1.47, "grad_norm": 3.596817970275879, "learning_rate": 4.3086081404470394e-06, "loss": 0.2474, "step": 149325 }, { "epoch": 1.47, "grad_norm": 16.165916442871094, "learning_rate": 4.3084840179927915e-06, "loss": 0.2201, "step": 149350 }, { "epoch": 1.47, "grad_norm": 6.390298366546631, "learning_rate": 4.308359895538543e-06, "loss": 0.3037, "step": 149375 }, { "epoch": 1.47, "grad_norm": 16.3592586517334, "learning_rate": 4.308235773084295e-06, "loss": 0.2208, "step": 149400 }, { "epoch": 1.47, "grad_norm": 8.195363998413086, "learning_rate": 4.308111650630046e-06, "loss": 0.2317, "step": 149425 }, { "epoch": 1.47, "grad_norm": 13.930521011352539, "learning_rate": 4.307987528175798e-06, "loss": 0.2732, "step": 149450 }, { "epoch": 1.47, "grad_norm": 7.558556079864502, "learning_rate": 4.307863405721549e-06, "loss": 0.2011, "step": 149475 }, { "epoch": 1.47, "grad_norm": 17.021446228027344, "learning_rate": 4.3077392832673005e-06, "loss": 0.2083, "step": 149500 }, { "epoch": 1.47, "grad_norm": 4.810494899749756, "learning_rate": 4.3076151608130525e-06, "loss": 0.185, "step": 149525 }, { "epoch": 1.47, "grad_norm": 15.86127758026123, "learning_rate": 4.307491038358804e-06, "loss": 0.2693, "step": 149550 }, { "epoch": 1.47, "grad_norm": 5.208498954772949, "learning_rate": 4.307366915904555e-06, "loss": 0.2368, "step": 149575 }, { "epoch": 1.47, "grad_norm": 8.569722175598145, "learning_rate": 4.307242793450307e-06, "loss": 0.278, "step": 149600 }, { "epoch": 1.47, "grad_norm": 1.8597924709320068, "learning_rate": 4.307118670996058e-06, "loss": 0.2279, "step": 149625 }, { "epoch": 1.47, "grad_norm": 8.673299789428711, "learning_rate": 4.3069945485418094e-06, "loss": 0.2063, "step": 149650 }, { "epoch": 1.47, "grad_norm": 2.5522918701171875, "learning_rate": 4.3068704260875615e-06, "loss": 0.209, "step": 149675 }, { "epoch": 1.47, "grad_norm": 15.087742805480957, "learning_rate": 4.306746303633313e-06, "loss": 0.2347, "step": 149700 }, { "epoch": 1.47, "grad_norm": 6.9092020988464355, "learning_rate": 4.306622181179064e-06, "loss": 0.215, "step": 149725 }, { "epoch": 1.47, "grad_norm": 24.556461334228516, "learning_rate": 4.306498058724816e-06, "loss": 0.2704, "step": 149750 }, { "epoch": 1.47, "grad_norm": 6.6566925048828125, "learning_rate": 4.306373936270567e-06, "loss": 0.2452, "step": 149775 }, { "epoch": 1.47, "grad_norm": 12.835230827331543, "learning_rate": 4.306249813816319e-06, "loss": 0.2273, "step": 149800 }, { "epoch": 1.47, "grad_norm": 6.378761291503906, "learning_rate": 4.3061256913620704e-06, "loss": 0.2259, "step": 149825 }, { "epoch": 1.47, "grad_norm": 21.32094383239746, "learning_rate": 4.3060015689078225e-06, "loss": 0.2606, "step": 149850 }, { "epoch": 1.47, "grad_norm": 2.8532795906066895, "learning_rate": 4.305877446453574e-06, "loss": 0.2716, "step": 149875 }, { "epoch": 1.47, "grad_norm": 12.55733585357666, "learning_rate": 4.305753323999325e-06, "loss": 0.2294, "step": 149900 }, { "epoch": 1.47, "grad_norm": 4.732760906219482, "learning_rate": 4.305629201545077e-06, "loss": 0.2093, "step": 149925 }, { "epoch": 1.47, "grad_norm": 16.415489196777344, "learning_rate": 4.305505079090828e-06, "loss": 0.2437, "step": 149950 }, { "epoch": 1.47, "grad_norm": 4.656793117523193, "learning_rate": 4.305380956636579e-06, "loss": 0.2228, "step": 149975 }, { "epoch": 1.47, "grad_norm": 11.081451416015625, "learning_rate": 4.3052568341823315e-06, "loss": 0.2493, "step": 150000 }, { "epoch": 1.48, "grad_norm": 5.1185455322265625, "learning_rate": 4.305132711728083e-06, "loss": 0.1931, "step": 150025 }, { "epoch": 1.48, "grad_norm": 15.268503189086914, "learning_rate": 4.305008589273834e-06, "loss": 0.2129, "step": 150050 }, { "epoch": 1.48, "grad_norm": 6.619068145751953, "learning_rate": 4.304884466819586e-06, "loss": 0.2523, "step": 150075 }, { "epoch": 1.48, "grad_norm": 15.516283988952637, "learning_rate": 4.304760344365337e-06, "loss": 0.2244, "step": 150100 }, { "epoch": 1.48, "grad_norm": 4.775047302246094, "learning_rate": 4.304636221911088e-06, "loss": 0.1758, "step": 150125 }, { "epoch": 1.48, "grad_norm": 13.63707447052002, "learning_rate": 4.3045120994568404e-06, "loss": 0.2547, "step": 150150 }, { "epoch": 1.48, "grad_norm": 5.53611946105957, "learning_rate": 4.304387977002592e-06, "loss": 0.2343, "step": 150175 }, { "epoch": 1.48, "grad_norm": 13.518980979919434, "learning_rate": 4.304263854548344e-06, "loss": 0.2525, "step": 150200 }, { "epoch": 1.48, "grad_norm": 3.945152759552002, "learning_rate": 4.304139732094095e-06, "loss": 0.1883, "step": 150225 }, { "epoch": 1.48, "grad_norm": 18.301902770996094, "learning_rate": 4.304015609639847e-06, "loss": 0.2775, "step": 150250 }, { "epoch": 1.48, "grad_norm": 6.683727741241455, "learning_rate": 4.303891487185598e-06, "loss": 0.1928, "step": 150275 }, { "epoch": 1.48, "grad_norm": 18.720542907714844, "learning_rate": 4.30376736473135e-06, "loss": 0.2262, "step": 150300 }, { "epoch": 1.48, "grad_norm": 3.1237308979034424, "learning_rate": 4.3036432422771015e-06, "loss": 0.2395, "step": 150325 }, { "epoch": 1.48, "grad_norm": 12.915644645690918, "learning_rate": 4.303519119822853e-06, "loss": 0.2415, "step": 150350 }, { "epoch": 1.48, "grad_norm": 8.028403282165527, "learning_rate": 4.303394997368605e-06, "loss": 0.2554, "step": 150375 }, { "epoch": 1.48, "grad_norm": 13.797607421875, "learning_rate": 4.303270874914356e-06, "loss": 0.252, "step": 150400 }, { "epoch": 1.48, "grad_norm": 6.110745906829834, "learning_rate": 4.303146752460107e-06, "loss": 0.2191, "step": 150425 }, { "epoch": 1.48, "grad_norm": 13.19736099243164, "learning_rate": 4.303022630005859e-06, "loss": 0.2381, "step": 150450 }, { "epoch": 1.48, "grad_norm": 3.9526526927948, "learning_rate": 4.30289850755161e-06, "loss": 0.2641, "step": 150475 }, { "epoch": 1.48, "grad_norm": 16.481969833374023, "learning_rate": 4.302774385097362e-06, "loss": 0.2564, "step": 150500 }, { "epoch": 1.48, "grad_norm": 11.466379165649414, "learning_rate": 4.302650262643114e-06, "loss": 0.1929, "step": 150525 }, { "epoch": 1.48, "grad_norm": 11.640118598937988, "learning_rate": 4.302526140188865e-06, "loss": 0.2629, "step": 150550 }, { "epoch": 1.48, "grad_norm": 2.8452627658843994, "learning_rate": 4.302402017734616e-06, "loss": 0.2647, "step": 150575 }, { "epoch": 1.48, "grad_norm": 11.485502243041992, "learning_rate": 4.302277895280368e-06, "loss": 0.2731, "step": 150600 }, { "epoch": 1.48, "grad_norm": 8.131970405578613, "learning_rate": 4.302153772826119e-06, "loss": 0.219, "step": 150625 }, { "epoch": 1.48, "grad_norm": 15.146587371826172, "learning_rate": 4.3020296503718714e-06, "loss": 0.2583, "step": 150650 }, { "epoch": 1.48, "grad_norm": 10.163179397583008, "learning_rate": 4.301905527917623e-06, "loss": 0.2267, "step": 150675 }, { "epoch": 1.48, "grad_norm": 25.1287899017334, "learning_rate": 4.301781405463375e-06, "loss": 0.2811, "step": 150700 }, { "epoch": 1.48, "grad_norm": 6.953606605529785, "learning_rate": 4.301657283009126e-06, "loss": 0.2339, "step": 150725 }, { "epoch": 1.48, "grad_norm": 24.643953323364258, "learning_rate": 4.301533160554877e-06, "loss": 0.2549, "step": 150750 }, { "epoch": 1.48, "grad_norm": 8.358301162719727, "learning_rate": 4.301414002998799e-06, "loss": 0.2358, "step": 150775 }, { "epoch": 1.48, "grad_norm": 6.6502556800842285, "learning_rate": 4.30128988054455e-06, "loss": 0.2029, "step": 150800 }, { "epoch": 1.48, "grad_norm": 4.804015159606934, "learning_rate": 4.301165758090302e-06, "loss": 0.2115, "step": 150825 }, { "epoch": 1.48, "grad_norm": 8.890406608581543, "learning_rate": 4.3010416356360535e-06, "loss": 0.2482, "step": 150850 }, { "epoch": 1.48, "grad_norm": 6.587299823760986, "learning_rate": 4.300917513181805e-06, "loss": 0.2305, "step": 150875 }, { "epoch": 1.48, "grad_norm": 14.809059143066406, "learning_rate": 4.300793390727557e-06, "loss": 0.283, "step": 150900 }, { "epoch": 1.48, "grad_norm": 9.423077583312988, "learning_rate": 4.300669268273308e-06, "loss": 0.1907, "step": 150925 }, { "epoch": 1.48, "grad_norm": 16.973196029663086, "learning_rate": 4.300545145819059e-06, "loss": 0.2497, "step": 150950 }, { "epoch": 1.48, "grad_norm": 12.103229522705078, "learning_rate": 4.300421023364811e-06, "loss": 0.246, "step": 150975 }, { "epoch": 1.48, "grad_norm": 30.68379020690918, "learning_rate": 4.3002969009105624e-06, "loss": 0.2503, "step": 151000 }, { "epoch": 1.48, "grad_norm": 7.1373209953308105, "learning_rate": 4.3001727784563145e-06, "loss": 0.2175, "step": 151025 }, { "epoch": 1.49, "grad_norm": 13.232257843017578, "learning_rate": 4.300048656002066e-06, "loss": 0.2554, "step": 151050 }, { "epoch": 1.49, "grad_norm": 2.9455668926239014, "learning_rate": 4.299924533547818e-06, "loss": 0.2386, "step": 151075 }, { "epoch": 1.49, "grad_norm": 15.704965591430664, "learning_rate": 4.299800411093569e-06, "loss": 0.2361, "step": 151100 }, { "epoch": 1.49, "grad_norm": 4.872563362121582, "learning_rate": 4.299676288639321e-06, "loss": 0.3006, "step": 151125 }, { "epoch": 1.49, "grad_norm": 14.806591033935547, "learning_rate": 4.299552166185072e-06, "loss": 0.2333, "step": 151150 }, { "epoch": 1.49, "grad_norm": 5.283291339874268, "learning_rate": 4.2994280437308235e-06, "loss": 0.2194, "step": 151175 }, { "epoch": 1.49, "grad_norm": 14.198362350463867, "learning_rate": 4.299303921276575e-06, "loss": 0.2087, "step": 151200 }, { "epoch": 1.49, "grad_norm": 3.0006394386291504, "learning_rate": 4.299179798822327e-06, "loss": 0.2105, "step": 151225 }, { "epoch": 1.49, "grad_norm": 24.083984375, "learning_rate": 4.299055676368078e-06, "loss": 0.224, "step": 151250 }, { "epoch": 1.49, "grad_norm": 4.064024925231934, "learning_rate": 4.298931553913829e-06, "loss": 0.2572, "step": 151275 }, { "epoch": 1.49, "grad_norm": 31.165754318237305, "learning_rate": 4.298807431459581e-06, "loss": 0.2826, "step": 151300 }, { "epoch": 1.49, "grad_norm": 2.527912139892578, "learning_rate": 4.2986833090053324e-06, "loss": 0.2349, "step": 151325 }, { "epoch": 1.49, "grad_norm": 13.997040748596191, "learning_rate": 4.298559186551084e-06, "loss": 0.2217, "step": 151350 }, { "epoch": 1.49, "grad_norm": 5.17001485824585, "learning_rate": 4.298435064096836e-06, "loss": 0.1804, "step": 151375 }, { "epoch": 1.49, "grad_norm": 13.615949630737305, "learning_rate": 4.298310941642587e-06, "loss": 0.2545, "step": 151400 }, { "epoch": 1.49, "grad_norm": 5.55602502822876, "learning_rate": 4.298186819188339e-06, "loss": 0.1694, "step": 151425 }, { "epoch": 1.49, "grad_norm": 4.638597011566162, "learning_rate": 4.29806269673409e-06, "loss": 0.2163, "step": 151450 }, { "epoch": 1.49, "grad_norm": 9.519047737121582, "learning_rate": 4.297938574279842e-06, "loss": 0.2497, "step": 151475 }, { "epoch": 1.49, "grad_norm": 15.913599967956543, "learning_rate": 4.2978144518255935e-06, "loss": 0.2401, "step": 151500 }, { "epoch": 1.49, "grad_norm": 7.138774394989014, "learning_rate": 4.2976903293713455e-06, "loss": 0.2309, "step": 151525 }, { "epoch": 1.49, "grad_norm": 13.462102890014648, "learning_rate": 4.297566206917097e-06, "loss": 0.2528, "step": 151550 }, { "epoch": 1.49, "grad_norm": 3.630908727645874, "learning_rate": 4.297442084462848e-06, "loss": 0.1986, "step": 151575 }, { "epoch": 1.49, "grad_norm": 16.757516860961914, "learning_rate": 4.2973179620086e-06, "loss": 0.2687, "step": 151600 }, { "epoch": 1.49, "grad_norm": 4.404112339019775, "learning_rate": 4.297193839554351e-06, "loss": 0.2114, "step": 151625 }, { "epoch": 1.49, "grad_norm": 16.109840393066406, "learning_rate": 4.297069717100102e-06, "loss": 0.2988, "step": 151650 }, { "epoch": 1.49, "grad_norm": 2.610111713409424, "learning_rate": 4.2969455946458545e-06, "loss": 0.3174, "step": 151675 }, { "epoch": 1.49, "grad_norm": 16.82986068725586, "learning_rate": 4.296821472191606e-06, "loss": 0.1856, "step": 151700 }, { "epoch": 1.49, "grad_norm": 7.345005035400391, "learning_rate": 4.296697349737357e-06, "loss": 0.1812, "step": 151725 }, { "epoch": 1.49, "grad_norm": 17.730907440185547, "learning_rate": 4.296573227283109e-06, "loss": 0.2809, "step": 151750 }, { "epoch": 1.49, "grad_norm": 8.51539134979248, "learning_rate": 4.29644910482886e-06, "loss": 0.2439, "step": 151775 }, { "epoch": 1.49, "grad_norm": 10.286911010742188, "learning_rate": 4.296324982374611e-06, "loss": 0.2415, "step": 151800 }, { "epoch": 1.49, "grad_norm": 16.344738006591797, "learning_rate": 4.2962008599203634e-06, "loss": 0.2356, "step": 151825 }, { "epoch": 1.49, "grad_norm": 9.200458526611328, "learning_rate": 4.296076737466115e-06, "loss": 0.2251, "step": 151850 }, { "epoch": 1.49, "grad_norm": 4.368118762969971, "learning_rate": 4.295952615011867e-06, "loss": 0.2771, "step": 151875 }, { "epoch": 1.49, "grad_norm": 8.842179298400879, "learning_rate": 4.295828492557618e-06, "loss": 0.2058, "step": 151900 }, { "epoch": 1.49, "grad_norm": 0.5424689054489136, "learning_rate": 4.29570437010337e-06, "loss": 0.2383, "step": 151925 }, { "epoch": 1.49, "grad_norm": 15.434762954711914, "learning_rate": 4.295580247649121e-06, "loss": 0.2032, "step": 151950 }, { "epoch": 1.49, "grad_norm": 2.460139274597168, "learning_rate": 4.295456125194873e-06, "loss": 0.2076, "step": 151975 }, { "epoch": 1.49, "grad_norm": 11.278617858886719, "learning_rate": 4.2953320027406245e-06, "loss": 0.2157, "step": 152000 }, { "epoch": 1.49, "grad_norm": 0.6342712044715881, "learning_rate": 4.295207880286376e-06, "loss": 0.1995, "step": 152025 }, { "epoch": 1.49, "grad_norm": 18.38001823425293, "learning_rate": 4.295083757832127e-06, "loss": 0.2533, "step": 152050 }, { "epoch": 1.5, "grad_norm": 1.9653816223144531, "learning_rate": 4.294959635377879e-06, "loss": 0.2107, "step": 152075 }, { "epoch": 1.5, "grad_norm": 9.120041847229004, "learning_rate": 4.29483551292363e-06, "loss": 0.1988, "step": 152100 }, { "epoch": 1.5, "grad_norm": 11.444635391235352, "learning_rate": 4.294711390469381e-06, "loss": 0.2322, "step": 152125 }, { "epoch": 1.5, "grad_norm": 14.537910461425781, "learning_rate": 4.294587268015133e-06, "loss": 0.237, "step": 152150 }, { "epoch": 1.5, "grad_norm": 6.946614742279053, "learning_rate": 4.294463145560885e-06, "loss": 0.2251, "step": 152175 }, { "epoch": 1.5, "grad_norm": 12.002991676330566, "learning_rate": 4.294339023106636e-06, "loss": 0.2508, "step": 152200 }, { "epoch": 1.5, "grad_norm": 3.177013635635376, "learning_rate": 4.294214900652388e-06, "loss": 0.2463, "step": 152225 }, { "epoch": 1.5, "grad_norm": 18.653854370117188, "learning_rate": 4.294090778198139e-06, "loss": 0.2084, "step": 152250 }, { "epoch": 1.5, "grad_norm": 6.6425371170043945, "learning_rate": 4.293966655743891e-06, "loss": 0.2344, "step": 152275 }, { "epoch": 1.5, "grad_norm": 17.883649826049805, "learning_rate": 4.293842533289642e-06, "loss": 0.2247, "step": 152300 }, { "epoch": 1.5, "grad_norm": 3.7958526611328125, "learning_rate": 4.2937184108353944e-06, "loss": 0.2143, "step": 152325 }, { "epoch": 1.5, "grad_norm": 19.8292236328125, "learning_rate": 4.293594288381146e-06, "loss": 0.2157, "step": 152350 }, { "epoch": 1.5, "grad_norm": 3.149458885192871, "learning_rate": 4.293470165926898e-06, "loss": 0.193, "step": 152375 }, { "epoch": 1.5, "grad_norm": 14.059755325317383, "learning_rate": 4.293346043472649e-06, "loss": 0.287, "step": 152400 }, { "epoch": 1.5, "grad_norm": 11.318999290466309, "learning_rate": 4.2932219210184e-06, "loss": 0.1952, "step": 152425 }, { "epoch": 1.5, "grad_norm": 17.827129364013672, "learning_rate": 4.293097798564152e-06, "loss": 0.2601, "step": 152450 }, { "epoch": 1.5, "grad_norm": 1.250370979309082, "learning_rate": 4.292973676109903e-06, "loss": 0.1867, "step": 152475 }, { "epoch": 1.5, "grad_norm": 13.880295753479004, "learning_rate": 4.292849553655655e-06, "loss": 0.2466, "step": 152500 }, { "epoch": 1.5, "grad_norm": 4.921546459197998, "learning_rate": 4.292725431201407e-06, "loss": 0.2574, "step": 152525 }, { "epoch": 1.5, "grad_norm": 14.01646614074707, "learning_rate": 4.292601308747158e-06, "loss": 0.2679, "step": 152550 }, { "epoch": 1.5, "grad_norm": 4.1957244873046875, "learning_rate": 4.292477186292909e-06, "loss": 0.2386, "step": 152575 }, { "epoch": 1.5, "grad_norm": 23.76744270324707, "learning_rate": 4.292353063838661e-06, "loss": 0.293, "step": 152600 }, { "epoch": 1.5, "grad_norm": 3.9062418937683105, "learning_rate": 4.292228941384412e-06, "loss": 0.2248, "step": 152625 }, { "epoch": 1.5, "grad_norm": 14.980758666992188, "learning_rate": 4.292104818930164e-06, "loss": 0.2513, "step": 152650 }, { "epoch": 1.5, "grad_norm": 2.602530002593994, "learning_rate": 4.291980696475916e-06, "loss": 0.2141, "step": 152675 }, { "epoch": 1.5, "grad_norm": 11.656059265136719, "learning_rate": 4.291856574021667e-06, "loss": 0.2258, "step": 152700 }, { "epoch": 1.5, "grad_norm": 4.581899642944336, "learning_rate": 4.291732451567419e-06, "loss": 0.2011, "step": 152725 }, { "epoch": 1.5, "grad_norm": 15.519526481628418, "learning_rate": 4.29160832911317e-06, "loss": 0.2367, "step": 152750 }, { "epoch": 1.5, "grad_norm": 7.68088960647583, "learning_rate": 4.291489171557092e-06, "loss": 0.2763, "step": 152775 }, { "epoch": 1.5, "grad_norm": 13.301892280578613, "learning_rate": 4.291365049102843e-06, "loss": 0.2572, "step": 152800 }, { "epoch": 1.5, "grad_norm": 7.232580184936523, "learning_rate": 4.291240926648595e-06, "loss": 0.284, "step": 152825 }, { "epoch": 1.5, "grad_norm": 7.187314510345459, "learning_rate": 4.2911168041943465e-06, "loss": 0.1899, "step": 152850 }, { "epoch": 1.5, "grad_norm": 4.699563503265381, "learning_rate": 4.290992681740098e-06, "loss": 0.2309, "step": 152875 }, { "epoch": 1.5, "grad_norm": 15.328055381774902, "learning_rate": 4.29086855928585e-06, "loss": 0.2, "step": 152900 }, { "epoch": 1.5, "grad_norm": 4.883709907531738, "learning_rate": 4.290744436831601e-06, "loss": 0.1654, "step": 152925 }, { "epoch": 1.5, "grad_norm": 15.42871379852295, "learning_rate": 4.290620314377352e-06, "loss": 0.2197, "step": 152950 }, { "epoch": 1.5, "grad_norm": 3.5225162506103516, "learning_rate": 4.290496191923104e-06, "loss": 0.261, "step": 152975 }, { "epoch": 1.5, "grad_norm": 12.132257461547852, "learning_rate": 4.2903720694688554e-06, "loss": 0.2308, "step": 153000 }, { "epoch": 1.5, "grad_norm": 7.690639972686768, "learning_rate": 4.290247947014607e-06, "loss": 0.2331, "step": 153025 }, { "epoch": 1.5, "grad_norm": 13.347126007080078, "learning_rate": 4.290123824560359e-06, "loss": 0.2481, "step": 153050 }, { "epoch": 1.51, "grad_norm": 1.2790546417236328, "learning_rate": 4.28999970210611e-06, "loss": 0.2291, "step": 153075 }, { "epoch": 1.51, "grad_norm": 17.74223518371582, "learning_rate": 4.289875579651861e-06, "loss": 0.2185, "step": 153100 }, { "epoch": 1.51, "grad_norm": 6.25365686416626, "learning_rate": 4.289751457197613e-06, "loss": 0.2409, "step": 153125 }, { "epoch": 1.51, "grad_norm": 13.97240161895752, "learning_rate": 4.289627334743364e-06, "loss": 0.2626, "step": 153150 }, { "epoch": 1.51, "grad_norm": 7.217099666595459, "learning_rate": 4.2895032122891165e-06, "loss": 0.204, "step": 153175 }, { "epoch": 1.51, "grad_norm": 12.167584419250488, "learning_rate": 4.289379089834868e-06, "loss": 0.2114, "step": 153200 }, { "epoch": 1.51, "grad_norm": 9.225696563720703, "learning_rate": 4.28925496738062e-06, "loss": 0.1945, "step": 153225 }, { "epoch": 1.51, "grad_norm": 14.139864921569824, "learning_rate": 4.289130844926371e-06, "loss": 0.2057, "step": 153250 }, { "epoch": 1.51, "grad_norm": 10.070159912109375, "learning_rate": 4.289006722472123e-06, "loss": 0.2112, "step": 153275 }, { "epoch": 1.51, "grad_norm": 10.086607933044434, "learning_rate": 4.288882600017874e-06, "loss": 0.3016, "step": 153300 }, { "epoch": 1.51, "grad_norm": 5.227118968963623, "learning_rate": 4.288758477563625e-06, "loss": 0.2442, "step": 153325 }, { "epoch": 1.51, "grad_norm": 21.57033920288086, "learning_rate": 4.2886343551093775e-06, "loss": 0.2454, "step": 153350 }, { "epoch": 1.51, "grad_norm": 3.2135279178619385, "learning_rate": 4.288510232655129e-06, "loss": 0.1787, "step": 153375 }, { "epoch": 1.51, "grad_norm": 18.208181381225586, "learning_rate": 4.28838611020088e-06, "loss": 0.2395, "step": 153400 }, { "epoch": 1.51, "grad_norm": 0.8704525232315063, "learning_rate": 4.288261987746632e-06, "loss": 0.2182, "step": 153425 }, { "epoch": 1.51, "grad_norm": 12.387584686279297, "learning_rate": 4.288137865292383e-06, "loss": 0.2621, "step": 153450 }, { "epoch": 1.51, "grad_norm": 5.390487194061279, "learning_rate": 4.288013742838134e-06, "loss": 0.2246, "step": 153475 }, { "epoch": 1.51, "grad_norm": 19.275583267211914, "learning_rate": 4.287889620383886e-06, "loss": 0.2146, "step": 153500 }, { "epoch": 1.51, "grad_norm": 3.8705410957336426, "learning_rate": 4.287765497929638e-06, "loss": 0.2491, "step": 153525 }, { "epoch": 1.51, "grad_norm": 18.717336654663086, "learning_rate": 4.287641375475389e-06, "loss": 0.2404, "step": 153550 }, { "epoch": 1.51, "grad_norm": 4.537400245666504, "learning_rate": 4.287517253021141e-06, "loss": 0.2337, "step": 153575 }, { "epoch": 1.51, "grad_norm": 14.391378402709961, "learning_rate": 4.287393130566892e-06, "loss": 0.2113, "step": 153600 }, { "epoch": 1.51, "grad_norm": 9.401254653930664, "learning_rate": 4.287269008112644e-06, "loss": 0.2404, "step": 153625 }, { "epoch": 1.51, "grad_norm": 14.404454231262207, "learning_rate": 4.287144885658395e-06, "loss": 0.2352, "step": 153650 }, { "epoch": 1.51, "grad_norm": 2.8882791996002197, "learning_rate": 4.2870207632041475e-06, "loss": 0.2185, "step": 153675 }, { "epoch": 1.51, "grad_norm": 9.206319808959961, "learning_rate": 4.286896640749899e-06, "loss": 0.2032, "step": 153700 }, { "epoch": 1.51, "grad_norm": 5.509275436401367, "learning_rate": 4.28677251829565e-06, "loss": 0.1806, "step": 153725 }, { "epoch": 1.51, "grad_norm": 14.539215087890625, "learning_rate": 4.286648395841402e-06, "loss": 0.2148, "step": 153750 }, { "epoch": 1.51, "grad_norm": 5.187160491943359, "learning_rate": 4.286524273387153e-06, "loss": 0.2003, "step": 153775 }, { "epoch": 1.51, "grad_norm": 8.849762916564941, "learning_rate": 4.286400150932904e-06, "loss": 0.2392, "step": 153800 }, { "epoch": 1.51, "grad_norm": 3.2868783473968506, "learning_rate": 4.2862760284786564e-06, "loss": 0.1736, "step": 153825 }, { "epoch": 1.51, "grad_norm": 20.98578453063965, "learning_rate": 4.286151906024408e-06, "loss": 0.2616, "step": 153850 }, { "epoch": 1.51, "grad_norm": 5.688167095184326, "learning_rate": 4.286027783570159e-06, "loss": 0.2627, "step": 153875 }, { "epoch": 1.51, "grad_norm": 16.635099411010742, "learning_rate": 4.285903661115911e-06, "loss": 0.2149, "step": 153900 }, { "epoch": 1.51, "grad_norm": 8.063639640808105, "learning_rate": 4.285779538661662e-06, "loss": 0.239, "step": 153925 }, { "epoch": 1.51, "grad_norm": 14.064858436584473, "learning_rate": 4.285655416207414e-06, "loss": 0.2145, "step": 153950 }, { "epoch": 1.51, "grad_norm": 9.467874526977539, "learning_rate": 4.285531293753165e-06, "loss": 0.2412, "step": 153975 }, { "epoch": 1.51, "grad_norm": 18.414480209350586, "learning_rate": 4.2854071712989174e-06, "loss": 0.2462, "step": 154000 }, { "epoch": 1.51, "grad_norm": 4.616114139556885, "learning_rate": 4.285283048844669e-06, "loss": 0.2449, "step": 154025 }, { "epoch": 1.51, "grad_norm": 29.367795944213867, "learning_rate": 4.285158926390421e-06, "loss": 0.2227, "step": 154050 }, { "epoch": 1.51, "grad_norm": 8.419386863708496, "learning_rate": 4.285034803936172e-06, "loss": 0.3056, "step": 154075 }, { "epoch": 1.52, "grad_norm": 17.266204833984375, "learning_rate": 4.284910681481923e-06, "loss": 0.2504, "step": 154100 }, { "epoch": 1.52, "grad_norm": 4.4080491065979, "learning_rate": 4.284786559027675e-06, "loss": 0.1835, "step": 154125 }, { "epoch": 1.52, "grad_norm": 18.424009323120117, "learning_rate": 4.284662436573426e-06, "loss": 0.2894, "step": 154150 }, { "epoch": 1.52, "grad_norm": 2.3726537227630615, "learning_rate": 4.284538314119178e-06, "loss": 0.2035, "step": 154175 }, { "epoch": 1.52, "grad_norm": 12.00874137878418, "learning_rate": 4.28441419166493e-06, "loss": 0.2267, "step": 154200 }, { "epoch": 1.52, "grad_norm": 9.142280578613281, "learning_rate": 4.284290069210681e-06, "loss": 0.1968, "step": 154225 }, { "epoch": 1.52, "grad_norm": 19.840633392333984, "learning_rate": 4.284165946756432e-06, "loss": 0.2839, "step": 154250 }, { "epoch": 1.52, "grad_norm": 4.421907901763916, "learning_rate": 4.284041824302184e-06, "loss": 0.2109, "step": 154275 }, { "epoch": 1.52, "grad_norm": 15.409043312072754, "learning_rate": 4.283917701847935e-06, "loss": 0.2798, "step": 154300 }, { "epoch": 1.52, "grad_norm": 7.832903861999512, "learning_rate": 4.283793579393687e-06, "loss": 0.1938, "step": 154325 }, { "epoch": 1.52, "grad_norm": 23.22515106201172, "learning_rate": 4.283669456939439e-06, "loss": 0.2287, "step": 154350 }, { "epoch": 1.52, "grad_norm": 8.559239387512207, "learning_rate": 4.28354533448519e-06, "loss": 0.2274, "step": 154375 }, { "epoch": 1.52, "grad_norm": 19.64524269104004, "learning_rate": 4.283421212030942e-06, "loss": 0.2234, "step": 154400 }, { "epoch": 1.52, "grad_norm": 7.868331432342529, "learning_rate": 4.283297089576693e-06, "loss": 0.2465, "step": 154425 }, { "epoch": 1.52, "grad_norm": 16.921897888183594, "learning_rate": 4.283172967122445e-06, "loss": 0.2181, "step": 154450 }, { "epoch": 1.52, "grad_norm": 2.6606290340423584, "learning_rate": 4.283048844668196e-06, "loss": 0.2225, "step": 154475 }, { "epoch": 1.52, "grad_norm": 12.43106460571289, "learning_rate": 4.282924722213948e-06, "loss": 0.2203, "step": 154500 }, { "epoch": 1.52, "grad_norm": 3.1090939044952393, "learning_rate": 4.2828005997597e-06, "loss": 0.2242, "step": 154525 }, { "epoch": 1.52, "grad_norm": 13.094964981079102, "learning_rate": 4.282676477305451e-06, "loss": 0.2489, "step": 154550 }, { "epoch": 1.52, "grad_norm": 5.114131927490234, "learning_rate": 4.282552354851202e-06, "loss": 0.1807, "step": 154575 }, { "epoch": 1.52, "grad_norm": 14.561912536621094, "learning_rate": 4.282428232396954e-06, "loss": 0.2443, "step": 154600 }, { "epoch": 1.52, "grad_norm": 3.3102176189422607, "learning_rate": 4.282304109942705e-06, "loss": 0.1339, "step": 154625 }, { "epoch": 1.52, "grad_norm": 13.95071792602539, "learning_rate": 4.2821799874884566e-06, "loss": 0.2514, "step": 154650 }, { "epoch": 1.52, "grad_norm": 3.7653045654296875, "learning_rate": 4.282055865034209e-06, "loss": 0.2447, "step": 154675 }, { "epoch": 1.52, "grad_norm": 14.83792495727539, "learning_rate": 4.28193174257996e-06, "loss": 0.2789, "step": 154700 }, { "epoch": 1.52, "grad_norm": 9.105911254882812, "learning_rate": 4.281807620125711e-06, "loss": 0.2717, "step": 154725 }, { "epoch": 1.52, "grad_norm": 19.17914581298828, "learning_rate": 4.281683497671463e-06, "loss": 0.2427, "step": 154750 }, { "epoch": 1.52, "grad_norm": 3.2872188091278076, "learning_rate": 4.281559375217214e-06, "loss": 0.1858, "step": 154775 }, { "epoch": 1.52, "grad_norm": Infinity, "learning_rate": 4.281440217661136e-06, "loss": 0.2675, "step": 154800 }, { "epoch": 1.52, "grad_norm": 7.430266380310059, "learning_rate": 4.281316095206887e-06, "loss": 0.1944, "step": 154825 }, { "epoch": 1.52, "grad_norm": 16.807294845581055, "learning_rate": 4.2811919727526395e-06, "loss": 0.2259, "step": 154850 }, { "epoch": 1.52, "grad_norm": 9.804718971252441, "learning_rate": 4.281067850298391e-06, "loss": 0.2313, "step": 154875 }, { "epoch": 1.52, "grad_norm": 22.043291091918945, "learning_rate": 4.280943727844143e-06, "loss": 0.2543, "step": 154900 }, { "epoch": 1.52, "grad_norm": 1.1464285850524902, "learning_rate": 4.280819605389894e-06, "loss": 0.2254, "step": 154925 }, { "epoch": 1.52, "grad_norm": 23.77167320251465, "learning_rate": 4.280695482935645e-06, "loss": 0.2513, "step": 154950 }, { "epoch": 1.52, "grad_norm": 4.080907344818115, "learning_rate": 4.280571360481397e-06, "loss": 0.2036, "step": 154975 }, { "epoch": 1.52, "grad_norm": 14.337126731872559, "learning_rate": 4.2804472380271484e-06, "loss": 0.2545, "step": 155000 }, { "epoch": 1.52, "grad_norm": 8.716533660888672, "learning_rate": 4.2803231155729e-06, "loss": 0.2541, "step": 155025 }, { "epoch": 1.52, "grad_norm": 12.551225662231445, "learning_rate": 4.280198993118652e-06, "loss": 0.2486, "step": 155050 }, { "epoch": 1.52, "grad_norm": 7.007824420928955, "learning_rate": 4.280074870664403e-06, "loss": 0.2012, "step": 155075 }, { "epoch": 1.52, "grad_norm": 10.658587455749512, "learning_rate": 4.279950748210154e-06, "loss": 0.1779, "step": 155100 }, { "epoch": 1.53, "grad_norm": 5.963895320892334, "learning_rate": 4.279826625755906e-06, "loss": 0.1896, "step": 155125 }, { "epoch": 1.53, "grad_norm": 13.547354698181152, "learning_rate": 4.279702503301657e-06, "loss": 0.2866, "step": 155150 }, { "epoch": 1.53, "grad_norm": 2.4956610202789307, "learning_rate": 4.279578380847409e-06, "loss": 0.1964, "step": 155175 }, { "epoch": 1.53, "grad_norm": 6.497873306274414, "learning_rate": 4.279454258393161e-06, "loss": 0.2326, "step": 155200 }, { "epoch": 1.53, "grad_norm": 2.8371472358703613, "learning_rate": 4.279330135938912e-06, "loss": 0.2328, "step": 155225 }, { "epoch": 1.53, "grad_norm": 14.98132610321045, "learning_rate": 4.279206013484664e-06, "loss": 0.2564, "step": 155250 }, { "epoch": 1.53, "grad_norm": 6.400091648101807, "learning_rate": 4.279081891030415e-06, "loss": 0.2465, "step": 155275 }, { "epoch": 1.53, "grad_norm": 14.595258712768555, "learning_rate": 4.278957768576167e-06, "loss": 0.2295, "step": 155300 }, { "epoch": 1.53, "grad_norm": 5.2079081535339355, "learning_rate": 4.278833646121918e-06, "loss": 0.2781, "step": 155325 }, { "epoch": 1.53, "grad_norm": 16.919967651367188, "learning_rate": 4.2787095236676705e-06, "loss": 0.2796, "step": 155350 }, { "epoch": 1.53, "grad_norm": 15.26234245300293, "learning_rate": 4.278585401213422e-06, "loss": 0.2479, "step": 155375 }, { "epoch": 1.53, "grad_norm": 16.645009994506836, "learning_rate": 4.278461278759173e-06, "loss": 0.2608, "step": 155400 }, { "epoch": 1.53, "grad_norm": 6.936745643615723, "learning_rate": 4.278337156304925e-06, "loss": 0.241, "step": 155425 }, { "epoch": 1.53, "grad_norm": 12.836515426635742, "learning_rate": 4.278213033850676e-06, "loss": 0.2658, "step": 155450 }, { "epoch": 1.53, "grad_norm": 7.0490851402282715, "learning_rate": 4.278088911396427e-06, "loss": 0.2206, "step": 155475 }, { "epoch": 1.53, "grad_norm": 19.603788375854492, "learning_rate": 4.2779647889421794e-06, "loss": 0.3409, "step": 155500 }, { "epoch": 1.53, "grad_norm": 9.297009468078613, "learning_rate": 4.277840666487931e-06, "loss": 0.2041, "step": 155525 }, { "epoch": 1.53, "grad_norm": 14.546025276184082, "learning_rate": 4.277716544033682e-06, "loss": 0.2486, "step": 155550 }, { "epoch": 1.53, "grad_norm": 10.306429862976074, "learning_rate": 4.277592421579434e-06, "loss": 0.2199, "step": 155575 }, { "epoch": 1.53, "grad_norm": 14.390276908874512, "learning_rate": 4.277468299125185e-06, "loss": 0.2265, "step": 155600 }, { "epoch": 1.53, "grad_norm": 10.366192817687988, "learning_rate": 4.277344176670936e-06, "loss": 0.1947, "step": 155625 }, { "epoch": 1.53, "grad_norm": 14.302179336547852, "learning_rate": 4.277220054216688e-06, "loss": 0.2144, "step": 155650 }, { "epoch": 1.53, "grad_norm": 3.6751954555511475, "learning_rate": 4.27709593176244e-06, "loss": 0.2764, "step": 155675 }, { "epoch": 1.53, "grad_norm": 17.61345863342285, "learning_rate": 4.276971809308192e-06, "loss": 0.2358, "step": 155700 }, { "epoch": 1.53, "grad_norm": 5.511181831359863, "learning_rate": 4.276847686853943e-06, "loss": 0.177, "step": 155725 }, { "epoch": 1.53, "grad_norm": 16.917621612548828, "learning_rate": 4.276723564399695e-06, "loss": 0.2475, "step": 155750 }, { "epoch": 1.53, "grad_norm": 13.597431182861328, "learning_rate": 4.276599441945446e-06, "loss": 0.2429, "step": 155775 }, { "epoch": 1.53, "grad_norm": 16.597440719604492, "learning_rate": 4.276475319491197e-06, "loss": 0.2511, "step": 155800 }, { "epoch": 1.53, "grad_norm": 9.516221046447754, "learning_rate": 4.276351197036949e-06, "loss": 0.2442, "step": 155825 }, { "epoch": 1.53, "grad_norm": 12.429834365844727, "learning_rate": 4.276227074582701e-06, "loss": 0.2774, "step": 155850 }, { "epoch": 1.53, "grad_norm": 4.225486755371094, "learning_rate": 4.276102952128452e-06, "loss": 0.2304, "step": 155875 }, { "epoch": 1.53, "grad_norm": 11.778178215026855, "learning_rate": 4.275978829674204e-06, "loss": 0.2352, "step": 155900 }, { "epoch": 1.53, "grad_norm": 3.9459500312805176, "learning_rate": 4.275854707219955e-06, "loss": 0.2011, "step": 155925 }, { "epoch": 1.53, "grad_norm": 11.339302062988281, "learning_rate": 4.275730584765706e-06, "loss": 0.2457, "step": 155950 }, { "epoch": 1.53, "grad_norm": 2.986203670501709, "learning_rate": 4.275606462311458e-06, "loss": 0.2192, "step": 155975 }, { "epoch": 1.53, "grad_norm": 13.835809707641602, "learning_rate": 4.27548233985721e-06, "loss": 0.2348, "step": 156000 }, { "epoch": 1.53, "grad_norm": 6.803613185882568, "learning_rate": 4.275358217402961e-06, "loss": 0.2179, "step": 156025 }, { "epoch": 1.53, "grad_norm": 10.780908584594727, "learning_rate": 4.275234094948713e-06, "loss": 0.2722, "step": 156050 }, { "epoch": 1.53, "grad_norm": 6.149384021759033, "learning_rate": 4.275109972494464e-06, "loss": 0.2273, "step": 156075 }, { "epoch": 1.53, "grad_norm": 8.405657768249512, "learning_rate": 4.274985850040216e-06, "loss": 0.227, "step": 156100 }, { "epoch": 1.54, "grad_norm": 5.146408557891846, "learning_rate": 4.274861727585967e-06, "loss": 0.2364, "step": 156125 }, { "epoch": 1.54, "grad_norm": 13.498757362365723, "learning_rate": 4.274737605131719e-06, "loss": 0.2365, "step": 156150 }, { "epoch": 1.54, "grad_norm": 5.395448684692383, "learning_rate": 4.274613482677471e-06, "loss": 0.2389, "step": 156175 }, { "epoch": 1.54, "grad_norm": 15.694646835327148, "learning_rate": 4.274489360223223e-06, "loss": 0.2266, "step": 156200 }, { "epoch": 1.54, "grad_norm": 5.627170085906982, "learning_rate": 4.274365237768974e-06, "loss": 0.2313, "step": 156225 }, { "epoch": 1.54, "grad_norm": 12.12633991241455, "learning_rate": 4.274241115314725e-06, "loss": 0.2245, "step": 156250 }, { "epoch": 1.54, "grad_norm": 1.4020408391952515, "learning_rate": 4.274116992860477e-06, "loss": 0.248, "step": 156275 }, { "epoch": 1.54, "grad_norm": 18.57420539855957, "learning_rate": 4.273992870406228e-06, "loss": 0.2752, "step": 156300 }, { "epoch": 1.54, "grad_norm": 6.238099098205566, "learning_rate": 4.27386874795198e-06, "loss": 0.1915, "step": 156325 }, { "epoch": 1.54, "grad_norm": 10.151573181152344, "learning_rate": 4.273744625497732e-06, "loss": 0.2033, "step": 156350 }, { "epoch": 1.54, "grad_norm": 4.427618026733398, "learning_rate": 4.273620503043483e-06, "loss": 0.2436, "step": 156375 }, { "epoch": 1.54, "grad_norm": 9.550130844116211, "learning_rate": 4.273496380589234e-06, "loss": 0.235, "step": 156400 }, { "epoch": 1.54, "grad_norm": 3.972939968109131, "learning_rate": 4.273372258134986e-06, "loss": 0.2198, "step": 156425 }, { "epoch": 1.54, "grad_norm": 11.002304077148438, "learning_rate": 4.273248135680737e-06, "loss": 0.2375, "step": 156450 }, { "epoch": 1.54, "grad_norm": 6.4197611808776855, "learning_rate": 4.2731240132264885e-06, "loss": 0.1553, "step": 156475 }, { "epoch": 1.54, "grad_norm": 13.864256858825684, "learning_rate": 4.272999890772241e-06, "loss": 0.2504, "step": 156500 }, { "epoch": 1.54, "grad_norm": 9.098670959472656, "learning_rate": 4.272875768317992e-06, "loss": 0.2198, "step": 156525 }, { "epoch": 1.54, "grad_norm": 5.079067230224609, "learning_rate": 4.272751645863744e-06, "loss": 0.2285, "step": 156550 }, { "epoch": 1.54, "grad_norm": 5.997228145599365, "learning_rate": 4.272627523409495e-06, "loss": 0.168, "step": 156575 }, { "epoch": 1.54, "grad_norm": 15.548259735107422, "learning_rate": 4.272503400955247e-06, "loss": 0.2377, "step": 156600 }, { "epoch": 1.54, "grad_norm": 4.460999011993408, "learning_rate": 4.272379278500998e-06, "loss": 0.2028, "step": 156625 }, { "epoch": 1.54, "grad_norm": 12.352339744567871, "learning_rate": 4.2722551560467496e-06, "loss": 0.2361, "step": 156650 }, { "epoch": 1.54, "grad_norm": 7.180659294128418, "learning_rate": 4.272131033592502e-06, "loss": 0.1885, "step": 156675 }, { "epoch": 1.54, "grad_norm": 18.259540557861328, "learning_rate": 4.272006911138253e-06, "loss": 0.2486, "step": 156700 }, { "epoch": 1.54, "grad_norm": 10.739992141723633, "learning_rate": 4.271882788684004e-06, "loss": 0.1843, "step": 156725 }, { "epoch": 1.54, "grad_norm": 16.010059356689453, "learning_rate": 4.271758666229756e-06, "loss": 0.2162, "step": 156750 }, { "epoch": 1.54, "grad_norm": 6.175785541534424, "learning_rate": 4.271634543775507e-06, "loss": 0.2009, "step": 156775 }, { "epoch": 1.54, "grad_norm": 12.047767639160156, "learning_rate": 4.2715104213212585e-06, "loss": 0.329, "step": 156800 }, { "epoch": 1.54, "grad_norm": 2.1381304264068604, "learning_rate": 4.271386298867011e-06, "loss": 0.258, "step": 156825 }, { "epoch": 1.54, "grad_norm": 15.682202339172363, "learning_rate": 4.271262176412762e-06, "loss": 0.2973, "step": 156850 }, { "epoch": 1.54, "grad_norm": 6.336143493652344, "learning_rate": 4.271138053958514e-06, "loss": 0.2551, "step": 156875 }, { "epoch": 1.54, "grad_norm": 9.972777366638184, "learning_rate": 4.271013931504265e-06, "loss": 0.227, "step": 156900 }, { "epoch": 1.54, "grad_norm": 5.800518035888672, "learning_rate": 4.270889809050017e-06, "loss": 0.2377, "step": 156925 }, { "epoch": 1.54, "grad_norm": 14.597494125366211, "learning_rate": 4.270765686595768e-06, "loss": 0.236, "step": 156950 }, { "epoch": 1.54, "grad_norm": 2.477376699447632, "learning_rate": 4.27064156414152e-06, "loss": 0.2335, "step": 156975 }, { "epoch": 1.54, "grad_norm": 22.61914825439453, "learning_rate": 4.270517441687272e-06, "loss": 0.2368, "step": 157000 }, { "epoch": 1.54, "grad_norm": 4.375281810760498, "learning_rate": 4.270393319233023e-06, "loss": 0.2227, "step": 157025 }, { "epoch": 1.54, "grad_norm": 14.78236198425293, "learning_rate": 4.270269196778775e-06, "loss": 0.2014, "step": 157050 }, { "epoch": 1.54, "grad_norm": 4.142543315887451, "learning_rate": 4.270145074324526e-06, "loss": 0.2395, "step": 157075 }, { "epoch": 1.54, "grad_norm": 12.592401504516602, "learning_rate": 4.270020951870277e-06, "loss": 0.2228, "step": 157100 }, { "epoch": 1.54, "grad_norm": 5.774787425994873, "learning_rate": 4.269896829416029e-06, "loss": 0.2214, "step": 157125 }, { "epoch": 1.55, "grad_norm": 8.953211784362793, "learning_rate": 4.2697727069617806e-06, "loss": 0.2095, "step": 157150 }, { "epoch": 1.55, "grad_norm": 6.236383438110352, "learning_rate": 4.269648584507532e-06, "loss": 0.2305, "step": 157175 }, { "epoch": 1.55, "grad_norm": 12.82721996307373, "learning_rate": 4.269524462053284e-06, "loss": 0.237, "step": 157200 }, { "epoch": 1.55, "grad_norm": 14.023513793945312, "learning_rate": 4.269405304497205e-06, "loss": 0.2789, "step": 157225 }, { "epoch": 1.55, "grad_norm": 9.552038192749023, "learning_rate": 4.269281182042956e-06, "loss": 0.2236, "step": 157250 }, { "epoch": 1.55, "grad_norm": 2.968822717666626, "learning_rate": 4.269157059588708e-06, "loss": 0.1951, "step": 157275 }, { "epoch": 1.55, "grad_norm": 13.77944278717041, "learning_rate": 4.269032937134459e-06, "loss": 0.2243, "step": 157300 }, { "epoch": 1.55, "grad_norm": 4.868980884552002, "learning_rate": 4.268908814680211e-06, "loss": 0.1978, "step": 157325 }, { "epoch": 1.55, "grad_norm": 12.880077362060547, "learning_rate": 4.268784692225963e-06, "loss": 0.2192, "step": 157350 }, { "epoch": 1.55, "grad_norm": 6.115312099456787, "learning_rate": 4.268660569771715e-06, "loss": 0.2105, "step": 157375 }, { "epoch": 1.55, "grad_norm": 12.494168281555176, "learning_rate": 4.268536447317466e-06, "loss": 0.254, "step": 157400 }, { "epoch": 1.55, "grad_norm": 3.802915334701538, "learning_rate": 4.268412324863218e-06, "loss": 0.2244, "step": 157425 }, { "epoch": 1.55, "grad_norm": 13.880841255187988, "learning_rate": 4.268288202408969e-06, "loss": 0.2882, "step": 157450 }, { "epoch": 1.55, "grad_norm": 4.994378089904785, "learning_rate": 4.26816407995472e-06, "loss": 0.2073, "step": 157475 }, { "epoch": 1.55, "grad_norm": 11.01861572265625, "learning_rate": 4.2680399575004724e-06, "loss": 0.2096, "step": 157500 }, { "epoch": 1.55, "grad_norm": 8.2003812789917, "learning_rate": 4.267915835046224e-06, "loss": 0.2385, "step": 157525 }, { "epoch": 1.55, "grad_norm": 8.962639808654785, "learning_rate": 4.267791712591975e-06, "loss": 0.2994, "step": 157550 }, { "epoch": 1.55, "grad_norm": 3.5325281620025635, "learning_rate": 4.267667590137727e-06, "loss": 0.231, "step": 157575 }, { "epoch": 1.55, "grad_norm": 16.238605499267578, "learning_rate": 4.267543467683478e-06, "loss": 0.2285, "step": 157600 }, { "epoch": 1.55, "grad_norm": 1.005591630935669, "learning_rate": 4.267419345229229e-06, "loss": 0.2237, "step": 157625 }, { "epoch": 1.55, "grad_norm": 17.521453857421875, "learning_rate": 4.267295222774981e-06, "loss": 0.2707, "step": 157650 }, { "epoch": 1.55, "grad_norm": 3.4588334560394287, "learning_rate": 4.267171100320733e-06, "loss": 0.2329, "step": 157675 }, { "epoch": 1.55, "grad_norm": 12.88310432434082, "learning_rate": 4.267046977866484e-06, "loss": 0.2046, "step": 157700 }, { "epoch": 1.55, "grad_norm": 9.68861198425293, "learning_rate": 4.266922855412236e-06, "loss": 0.2234, "step": 157725 }, { "epoch": 1.55, "grad_norm": 10.087014198303223, "learning_rate": 4.266798732957987e-06, "loss": 0.24, "step": 157750 }, { "epoch": 1.55, "grad_norm": 2.9228463172912598, "learning_rate": 4.266674610503739e-06, "loss": 0.2259, "step": 157775 }, { "epoch": 1.55, "grad_norm": 10.63921070098877, "learning_rate": 4.26655048804949e-06, "loss": 0.2329, "step": 157800 }, { "epoch": 1.55, "grad_norm": 6.459100246429443, "learning_rate": 4.266426365595242e-06, "loss": 0.2486, "step": 157825 }, { "epoch": 1.55, "grad_norm": 17.686992645263672, "learning_rate": 4.266302243140994e-06, "loss": 0.2882, "step": 157850 }, { "epoch": 1.55, "grad_norm": 8.322015762329102, "learning_rate": 4.266178120686746e-06, "loss": 0.1888, "step": 157875 }, { "epoch": 1.55, "grad_norm": 14.501426696777344, "learning_rate": 4.266053998232497e-06, "loss": 0.2353, "step": 157900 }, { "epoch": 1.55, "grad_norm": 6.641213893890381, "learning_rate": 4.265929875778248e-06, "loss": 0.2248, "step": 157925 }, { "epoch": 1.55, "grad_norm": 16.314393997192383, "learning_rate": 4.265805753324e-06, "loss": 0.2503, "step": 157950 }, { "epoch": 1.55, "grad_norm": 5.769965171813965, "learning_rate": 4.265681630869751e-06, "loss": 0.236, "step": 157975 }, { "epoch": 1.55, "grad_norm": 12.57950496673584, "learning_rate": 4.265557508415503e-06, "loss": 0.2096, "step": 158000 }, { "epoch": 1.55, "grad_norm": 5.509645938873291, "learning_rate": 4.265433385961255e-06, "loss": 0.2143, "step": 158025 }, { "epoch": 1.55, "grad_norm": 14.94799518585205, "learning_rate": 4.265309263507006e-06, "loss": 0.2588, "step": 158050 }, { "epoch": 1.55, "grad_norm": 4.698999881744385, "learning_rate": 4.265185141052757e-06, "loss": 0.2027, "step": 158075 }, { "epoch": 1.55, "grad_norm": 15.313980102539062, "learning_rate": 4.265061018598508e-06, "loss": 0.2351, "step": 158100 }, { "epoch": 1.55, "grad_norm": 11.759028434753418, "learning_rate": 4.26493689614426e-06, "loss": 0.2021, "step": 158125 }, { "epoch": 1.55, "grad_norm": 10.989863395690918, "learning_rate": 4.2648127736900115e-06, "loss": 0.2904, "step": 158150 }, { "epoch": 1.56, "grad_norm": 8.02359676361084, "learning_rate": 4.264688651235764e-06, "loss": 0.1922, "step": 158175 }, { "epoch": 1.56, "grad_norm": 13.817997932434082, "learning_rate": 4.264564528781515e-06, "loss": 0.2468, "step": 158200 }, { "epoch": 1.56, "grad_norm": 11.608173370361328, "learning_rate": 4.264440406327267e-06, "loss": 0.2169, "step": 158225 }, { "epoch": 1.56, "grad_norm": 16.421226501464844, "learning_rate": 4.264316283873018e-06, "loss": 0.233, "step": 158250 }, { "epoch": 1.56, "grad_norm": 10.546563148498535, "learning_rate": 4.26419216141877e-06, "loss": 0.2108, "step": 158275 }, { "epoch": 1.56, "grad_norm": 10.697269439697266, "learning_rate": 4.264068038964521e-06, "loss": 0.2718, "step": 158300 }, { "epoch": 1.56, "grad_norm": 5.866827964782715, "learning_rate": 4.2639439165102726e-06, "loss": 0.2653, "step": 158325 }, { "epoch": 1.56, "grad_norm": 9.553948402404785, "learning_rate": 4.263819794056025e-06, "loss": 0.2107, "step": 158350 }, { "epoch": 1.56, "grad_norm": 5.95745325088501, "learning_rate": 4.263695671601776e-06, "loss": 0.2397, "step": 158375 }, { "epoch": 1.56, "grad_norm": 14.774171829223633, "learning_rate": 4.263571549147527e-06, "loss": 0.2447, "step": 158400 }, { "epoch": 1.56, "grad_norm": 3.2782742977142334, "learning_rate": 4.263447426693279e-06, "loss": 0.2477, "step": 158425 }, { "epoch": 1.56, "grad_norm": 18.5080509185791, "learning_rate": 4.26332330423903e-06, "loss": 0.2637, "step": 158450 }, { "epoch": 1.56, "grad_norm": 5.58840274810791, "learning_rate": 4.2631991817847815e-06, "loss": 0.2456, "step": 158475 }, { "epoch": 1.56, "grad_norm": 9.572009086608887, "learning_rate": 4.263075059330534e-06, "loss": 0.2061, "step": 158500 }, { "epoch": 1.56, "grad_norm": 8.64587688446045, "learning_rate": 4.262950936876285e-06, "loss": 0.2445, "step": 158525 }, { "epoch": 1.56, "grad_norm": 16.344837188720703, "learning_rate": 4.262826814422036e-06, "loss": 0.2531, "step": 158550 }, { "epoch": 1.56, "grad_norm": 9.352391242980957, "learning_rate": 4.262702691967788e-06, "loss": 0.2509, "step": 158575 }, { "epoch": 1.56, "grad_norm": 10.331683158874512, "learning_rate": 4.262578569513539e-06, "loss": 0.2365, "step": 158600 }, { "epoch": 1.56, "grad_norm": 2.9126715660095215, "learning_rate": 4.262454447059291e-06, "loss": 0.236, "step": 158625 }, { "epoch": 1.56, "grad_norm": 12.844345092773438, "learning_rate": 4.2623303246050426e-06, "loss": 0.2457, "step": 158650 }, { "epoch": 1.56, "grad_norm": 3.6873414516448975, "learning_rate": 4.262206202150795e-06, "loss": 0.2628, "step": 158675 }, { "epoch": 1.56, "grad_norm": 14.406961441040039, "learning_rate": 4.262082079696546e-06, "loss": 0.2549, "step": 158700 }, { "epoch": 1.56, "grad_norm": 7.115943431854248, "learning_rate": 4.261957957242298e-06, "loss": 0.2546, "step": 158725 }, { "epoch": 1.56, "grad_norm": 14.006903648376465, "learning_rate": 4.261833834788049e-06, "loss": 0.3319, "step": 158750 }, { "epoch": 1.56, "grad_norm": 8.407370567321777, "learning_rate": 4.2617097123338e-06, "loss": 0.2124, "step": 158775 }, { "epoch": 1.56, "grad_norm": 19.947711944580078, "learning_rate": 4.261585589879552e-06, "loss": 0.2392, "step": 158800 }, { "epoch": 1.56, "grad_norm": 4.791472434997559, "learning_rate": 4.2614614674253036e-06, "loss": 0.1981, "step": 158825 }, { "epoch": 1.56, "grad_norm": 14.371504783630371, "learning_rate": 4.261337344971055e-06, "loss": 0.2845, "step": 158850 }, { "epoch": 1.56, "grad_norm": 9.275566101074219, "learning_rate": 4.261213222516807e-06, "loss": 0.2224, "step": 158875 }, { "epoch": 1.56, "grad_norm": 19.20778465270996, "learning_rate": 4.261089100062558e-06, "loss": 0.2852, "step": 158900 }, { "epoch": 1.56, "grad_norm": 14.191509246826172, "learning_rate": 4.260964977608309e-06, "loss": 0.2154, "step": 158925 }, { "epoch": 1.56, "grad_norm": 14.407784461975098, "learning_rate": 4.2608408551540605e-06, "loss": 0.2259, "step": 158950 }, { "epoch": 1.56, "grad_norm": 5.461180210113525, "learning_rate": 4.2607167326998125e-06, "loss": 0.2746, "step": 158975 }, { "epoch": 1.56, "grad_norm": 8.704413414001465, "learning_rate": 4.260592610245564e-06, "loss": 0.2171, "step": 159000 }, { "epoch": 1.56, "grad_norm": 4.057959079742432, "learning_rate": 4.260468487791316e-06, "loss": 0.2173, "step": 159025 }, { "epoch": 1.56, "grad_norm": 12.399489402770996, "learning_rate": 4.260344365337067e-06, "loss": 0.2563, "step": 159050 }, { "epoch": 1.56, "grad_norm": 5.143006324768066, "learning_rate": 4.260220242882819e-06, "loss": 0.219, "step": 159075 }, { "epoch": 1.56, "grad_norm": 13.060162544250488, "learning_rate": 4.26009612042857e-06, "loss": 0.22, "step": 159100 }, { "epoch": 1.56, "grad_norm": 4.706029415130615, "learning_rate": 4.259971997974322e-06, "loss": 0.2249, "step": 159125 }, { "epoch": 1.56, "grad_norm": 13.685298919677734, "learning_rate": 4.2598478755200736e-06, "loss": 0.2223, "step": 159150 }, { "epoch": 1.57, "grad_norm": 3.241534948348999, "learning_rate": 4.259723753065825e-06, "loss": 0.2608, "step": 159175 }, { "epoch": 1.57, "grad_norm": 15.09687614440918, "learning_rate": 4.259599630611577e-06, "loss": 0.2692, "step": 159200 }, { "epoch": 1.57, "grad_norm": 0.7164150476455688, "learning_rate": 4.259475508157328e-06, "loss": 0.188, "step": 159225 }, { "epoch": 1.57, "grad_norm": 18.0360164642334, "learning_rate": 4.259351385703079e-06, "loss": 0.264, "step": 159250 }, { "epoch": 1.57, "grad_norm": 3.949188470840454, "learning_rate": 4.259232228147001e-06, "loss": 0.2297, "step": 159275 }, { "epoch": 1.57, "grad_norm": 14.516581535339355, "learning_rate": 4.259108105692752e-06, "loss": 0.2302, "step": 159300 }, { "epoch": 1.57, "grad_norm": 5.780734539031982, "learning_rate": 4.258983983238504e-06, "loss": 0.1842, "step": 159325 }, { "epoch": 1.57, "grad_norm": 14.736557006835938, "learning_rate": 4.258859860784256e-06, "loss": 0.1736, "step": 159350 }, { "epoch": 1.57, "grad_norm": 1.6698193550109863, "learning_rate": 4.258735738330007e-06, "loss": 0.2426, "step": 159375 }, { "epoch": 1.57, "grad_norm": 10.41301441192627, "learning_rate": 4.258611615875759e-06, "loss": 0.2652, "step": 159400 }, { "epoch": 1.57, "grad_norm": 4.394448280334473, "learning_rate": 4.25848749342151e-06, "loss": 0.2696, "step": 159425 }, { "epoch": 1.57, "grad_norm": 12.83985710144043, "learning_rate": 4.258363370967261e-06, "loss": 0.2454, "step": 159450 }, { "epoch": 1.57, "grad_norm": 4.388443470001221, "learning_rate": 4.258239248513013e-06, "loss": 0.2158, "step": 159475 }, { "epoch": 1.57, "grad_norm": 12.510028839111328, "learning_rate": 4.2581151260587646e-06, "loss": 0.2082, "step": 159500 }, { "epoch": 1.57, "grad_norm": 2.6283977031707764, "learning_rate": 4.257991003604517e-06, "loss": 0.2025, "step": 159525 }, { "epoch": 1.57, "grad_norm": 17.29793357849121, "learning_rate": 4.257866881150268e-06, "loss": 0.2659, "step": 159550 }, { "epoch": 1.57, "grad_norm": 5.533842086791992, "learning_rate": 4.25774275869602e-06, "loss": 0.2519, "step": 159575 }, { "epoch": 1.57, "grad_norm": 16.24468994140625, "learning_rate": 4.257618636241771e-06, "loss": 0.2484, "step": 159600 }, { "epoch": 1.57, "grad_norm": 5.853771209716797, "learning_rate": 4.257494513787522e-06, "loss": 0.206, "step": 159625 }, { "epoch": 1.57, "grad_norm": 15.631515502929688, "learning_rate": 4.257370391333274e-06, "loss": 0.2202, "step": 159650 }, { "epoch": 1.57, "grad_norm": 3.286953926086426, "learning_rate": 4.257246268879026e-06, "loss": 0.2041, "step": 159675 }, { "epoch": 1.57, "grad_norm": 9.951560974121094, "learning_rate": 4.257122146424777e-06, "loss": 0.2304, "step": 159700 }, { "epoch": 1.57, "grad_norm": 8.928295135498047, "learning_rate": 4.256998023970529e-06, "loss": 0.2145, "step": 159725 }, { "epoch": 1.57, "grad_norm": 16.524442672729492, "learning_rate": 4.25687390151628e-06, "loss": 0.2187, "step": 159750 }, { "epoch": 1.57, "grad_norm": 8.591662406921387, "learning_rate": 4.256749779062031e-06, "loss": 0.1585, "step": 159775 }, { "epoch": 1.57, "grad_norm": 10.41215705871582, "learning_rate": 4.256625656607783e-06, "loss": 0.2962, "step": 159800 }, { "epoch": 1.57, "grad_norm": 7.295282363891602, "learning_rate": 4.2565015341535346e-06, "loss": 0.1691, "step": 159825 }, { "epoch": 1.57, "grad_norm": 21.239961624145508, "learning_rate": 4.256377411699287e-06, "loss": 0.2476, "step": 159850 }, { "epoch": 1.57, "grad_norm": 9.81852912902832, "learning_rate": 4.256253289245038e-06, "loss": 0.2206, "step": 159875 }, { "epoch": 1.57, "grad_norm": 24.164464950561523, "learning_rate": 4.25612916679079e-06, "loss": 0.234, "step": 159900 }, { "epoch": 1.57, "grad_norm": 8.056458473205566, "learning_rate": 4.256005044336541e-06, "loss": 0.1973, "step": 159925 }, { "epoch": 1.57, "grad_norm": 19.454853057861328, "learning_rate": 4.255880921882293e-06, "loss": 0.2876, "step": 159950 }, { "epoch": 1.57, "grad_norm": 5.693286895751953, "learning_rate": 4.255756799428044e-06, "loss": 0.2052, "step": 159975 }, { "epoch": 1.57, "grad_norm": 18.591270446777344, "learning_rate": 4.2556326769737956e-06, "loss": 0.2461, "step": 160000 }, { "epoch": 1.57, "eval_loss": 0.5067031383514404, "eval_runtime": 6048.0503, "eval_samples_per_second": 1.565, "eval_steps_per_second": 0.196, "eval_wer": 0.13223459730895512, "step": 160000 }, { "epoch": 1.57, "grad_norm": 8.32210636138916, "learning_rate": 4.255508554519548e-06, "loss": 0.227, "step": 160025 }, { "epoch": 1.57, "grad_norm": 16.321674346923828, "learning_rate": 4.255384432065299e-06, "loss": 0.2659, "step": 160050 }, { "epoch": 1.57, "grad_norm": 19.368175506591797, "learning_rate": 4.25526030961105e-06, "loss": 0.2222, "step": 160075 }, { "epoch": 1.57, "grad_norm": 14.011585235595703, "learning_rate": 4.255136187156802e-06, "loss": 0.2564, "step": 160100 }, { "epoch": 1.57, "grad_norm": 3.0810699462890625, "learning_rate": 4.255012064702553e-06, "loss": 0.2057, "step": 160125 }, { "epoch": 1.57, "grad_norm": 13.010673522949219, "learning_rate": 4.2548879422483045e-06, "loss": 0.2532, "step": 160150 }, { "epoch": 1.57, "grad_norm": 1.4353196620941162, "learning_rate": 4.254763819794057e-06, "loss": 0.242, "step": 160175 }, { "epoch": 1.58, "grad_norm": 7.688079833984375, "learning_rate": 4.254639697339808e-06, "loss": 0.2571, "step": 160200 }, { "epoch": 1.58, "grad_norm": 12.639636993408203, "learning_rate": 4.254515574885559e-06, "loss": 0.2597, "step": 160225 }, { "epoch": 1.58, "grad_norm": 16.862838745117188, "learning_rate": 4.254391452431311e-06, "loss": 0.2374, "step": 160250 }, { "epoch": 1.58, "grad_norm": 5.7008280754089355, "learning_rate": 4.254267329977062e-06, "loss": 0.2558, "step": 160275 }, { "epoch": 1.58, "grad_norm": 21.893823623657227, "learning_rate": 4.254143207522814e-06, "loss": 0.2332, "step": 160300 }, { "epoch": 1.58, "grad_norm": 2.338122606277466, "learning_rate": 4.2540190850685656e-06, "loss": 0.2018, "step": 160325 }, { "epoch": 1.58, "grad_norm": 8.360114097595215, "learning_rate": 4.253894962614318e-06, "loss": 0.1935, "step": 160350 }, { "epoch": 1.58, "grad_norm": 6.4757280349731445, "learning_rate": 4.253770840160069e-06, "loss": 0.2606, "step": 160375 }, { "epoch": 1.58, "grad_norm": 17.54697608947754, "learning_rate": 4.25364671770582e-06, "loss": 0.2313, "step": 160400 }, { "epoch": 1.58, "grad_norm": 7.689447402954102, "learning_rate": 4.253522595251572e-06, "loss": 0.2067, "step": 160425 }, { "epoch": 1.58, "grad_norm": 14.39941120147705, "learning_rate": 4.253398472797323e-06, "loss": 0.2673, "step": 160450 }, { "epoch": 1.58, "grad_norm": 11.289338111877441, "learning_rate": 4.2532743503430745e-06, "loss": 0.3035, "step": 160475 }, { "epoch": 1.58, "grad_norm": 12.489645004272461, "learning_rate": 4.253150227888827e-06, "loss": 0.176, "step": 160500 }, { "epoch": 1.58, "grad_norm": 6.548092842102051, "learning_rate": 4.253026105434578e-06, "loss": 0.2218, "step": 160525 }, { "epoch": 1.58, "grad_norm": 9.54586124420166, "learning_rate": 4.252901982980329e-06, "loss": 0.2079, "step": 160550 }, { "epoch": 1.58, "grad_norm": 4.514657020568848, "learning_rate": 4.252777860526081e-06, "loss": 0.1949, "step": 160575 }, { "epoch": 1.58, "grad_norm": 10.698180198669434, "learning_rate": 4.252653738071832e-06, "loss": 0.2358, "step": 160600 }, { "epoch": 1.58, "grad_norm": 7.34112024307251, "learning_rate": 4.2525296156175835e-06, "loss": 0.187, "step": 160625 }, { "epoch": 1.58, "grad_norm": 16.474855422973633, "learning_rate": 4.2524054931633355e-06, "loss": 0.2438, "step": 160650 }, { "epoch": 1.58, "grad_norm": 4.81999397277832, "learning_rate": 4.252281370709087e-06, "loss": 0.2329, "step": 160675 }, { "epoch": 1.58, "grad_norm": 20.46186065673828, "learning_rate": 4.252157248254839e-06, "loss": 0.231, "step": 160700 }, { "epoch": 1.58, "grad_norm": 5.988748073577881, "learning_rate": 4.25203312580059e-06, "loss": 0.2028, "step": 160725 }, { "epoch": 1.58, "grad_norm": 14.924715042114258, "learning_rate": 4.251909003346342e-06, "loss": 0.2259, "step": 160750 }, { "epoch": 1.58, "grad_norm": 0.9257586598396301, "learning_rate": 4.251784880892093e-06, "loss": 0.225, "step": 160775 }, { "epoch": 1.58, "grad_norm": 14.53828239440918, "learning_rate": 4.251660758437845e-06, "loss": 0.2306, "step": 160800 }, { "epoch": 1.58, "grad_norm": 7.894986629486084, "learning_rate": 4.2515366359835966e-06, "loss": 0.2084, "step": 160825 }, { "epoch": 1.58, "grad_norm": 18.48267936706543, "learning_rate": 4.251412513529348e-06, "loss": 0.269, "step": 160850 }, { "epoch": 1.58, "grad_norm": 3.51305890083313, "learning_rate": 4.2512883910751e-06, "loss": 0.205, "step": 160875 }, { "epoch": 1.58, "grad_norm": 12.275657653808594, "learning_rate": 4.251164268620851e-06, "loss": 0.2452, "step": 160900 }, { "epoch": 1.58, "grad_norm": 22.880390167236328, "learning_rate": 4.251040146166602e-06, "loss": 0.2272, "step": 160925 }, { "epoch": 1.58, "grad_norm": 16.09678840637207, "learning_rate": 4.250916023712354e-06, "loss": 0.2463, "step": 160950 }, { "epoch": 1.58, "grad_norm": 4.748548984527588, "learning_rate": 4.2507919012581055e-06, "loss": 0.2488, "step": 160975 }, { "epoch": 1.58, "grad_norm": 11.097140312194824, "learning_rate": 4.250667778803857e-06, "loss": 0.3181, "step": 161000 }, { "epoch": 1.58, "grad_norm": 9.382569313049316, "learning_rate": 4.250543656349609e-06, "loss": 0.1837, "step": 161025 }, { "epoch": 1.58, "grad_norm": 13.046674728393555, "learning_rate": 4.25041953389536e-06, "loss": 0.2075, "step": 161050 }, { "epoch": 1.58, "grad_norm": 5.34596061706543, "learning_rate": 4.250295411441111e-06, "loss": 0.2045, "step": 161075 }, { "epoch": 1.58, "grad_norm": 14.525503158569336, "learning_rate": 4.250171288986863e-06, "loss": 0.2882, "step": 161100 }, { "epoch": 1.58, "grad_norm": 3.656529664993286, "learning_rate": 4.2500471665326145e-06, "loss": 0.2387, "step": 161125 }, { "epoch": 1.58, "grad_norm": 29.09842300415039, "learning_rate": 4.2499230440783665e-06, "loss": 0.2457, "step": 161150 }, { "epoch": 1.58, "grad_norm": 5.749814987182617, "learning_rate": 4.249798921624118e-06, "loss": 0.2234, "step": 161175 }, { "epoch": 1.58, "grad_norm": 17.753450393676758, "learning_rate": 4.24967479916987e-06, "loss": 0.2694, "step": 161200 }, { "epoch": 1.59, "grad_norm": 4.93623161315918, "learning_rate": 4.249550676715621e-06, "loss": 0.2052, "step": 161225 }, { "epoch": 1.59, "grad_norm": 12.632818222045898, "learning_rate": 4.249426554261372e-06, "loss": 0.2094, "step": 161250 }, { "epoch": 1.59, "grad_norm": 9.042328834533691, "learning_rate": 4.249302431807124e-06, "loss": 0.1839, "step": 161275 }, { "epoch": 1.59, "grad_norm": 13.420689582824707, "learning_rate": 4.2491783093528755e-06, "loss": 0.2549, "step": 161300 }, { "epoch": 1.59, "grad_norm": 8.178325653076172, "learning_rate": 4.249054186898627e-06, "loss": 0.2394, "step": 161325 }, { "epoch": 1.59, "grad_norm": 15.884368896484375, "learning_rate": 4.248930064444379e-06, "loss": 0.2209, "step": 161350 }, { "epoch": 1.59, "grad_norm": 7.29208517074585, "learning_rate": 4.24880594199013e-06, "loss": 0.2775, "step": 161375 }, { "epoch": 1.59, "grad_norm": 11.937190055847168, "learning_rate": 4.248681819535881e-06, "loss": 0.2459, "step": 161400 }, { "epoch": 1.59, "grad_norm": 13.19316291809082, "learning_rate": 4.248557697081633e-06, "loss": 0.2385, "step": 161425 }, { "epoch": 1.59, "grad_norm": 19.240259170532227, "learning_rate": 4.2484335746273845e-06, "loss": 0.2139, "step": 161450 }, { "epoch": 1.59, "grad_norm": 5.059479713439941, "learning_rate": 4.248309452173136e-06, "loss": 0.1903, "step": 161475 }, { "epoch": 1.59, "grad_norm": 16.336103439331055, "learning_rate": 4.248185329718888e-06, "loss": 0.2311, "step": 161500 }, { "epoch": 1.59, "grad_norm": 4.702790260314941, "learning_rate": 4.248061207264639e-06, "loss": 0.17, "step": 161525 }, { "epoch": 1.59, "grad_norm": 20.281909942626953, "learning_rate": 4.247937084810391e-06, "loss": 0.2398, "step": 161550 }, { "epoch": 1.59, "grad_norm": 3.816908121109009, "learning_rate": 4.247812962356142e-06, "loss": 0.2276, "step": 161575 }, { "epoch": 1.59, "grad_norm": 15.396364212036133, "learning_rate": 4.247688839901894e-06, "loss": 0.2255, "step": 161600 }, { "epoch": 1.59, "grad_norm": 5.598994731903076, "learning_rate": 4.2475647174476455e-06, "loss": 0.2321, "step": 161625 }, { "epoch": 1.59, "grad_norm": 15.301058769226074, "learning_rate": 4.2474405949933976e-06, "loss": 0.1855, "step": 161650 }, { "epoch": 1.59, "grad_norm": 3.260749101638794, "learning_rate": 4.247316472539149e-06, "loss": 0.2004, "step": 161675 }, { "epoch": 1.59, "grad_norm": 15.035653114318848, "learning_rate": 4.2471923500849e-06, "loss": 0.263, "step": 161700 }, { "epoch": 1.59, "grad_norm": 6.25007438659668, "learning_rate": 4.247068227630652e-06, "loss": 0.2489, "step": 161725 }, { "epoch": 1.59, "grad_norm": 17.898508071899414, "learning_rate": 4.246944105176403e-06, "loss": 0.2986, "step": 161750 }, { "epoch": 1.59, "grad_norm": 5.513760566711426, "learning_rate": 4.2468199827221545e-06, "loss": 0.2526, "step": 161775 }, { "epoch": 1.59, "grad_norm": 15.41172981262207, "learning_rate": 4.2466958602679065e-06, "loss": 0.2285, "step": 161800 }, { "epoch": 1.59, "grad_norm": 5.345950603485107, "learning_rate": 4.246571737813658e-06, "loss": 0.2249, "step": 161825 }, { "epoch": 1.59, "grad_norm": 5.7921462059021, "learning_rate": 4.246447615359409e-06, "loss": 0.2829, "step": 161850 }, { "epoch": 1.59, "grad_norm": 3.699719190597534, "learning_rate": 4.246323492905161e-06, "loss": 0.1967, "step": 161875 }, { "epoch": 1.59, "grad_norm": 21.545873641967773, "learning_rate": 4.246199370450912e-06, "loss": 0.2811, "step": 161900 }, { "epoch": 1.59, "grad_norm": 2.8951544761657715, "learning_rate": 4.246075247996663e-06, "loss": 0.2442, "step": 161925 }, { "epoch": 1.59, "grad_norm": 10.158757209777832, "learning_rate": 4.2459511255424155e-06, "loss": 0.2126, "step": 161950 }, { "epoch": 1.59, "grad_norm": 7.255776405334473, "learning_rate": 4.245827003088167e-06, "loss": 0.2511, "step": 161975 }, { "epoch": 1.59, "grad_norm": 14.14648723602295, "learning_rate": 4.245702880633919e-06, "loss": 0.2941, "step": 162000 }, { "epoch": 1.59, "grad_norm": 13.896936416625977, "learning_rate": 4.24557875817967e-06, "loss": 0.2199, "step": 162025 }, { "epoch": 1.59, "grad_norm": 15.55219554901123, "learning_rate": 4.245454635725422e-06, "loss": 0.2506, "step": 162050 }, { "epoch": 1.59, "grad_norm": 5.889412879943848, "learning_rate": 4.245330513271173e-06, "loss": 0.2204, "step": 162075 }, { "epoch": 1.59, "grad_norm": 6.505858898162842, "learning_rate": 4.2452063908169244e-06, "loss": 0.1999, "step": 162100 }, { "epoch": 1.59, "grad_norm": 7.458347797393799, "learning_rate": 4.2450822683626765e-06, "loss": 0.2216, "step": 162125 }, { "epoch": 1.59, "grad_norm": 12.483957290649414, "learning_rate": 4.244958145908428e-06, "loss": 0.2344, "step": 162150 }, { "epoch": 1.59, "grad_norm": 7.8262248039245605, "learning_rate": 4.244834023454179e-06, "loss": 0.2495, "step": 162175 }, { "epoch": 1.59, "grad_norm": 14.063728332519531, "learning_rate": 4.244709900999931e-06, "loss": 0.2428, "step": 162200 }, { "epoch": 1.6, "grad_norm": 7.292712688446045, "learning_rate": 4.244585778545682e-06, "loss": 0.1892, "step": 162225 }, { "epoch": 1.6, "grad_norm": 8.811478614807129, "learning_rate": 4.244461656091433e-06, "loss": 0.2272, "step": 162250 }, { "epoch": 1.6, "grad_norm": 5.9130859375, "learning_rate": 4.2443375336371855e-06, "loss": 0.2183, "step": 162275 }, { "epoch": 1.6, "grad_norm": 9.058180809020996, "learning_rate": 4.244213411182937e-06, "loss": 0.2114, "step": 162300 }, { "epoch": 1.6, "grad_norm": 6.254515647888184, "learning_rate": 4.244089288728688e-06, "loss": 0.2269, "step": 162325 }, { "epoch": 1.6, "grad_norm": 17.766605377197266, "learning_rate": 4.24396516627444e-06, "loss": 0.1952, "step": 162350 }, { "epoch": 1.6, "grad_norm": 5.03363037109375, "learning_rate": 4.243841043820191e-06, "loss": 0.1694, "step": 162375 }, { "epoch": 1.6, "grad_norm": 18.367538452148438, "learning_rate": 4.243716921365943e-06, "loss": 0.2407, "step": 162400 }, { "epoch": 1.6, "grad_norm": 10.735163688659668, "learning_rate": 4.243597763809864e-06, "loss": 0.2385, "step": 162425 }, { "epoch": 1.6, "grad_norm": 18.674694061279297, "learning_rate": 4.243473641355616e-06, "loss": 0.2576, "step": 162450 }, { "epoch": 1.6, "grad_norm": 3.817281484603882, "learning_rate": 4.2433495189013675e-06, "loss": 0.222, "step": 162475 }, { "epoch": 1.6, "grad_norm": 16.56203269958496, "learning_rate": 4.2432253964471196e-06, "loss": 0.2553, "step": 162500 }, { "epoch": 1.6, "grad_norm": 4.7452802658081055, "learning_rate": 4.243101273992871e-06, "loss": 0.1981, "step": 162525 }, { "epoch": 1.6, "grad_norm": 15.456443786621094, "learning_rate": 4.242977151538623e-06, "loss": 0.2242, "step": 162550 }, { "epoch": 1.6, "grad_norm": 5.726986885070801, "learning_rate": 4.242853029084374e-06, "loss": 0.2261, "step": 162575 }, { "epoch": 1.6, "grad_norm": 16.785900115966797, "learning_rate": 4.242728906630125e-06, "loss": 0.2357, "step": 162600 }, { "epoch": 1.6, "grad_norm": 10.00997257232666, "learning_rate": 4.2426047841758765e-06, "loss": 0.2275, "step": 162625 }, { "epoch": 1.6, "grad_norm": 11.295741081237793, "learning_rate": 4.2424806617216285e-06, "loss": 0.2437, "step": 162650 }, { "epoch": 1.6, "grad_norm": 9.966660499572754, "learning_rate": 4.24235653926738e-06, "loss": 0.1987, "step": 162675 }, { "epoch": 1.6, "grad_norm": 16.41158676147461, "learning_rate": 4.242232416813131e-06, "loss": 0.2733, "step": 162700 }, { "epoch": 1.6, "grad_norm": 6.331147193908691, "learning_rate": 4.242108294358883e-06, "loss": 0.2576, "step": 162725 }, { "epoch": 1.6, "grad_norm": 19.03277587890625, "learning_rate": 4.241984171904634e-06, "loss": 0.2589, "step": 162750 }, { "epoch": 1.6, "grad_norm": 4.352421283721924, "learning_rate": 4.241860049450386e-06, "loss": 0.2083, "step": 162775 }, { "epoch": 1.6, "grad_norm": 19.895971298217773, "learning_rate": 4.2417359269961375e-06, "loss": 0.2682, "step": 162800 }, { "epoch": 1.6, "grad_norm": 4.350435733795166, "learning_rate": 4.2416118045418896e-06, "loss": 0.2054, "step": 162825 }, { "epoch": 1.6, "grad_norm": 21.815292358398438, "learning_rate": 4.241487682087641e-06, "loss": 0.2342, "step": 162850 }, { "epoch": 1.6, "grad_norm": 14.840681076049805, "learning_rate": 4.241363559633393e-06, "loss": 0.1996, "step": 162875 }, { "epoch": 1.6, "grad_norm": 11.360322952270508, "learning_rate": 4.241239437179144e-06, "loss": 0.2223, "step": 162900 }, { "epoch": 1.6, "grad_norm": 3.5842084884643555, "learning_rate": 4.241115314724895e-06, "loss": 0.211, "step": 162925 }, { "epoch": 1.6, "grad_norm": 16.239145278930664, "learning_rate": 4.240991192270647e-06, "loss": 0.2808, "step": 162950 }, { "epoch": 1.6, "grad_norm": 5.4765143394470215, "learning_rate": 4.2408670698163985e-06, "loss": 0.2181, "step": 162975 }, { "epoch": 1.6, "grad_norm": 12.305888175964355, "learning_rate": 4.24074294736215e-06, "loss": 0.2367, "step": 163000 }, { "epoch": 1.6, "grad_norm": 3.385349988937378, "learning_rate": 4.240618824907902e-06, "loss": 0.193, "step": 163025 }, { "epoch": 1.6, "grad_norm": 11.456589698791504, "learning_rate": 4.240494702453653e-06, "loss": 0.2752, "step": 163050 }, { "epoch": 1.6, "grad_norm": 5.408264636993408, "learning_rate": 4.240370579999404e-06, "loss": 0.2054, "step": 163075 }, { "epoch": 1.6, "grad_norm": 14.511981964111328, "learning_rate": 4.240246457545156e-06, "loss": 0.2743, "step": 163100 }, { "epoch": 1.6, "grad_norm": 1.1588666439056396, "learning_rate": 4.2401223350909075e-06, "loss": 0.212, "step": 163125 }, { "epoch": 1.6, "grad_norm": 12.382743835449219, "learning_rate": 4.239998212636659e-06, "loss": 0.2432, "step": 163150 }, { "epoch": 1.6, "grad_norm": 3.7582030296325684, "learning_rate": 4.239874090182411e-06, "loss": 0.2012, "step": 163175 }, { "epoch": 1.6, "grad_norm": 9.414433479309082, "learning_rate": 4.239749967728162e-06, "loss": 0.2316, "step": 163200 }, { "epoch": 1.6, "grad_norm": 5.838921546936035, "learning_rate": 4.239625845273914e-06, "loss": 0.1938, "step": 163225 }, { "epoch": 1.61, "grad_norm": 18.909223556518555, "learning_rate": 4.239501722819665e-06, "loss": 0.2634, "step": 163250 }, { "epoch": 1.61, "grad_norm": 7.226001739501953, "learning_rate": 4.239377600365417e-06, "loss": 0.2523, "step": 163275 }, { "epoch": 1.61, "grad_norm": 12.507454872131348, "learning_rate": 4.2392534779111685e-06, "loss": 0.2877, "step": 163300 }, { "epoch": 1.61, "grad_norm": 5.6204986572265625, "learning_rate": 4.2391293554569206e-06, "loss": 0.2533, "step": 163325 }, { "epoch": 1.61, "grad_norm": 17.56370735168457, "learning_rate": 4.239005233002672e-06, "loss": 0.2342, "step": 163350 }, { "epoch": 1.61, "grad_norm": 1.2700351476669312, "learning_rate": 4.238881110548423e-06, "loss": 0.1829, "step": 163375 }, { "epoch": 1.61, "grad_norm": 8.970782279968262, "learning_rate": 4.238756988094175e-06, "loss": 0.2453, "step": 163400 }, { "epoch": 1.61, "grad_norm": 5.2446818351745605, "learning_rate": 4.238632865639926e-06, "loss": 0.2352, "step": 163425 }, { "epoch": 1.61, "grad_norm": 27.709796905517578, "learning_rate": 4.2385087431856775e-06, "loss": 0.2232, "step": 163450 }, { "epoch": 1.61, "grad_norm": 5.249941825866699, "learning_rate": 4.2383846207314295e-06, "loss": 0.1789, "step": 163475 }, { "epoch": 1.61, "grad_norm": 13.711366653442383, "learning_rate": 4.238260498277181e-06, "loss": 0.2154, "step": 163500 }, { "epoch": 1.61, "grad_norm": 1.1397870779037476, "learning_rate": 4.238136375822932e-06, "loss": 0.2263, "step": 163525 }, { "epoch": 1.61, "grad_norm": 14.93533706665039, "learning_rate": 4.238012253368683e-06, "loss": 0.2375, "step": 163550 }, { "epoch": 1.61, "grad_norm": 6.134949207305908, "learning_rate": 4.237888130914435e-06, "loss": 0.2499, "step": 163575 }, { "epoch": 1.61, "grad_norm": 11.717551231384277, "learning_rate": 4.2377640084601864e-06, "loss": 0.1566, "step": 163600 }, { "epoch": 1.61, "grad_norm": 6.27119255065918, "learning_rate": 4.2376398860059385e-06, "loss": 0.2295, "step": 163625 }, { "epoch": 1.61, "grad_norm": 17.131017684936523, "learning_rate": 4.23751576355169e-06, "loss": 0.2208, "step": 163650 }, { "epoch": 1.61, "grad_norm": 4.6613593101501465, "learning_rate": 4.237391641097442e-06, "loss": 0.2206, "step": 163675 }, { "epoch": 1.61, "grad_norm": 10.724234580993652, "learning_rate": 4.237267518643193e-06, "loss": 0.2495, "step": 163700 }, { "epoch": 1.61, "grad_norm": 4.281327724456787, "learning_rate": 4.237143396188945e-06, "loss": 0.2461, "step": 163725 }, { "epoch": 1.61, "grad_norm": 11.475828170776367, "learning_rate": 4.237019273734696e-06, "loss": 0.232, "step": 163750 }, { "epoch": 1.61, "grad_norm": 2.4698338508605957, "learning_rate": 4.2368951512804474e-06, "loss": 0.2558, "step": 163775 }, { "epoch": 1.61, "grad_norm": 15.75853157043457, "learning_rate": 4.2367710288261995e-06, "loss": 0.2196, "step": 163800 }, { "epoch": 1.61, "grad_norm": 3.041011333465576, "learning_rate": 4.236646906371951e-06, "loss": 0.2057, "step": 163825 }, { "epoch": 1.61, "grad_norm": 20.416522979736328, "learning_rate": 4.236522783917702e-06, "loss": 0.218, "step": 163850 }, { "epoch": 1.61, "grad_norm": 16.626995086669922, "learning_rate": 4.236398661463454e-06, "loss": 0.2346, "step": 163875 }, { "epoch": 1.61, "grad_norm": 19.126367568969727, "learning_rate": 4.236274539009205e-06, "loss": 0.2735, "step": 163900 }, { "epoch": 1.61, "grad_norm": 3.67789626121521, "learning_rate": 4.236150416554956e-06, "loss": 0.2135, "step": 163925 }, { "epoch": 1.61, "grad_norm": 15.09296703338623, "learning_rate": 4.2360262941007085e-06, "loss": 0.2672, "step": 163950 }, { "epoch": 1.61, "grad_norm": 1.7276750802993774, "learning_rate": 4.23590217164646e-06, "loss": 0.217, "step": 163975 }, { "epoch": 1.61, "grad_norm": 17.18401336669922, "learning_rate": 4.235778049192211e-06, "loss": 0.2474, "step": 164000 }, { "epoch": 1.61, "grad_norm": 6.438230991363525, "learning_rate": 4.235653926737963e-06, "loss": 0.2231, "step": 164025 }, { "epoch": 1.61, "grad_norm": 11.596538543701172, "learning_rate": 4.235529804283714e-06, "loss": 0.2884, "step": 164050 }, { "epoch": 1.61, "grad_norm": 7.197636604309082, "learning_rate": 4.235405681829466e-06, "loss": 0.2233, "step": 164075 }, { "epoch": 1.61, "grad_norm": 12.71853256225586, "learning_rate": 4.2352815593752174e-06, "loss": 0.3198, "step": 164100 }, { "epoch": 1.61, "grad_norm": 0.6108932495117188, "learning_rate": 4.2351574369209695e-06, "loss": 0.196, "step": 164125 }, { "epoch": 1.61, "grad_norm": 13.416570663452148, "learning_rate": 4.235033314466721e-06, "loss": 0.2531, "step": 164150 }, { "epoch": 1.61, "grad_norm": 8.490299224853516, "learning_rate": 4.234909192012473e-06, "loss": 0.2338, "step": 164175 }, { "epoch": 1.61, "grad_norm": 6.050469875335693, "learning_rate": 4.234785069558224e-06, "loss": 0.2664, "step": 164200 }, { "epoch": 1.61, "grad_norm": 6.134503364562988, "learning_rate": 4.234660947103975e-06, "loss": 0.2506, "step": 164225 }, { "epoch": 1.61, "grad_norm": 14.636938095092773, "learning_rate": 4.234536824649727e-06, "loss": 0.2855, "step": 164250 }, { "epoch": 1.62, "grad_norm": 7.010653495788574, "learning_rate": 4.2344127021954785e-06, "loss": 0.2238, "step": 164275 }, { "epoch": 1.62, "grad_norm": 9.45267105102539, "learning_rate": 4.23428857974123e-06, "loss": 0.2036, "step": 164300 }, { "epoch": 1.62, "grad_norm": 9.296924591064453, "learning_rate": 4.234164457286982e-06, "loss": 0.1685, "step": 164325 }, { "epoch": 1.62, "grad_norm": 12.739243507385254, "learning_rate": 4.234040334832733e-06, "loss": 0.2119, "step": 164350 }, { "epoch": 1.62, "grad_norm": 4.463563919067383, "learning_rate": 4.233916212378484e-06, "loss": 0.2647, "step": 164375 }, { "epoch": 1.62, "grad_norm": 7.581423282623291, "learning_rate": 4.233792089924235e-06, "loss": 0.2177, "step": 164400 }, { "epoch": 1.62, "grad_norm": 5.8050336837768555, "learning_rate": 4.233672932368157e-06, "loss": 0.2245, "step": 164425 }, { "epoch": 1.62, "grad_norm": 16.456817626953125, "learning_rate": 4.2335488099139084e-06, "loss": 0.2004, "step": 164450 }, { "epoch": 1.62, "grad_norm": 2.326432943344116, "learning_rate": 4.2334246874596605e-06, "loss": 0.2387, "step": 164475 }, { "epoch": 1.62, "grad_norm": 19.925273895263672, "learning_rate": 4.233300565005412e-06, "loss": 0.2609, "step": 164500 }, { "epoch": 1.62, "grad_norm": 3.29634165763855, "learning_rate": 4.233176442551164e-06, "loss": 0.2151, "step": 164525 }, { "epoch": 1.62, "grad_norm": 6.838304042816162, "learning_rate": 4.233052320096915e-06, "loss": 0.2662, "step": 164550 }, { "epoch": 1.62, "grad_norm": 6.743667125701904, "learning_rate": 4.232928197642667e-06, "loss": 0.1868, "step": 164575 }, { "epoch": 1.62, "grad_norm": 15.661770820617676, "learning_rate": 4.232804075188418e-06, "loss": 0.273, "step": 164600 }, { "epoch": 1.62, "grad_norm": 10.88817024230957, "learning_rate": 4.23267995273417e-06, "loss": 0.2481, "step": 164625 }, { "epoch": 1.62, "grad_norm": 14.942523002624512, "learning_rate": 4.2325558302799215e-06, "loss": 0.2245, "step": 164650 }, { "epoch": 1.62, "grad_norm": 2.9382009506225586, "learning_rate": 4.232431707825673e-06, "loss": 0.2184, "step": 164675 }, { "epoch": 1.62, "grad_norm": 25.057493209838867, "learning_rate": 4.232307585371425e-06, "loss": 0.2502, "step": 164700 }, { "epoch": 1.62, "grad_norm": 11.11934757232666, "learning_rate": 4.232183462917176e-06, "loss": 0.2357, "step": 164725 }, { "epoch": 1.62, "grad_norm": 16.77387046813965, "learning_rate": 4.232059340462927e-06, "loss": 0.2461, "step": 164750 }, { "epoch": 1.62, "grad_norm": 9.38636302947998, "learning_rate": 4.231935218008679e-06, "loss": 0.1825, "step": 164775 }, { "epoch": 1.62, "grad_norm": 16.677688598632812, "learning_rate": 4.2318110955544305e-06, "loss": 0.2421, "step": 164800 }, { "epoch": 1.62, "grad_norm": 6.433839321136475, "learning_rate": 4.231686973100182e-06, "loss": 0.2685, "step": 164825 }, { "epoch": 1.62, "grad_norm": 19.21915626525879, "learning_rate": 4.231562850645934e-06, "loss": 0.2638, "step": 164850 }, { "epoch": 1.62, "grad_norm": 8.28194808959961, "learning_rate": 4.231438728191685e-06, "loss": 0.1841, "step": 164875 }, { "epoch": 1.62, "grad_norm": 14.772843360900879, "learning_rate": 4.231314605737436e-06, "loss": 0.2286, "step": 164900 }, { "epoch": 1.62, "grad_norm": 6.0099263191223145, "learning_rate": 4.231190483283188e-06, "loss": 0.2282, "step": 164925 }, { "epoch": 1.62, "grad_norm": 13.073893547058105, "learning_rate": 4.2310663608289394e-06, "loss": 0.1969, "step": 164950 }, { "epoch": 1.62, "grad_norm": 6.418547630310059, "learning_rate": 4.2309422383746915e-06, "loss": 0.2397, "step": 164975 }, { "epoch": 1.62, "grad_norm": 12.730490684509277, "learning_rate": 4.230818115920443e-06, "loss": 0.265, "step": 165000 }, { "epoch": 1.62, "grad_norm": 3.9266281127929688, "learning_rate": 4.230693993466195e-06, "loss": 0.2632, "step": 165025 }, { "epoch": 1.62, "grad_norm": 14.617103576660156, "learning_rate": 4.230569871011946e-06, "loss": 0.2932, "step": 165050 }, { "epoch": 1.62, "grad_norm": 4.099472999572754, "learning_rate": 4.230445748557697e-06, "loss": 0.1918, "step": 165075 }, { "epoch": 1.62, "grad_norm": 17.22178077697754, "learning_rate": 4.230321626103449e-06, "loss": 0.2246, "step": 165100 }, { "epoch": 1.62, "grad_norm": 6.369518280029297, "learning_rate": 4.2301975036492005e-06, "loss": 0.2455, "step": 165125 }, { "epoch": 1.62, "grad_norm": 13.424544334411621, "learning_rate": 4.230073381194952e-06, "loss": 0.2687, "step": 165150 }, { "epoch": 1.62, "grad_norm": 3.9776391983032227, "learning_rate": 4.229949258740704e-06, "loss": 0.1892, "step": 165175 }, { "epoch": 1.62, "grad_norm": 18.312030792236328, "learning_rate": 4.229825136286455e-06, "loss": 0.2613, "step": 165200 }, { "epoch": 1.62, "grad_norm": 9.957953453063965, "learning_rate": 4.229701013832206e-06, "loss": 0.182, "step": 165225 }, { "epoch": 1.62, "grad_norm": 23.4880428314209, "learning_rate": 4.229576891377958e-06, "loss": 0.3015, "step": 165250 }, { "epoch": 1.63, "grad_norm": 7.498068809509277, "learning_rate": 4.2294527689237094e-06, "loss": 0.2478, "step": 165275 }, { "epoch": 1.63, "grad_norm": 15.010202407836914, "learning_rate": 4.229328646469461e-06, "loss": 0.2746, "step": 165300 }, { "epoch": 1.63, "grad_norm": 13.88139820098877, "learning_rate": 4.229204524015213e-06, "loss": 0.2528, "step": 165325 }, { "epoch": 1.63, "grad_norm": 6.529696464538574, "learning_rate": 4.229080401560964e-06, "loss": 0.25, "step": 165350 }, { "epoch": 1.63, "grad_norm": 3.1298537254333496, "learning_rate": 4.228956279106716e-06, "loss": 0.2095, "step": 165375 }, { "epoch": 1.63, "grad_norm": 18.89923858642578, "learning_rate": 4.228832156652467e-06, "loss": 0.2971, "step": 165400 }, { "epoch": 1.63, "grad_norm": 4.476744174957275, "learning_rate": 4.228708034198219e-06, "loss": 0.1643, "step": 165425 }, { "epoch": 1.63, "grad_norm": 13.381348609924316, "learning_rate": 4.2285839117439705e-06, "loss": 0.2224, "step": 165450 }, { "epoch": 1.63, "grad_norm": 5.094894886016846, "learning_rate": 4.2284597892897225e-06, "loss": 0.231, "step": 165475 }, { "epoch": 1.63, "grad_norm": 16.49966049194336, "learning_rate": 4.228335666835474e-06, "loss": 0.2692, "step": 165500 }, { "epoch": 1.63, "grad_norm": 5.439321041107178, "learning_rate": 4.228211544381225e-06, "loss": 0.2283, "step": 165525 }, { "epoch": 1.63, "grad_norm": 10.5802001953125, "learning_rate": 4.228087421926977e-06, "loss": 0.2521, "step": 165550 }, { "epoch": 1.63, "grad_norm": 8.215270042419434, "learning_rate": 4.227963299472728e-06, "loss": 0.3071, "step": 165575 }, { "epoch": 1.63, "grad_norm": 11.825129508972168, "learning_rate": 4.227839177018479e-06, "loss": 0.2212, "step": 165600 }, { "epoch": 1.63, "grad_norm": 2.133521795272827, "learning_rate": 4.2277150545642315e-06, "loss": 0.22, "step": 165625 }, { "epoch": 1.63, "grad_norm": 7.251281261444092, "learning_rate": 4.227590932109983e-06, "loss": 0.2485, "step": 165650 }, { "epoch": 1.63, "grad_norm": 2.956615686416626, "learning_rate": 4.227466809655734e-06, "loss": 0.2267, "step": 165675 }, { "epoch": 1.63, "grad_norm": 10.654228210449219, "learning_rate": 4.227342687201486e-06, "loss": 0.2439, "step": 165700 }, { "epoch": 1.63, "grad_norm": 5.5565009117126465, "learning_rate": 4.227218564747237e-06, "loss": 0.2228, "step": 165725 }, { "epoch": 1.63, "grad_norm": 26.506404876708984, "learning_rate": 4.227094442292989e-06, "loss": 0.2627, "step": 165750 }, { "epoch": 1.63, "grad_norm": 3.2353124618530273, "learning_rate": 4.2269703198387404e-06, "loss": 0.2403, "step": 165775 }, { "epoch": 1.63, "grad_norm": 5.361827373504639, "learning_rate": 4.2268461973844925e-06, "loss": 0.2705, "step": 165800 }, { "epoch": 1.63, "grad_norm": 0.5503620505332947, "learning_rate": 4.226722074930244e-06, "loss": 0.2636, "step": 165825 }, { "epoch": 1.63, "grad_norm": 12.779699325561523, "learning_rate": 4.226597952475995e-06, "loss": 0.2105, "step": 165850 }, { "epoch": 1.63, "grad_norm": 7.134973526000977, "learning_rate": 4.226473830021747e-06, "loss": 0.2913, "step": 165875 }, { "epoch": 1.63, "grad_norm": 13.519185066223145, "learning_rate": 4.226349707567498e-06, "loss": 0.2245, "step": 165900 }, { "epoch": 1.63, "grad_norm": 7.483510971069336, "learning_rate": 4.226225585113249e-06, "loss": 0.2301, "step": 165925 }, { "epoch": 1.63, "grad_norm": 13.836080551147461, "learning_rate": 4.2261014626590015e-06, "loss": 0.2307, "step": 165950 }, { "epoch": 1.63, "grad_norm": 4.511142253875732, "learning_rate": 4.225977340204753e-06, "loss": 0.2132, "step": 165975 }, { "epoch": 1.63, "grad_norm": 15.232223510742188, "learning_rate": 4.225853217750504e-06, "loss": 0.264, "step": 166000 }, { "epoch": 1.63, "grad_norm": 6.563066005706787, "learning_rate": 4.225729095296256e-06, "loss": 0.1849, "step": 166025 }, { "epoch": 1.63, "grad_norm": 12.521296501159668, "learning_rate": 4.225604972842007e-06, "loss": 0.2362, "step": 166050 }, { "epoch": 1.63, "grad_norm": 5.965446472167969, "learning_rate": 4.225480850387758e-06, "loss": 0.2179, "step": 166075 }, { "epoch": 1.63, "grad_norm": 11.04898452758789, "learning_rate": 4.22535672793351e-06, "loss": 0.2509, "step": 166100 }, { "epoch": 1.63, "grad_norm": 7.695297718048096, "learning_rate": 4.225232605479262e-06, "loss": 0.1882, "step": 166125 }, { "epoch": 1.63, "grad_norm": 12.964799880981445, "learning_rate": 4.225108483025014e-06, "loss": 0.2902, "step": 166150 }, { "epoch": 1.63, "grad_norm": 8.890844345092773, "learning_rate": 4.224984360570765e-06, "loss": 0.2227, "step": 166175 }, { "epoch": 1.63, "grad_norm": 9.638410568237305, "learning_rate": 4.224860238116517e-06, "loss": 0.2724, "step": 166200 }, { "epoch": 1.63, "grad_norm": 2.8186585903167725, "learning_rate": 4.224736115662268e-06, "loss": 0.2097, "step": 166225 }, { "epoch": 1.63, "grad_norm": 12.912593841552734, "learning_rate": 4.22461199320802e-06, "loss": 0.2084, "step": 166250 }, { "epoch": 1.63, "grad_norm": 6.221169948577881, "learning_rate": 4.2244878707537714e-06, "loss": 0.2296, "step": 166275 }, { "epoch": 1.64, "grad_norm": 10.941749572753906, "learning_rate": 4.224363748299523e-06, "loss": 0.1901, "step": 166300 }, { "epoch": 1.64, "grad_norm": 5.646084785461426, "learning_rate": 4.224239625845275e-06, "loss": 0.2433, "step": 166325 }, { "epoch": 1.64, "grad_norm": 12.657586097717285, "learning_rate": 4.224115503391026e-06, "loss": 0.2305, "step": 166350 }, { "epoch": 1.64, "grad_norm": 6.732978343963623, "learning_rate": 4.223991380936777e-06, "loss": 0.2908, "step": 166375 }, { "epoch": 1.64, "grad_norm": 18.045076370239258, "learning_rate": 4.223867258482529e-06, "loss": 0.2397, "step": 166400 }, { "epoch": 1.64, "grad_norm": 7.534001350402832, "learning_rate": 4.22374313602828e-06, "loss": 0.2587, "step": 166425 }, { "epoch": 1.64, "grad_norm": 14.912240028381348, "learning_rate": 4.223619013574032e-06, "loss": 0.2346, "step": 166450 }, { "epoch": 1.64, "grad_norm": 5.224283695220947, "learning_rate": 4.223494891119784e-06, "loss": 0.2072, "step": 166475 }, { "epoch": 1.64, "grad_norm": 11.727232933044434, "learning_rate": 4.223370768665535e-06, "loss": 0.2521, "step": 166500 }, { "epoch": 1.64, "grad_norm": 5.082819938659668, "learning_rate": 4.223246646211286e-06, "loss": 0.2144, "step": 166525 }, { "epoch": 1.64, "grad_norm": 14.702232360839844, "learning_rate": 4.223122523757038e-06, "loss": 0.2546, "step": 166550 }, { "epoch": 1.64, "grad_norm": 9.198904037475586, "learning_rate": 4.222998401302789e-06, "loss": 0.1937, "step": 166575 }, { "epoch": 1.64, "grad_norm": 17.194339752197266, "learning_rate": 4.2228742788485414e-06, "loss": 0.2398, "step": 166600 }, { "epoch": 1.64, "grad_norm": 5.6770710945129395, "learning_rate": 4.222750156394293e-06, "loss": 0.2714, "step": 166625 }, { "epoch": 1.64, "grad_norm": 14.423145294189453, "learning_rate": 4.222626033940045e-06, "loss": 0.2028, "step": 166650 }, { "epoch": 1.64, "grad_norm": 7.1543660163879395, "learning_rate": 4.222501911485796e-06, "loss": 0.2331, "step": 166675 }, { "epoch": 1.64, "grad_norm": 8.9918794631958, "learning_rate": 4.222377789031547e-06, "loss": 0.1796, "step": 166700 }, { "epoch": 1.64, "grad_norm": 5.689121246337891, "learning_rate": 4.222253666577299e-06, "loss": 0.2268, "step": 166725 }, { "epoch": 1.64, "grad_norm": 14.790647506713867, "learning_rate": 4.22212954412305e-06, "loss": 0.2802, "step": 166750 }, { "epoch": 1.64, "grad_norm": 6.890866756439209, "learning_rate": 4.222010386566972e-06, "loss": 0.2322, "step": 166775 }, { "epoch": 1.64, "grad_norm": 17.169597625732422, "learning_rate": 4.2218862641127235e-06, "loss": 0.2241, "step": 166800 }, { "epoch": 1.64, "grad_norm": 2.052461862564087, "learning_rate": 4.221762141658475e-06, "loss": 0.2085, "step": 166825 }, { "epoch": 1.64, "grad_norm": 11.429826736450195, "learning_rate": 4.221638019204227e-06, "loss": 0.2709, "step": 166850 }, { "epoch": 1.64, "grad_norm": 7.791018486022949, "learning_rate": 4.221513896749978e-06, "loss": 0.2086, "step": 166875 }, { "epoch": 1.64, "grad_norm": 9.3521146774292, "learning_rate": 4.221389774295729e-06, "loss": 0.3024, "step": 166900 }, { "epoch": 1.64, "grad_norm": 9.877121925354004, "learning_rate": 4.221265651841481e-06, "loss": 0.2968, "step": 166925 }, { "epoch": 1.64, "grad_norm": 14.985045433044434, "learning_rate": 4.2211415293872324e-06, "loss": 0.2585, "step": 166950 }, { "epoch": 1.64, "grad_norm": 4.571515083312988, "learning_rate": 4.221017406932984e-06, "loss": 0.2313, "step": 166975 }, { "epoch": 1.64, "grad_norm": 12.393004417419434, "learning_rate": 4.220893284478736e-06, "loss": 0.2631, "step": 167000 }, { "epoch": 1.64, "grad_norm": 5.189137935638428, "learning_rate": 4.220769162024487e-06, "loss": 0.1784, "step": 167025 }, { "epoch": 1.64, "grad_norm": 15.888693809509277, "learning_rate": 4.220645039570239e-06, "loss": 0.2328, "step": 167050 }, { "epoch": 1.64, "grad_norm": 8.96446704864502, "learning_rate": 4.22052091711599e-06, "loss": 0.2582, "step": 167075 }, { "epoch": 1.64, "grad_norm": 14.397246360778809, "learning_rate": 4.220396794661742e-06, "loss": 0.1989, "step": 167100 }, { "epoch": 1.64, "grad_norm": 8.1630277633667, "learning_rate": 4.2202726722074935e-06, "loss": 0.2384, "step": 167125 }, { "epoch": 1.64, "grad_norm": 22.466815948486328, "learning_rate": 4.2201485497532455e-06, "loss": 0.2741, "step": 167150 }, { "epoch": 1.64, "grad_norm": 5.537144184112549, "learning_rate": 4.220024427298997e-06, "loss": 0.2172, "step": 167175 }, { "epoch": 1.64, "grad_norm": 13.55957317352295, "learning_rate": 4.219900304844748e-06, "loss": 0.2264, "step": 167200 }, { "epoch": 1.64, "grad_norm": 5.5529022216796875, "learning_rate": 4.219776182390499e-06, "loss": 0.2538, "step": 167225 }, { "epoch": 1.64, "grad_norm": 11.881712913513184, "learning_rate": 4.219652059936251e-06, "loss": 0.2257, "step": 167250 }, { "epoch": 1.64, "grad_norm": 4.554155349731445, "learning_rate": 4.2195279374820024e-06, "loss": 0.207, "step": 167275 }, { "epoch": 1.64, "grad_norm": 21.437273025512695, "learning_rate": 4.219403815027754e-06, "loss": 0.2672, "step": 167300 }, { "epoch": 1.65, "grad_norm": 4.358432769775391, "learning_rate": 4.219279692573506e-06, "loss": 0.2036, "step": 167325 }, { "epoch": 1.65, "grad_norm": 12.322132110595703, "learning_rate": 4.219155570119257e-06, "loss": 0.2385, "step": 167350 }, { "epoch": 1.65, "grad_norm": 5.8197922706604, "learning_rate": 4.219031447665008e-06, "loss": 0.1826, "step": 167375 }, { "epoch": 1.65, "grad_norm": 7.7237701416015625, "learning_rate": 4.21890732521076e-06, "loss": 0.2376, "step": 167400 }, { "epoch": 1.65, "grad_norm": 7.01348876953125, "learning_rate": 4.218783202756511e-06, "loss": 0.2167, "step": 167425 }, { "epoch": 1.65, "grad_norm": 13.579211235046387, "learning_rate": 4.2186590803022634e-06, "loss": 0.2116, "step": 167450 }, { "epoch": 1.65, "grad_norm": 2.5765020847320557, "learning_rate": 4.218534957848015e-06, "loss": 0.2362, "step": 167475 }, { "epoch": 1.65, "grad_norm": 11.657780647277832, "learning_rate": 4.218410835393767e-06, "loss": 0.2822, "step": 167500 }, { "epoch": 1.65, "grad_norm": 6.384032726287842, "learning_rate": 4.218286712939518e-06, "loss": 0.2398, "step": 167525 }, { "epoch": 1.65, "grad_norm": 10.69651985168457, "learning_rate": 4.21816259048527e-06, "loss": 0.2603, "step": 167550 }, { "epoch": 1.65, "grad_norm": 9.745361328125, "learning_rate": 4.218038468031021e-06, "loss": 0.2611, "step": 167575 }, { "epoch": 1.65, "grad_norm": 15.314269065856934, "learning_rate": 4.217914345576772e-06, "loss": 0.2328, "step": 167600 }, { "epoch": 1.65, "grad_norm": 6.866494655609131, "learning_rate": 4.2177902231225245e-06, "loss": 0.2095, "step": 167625 }, { "epoch": 1.65, "grad_norm": 16.419240951538086, "learning_rate": 4.217666100668276e-06, "loss": 0.2508, "step": 167650 }, { "epoch": 1.65, "grad_norm": 8.515331268310547, "learning_rate": 4.217541978214027e-06, "loss": 0.2222, "step": 167675 }, { "epoch": 1.65, "grad_norm": 12.168743133544922, "learning_rate": 4.217417855759779e-06, "loss": 0.2255, "step": 167700 }, { "epoch": 1.65, "grad_norm": 8.715867042541504, "learning_rate": 4.21729373330553e-06, "loss": 0.2419, "step": 167725 }, { "epoch": 1.65, "grad_norm": 9.569438934326172, "learning_rate": 4.217169610851281e-06, "loss": 0.2437, "step": 167750 }, { "epoch": 1.65, "grad_norm": 7.260627269744873, "learning_rate": 4.2170454883970334e-06, "loss": 0.2033, "step": 167775 }, { "epoch": 1.65, "grad_norm": 14.850672721862793, "learning_rate": 4.216921365942785e-06, "loss": 0.2155, "step": 167800 }, { "epoch": 1.65, "grad_norm": 8.60444164276123, "learning_rate": 4.216797243488536e-06, "loss": 0.1904, "step": 167825 }, { "epoch": 1.65, "grad_norm": 12.887069702148438, "learning_rate": 4.216673121034288e-06, "loss": 0.2489, "step": 167850 }, { "epoch": 1.65, "grad_norm": 4.811670780181885, "learning_rate": 4.216548998580039e-06, "loss": 0.2135, "step": 167875 }, { "epoch": 1.65, "grad_norm": 7.818385124206543, "learning_rate": 4.216424876125791e-06, "loss": 0.2679, "step": 167900 }, { "epoch": 1.65, "grad_norm": 5.352477550506592, "learning_rate": 4.216300753671542e-06, "loss": 0.2703, "step": 167925 }, { "epoch": 1.65, "grad_norm": 12.158828735351562, "learning_rate": 4.2161766312172945e-06, "loss": 0.2496, "step": 167950 }, { "epoch": 1.65, "grad_norm": 3.789883613586426, "learning_rate": 4.216052508763046e-06, "loss": 0.2394, "step": 167975 }, { "epoch": 1.65, "grad_norm": 12.442788124084473, "learning_rate": 4.215928386308798e-06, "loss": 0.2554, "step": 168000 }, { "epoch": 1.65, "grad_norm": 16.037670135498047, "learning_rate": 4.215804263854549e-06, "loss": 0.2441, "step": 168025 }, { "epoch": 1.65, "grad_norm": 17.04343605041504, "learning_rate": 4.2156801414003e-06, "loss": 0.2697, "step": 168050 }, { "epoch": 1.65, "grad_norm": 0.819153904914856, "learning_rate": 4.215556018946051e-06, "loss": 0.2037, "step": 168075 }, { "epoch": 1.65, "grad_norm": 15.054266929626465, "learning_rate": 4.215431896491803e-06, "loss": 0.2588, "step": 168100 }, { "epoch": 1.65, "grad_norm": 5.4650397300720215, "learning_rate": 4.215307774037555e-06, "loss": 0.2539, "step": 168125 }, { "epoch": 1.65, "grad_norm": 8.548545837402344, "learning_rate": 4.215183651583306e-06, "loss": 0.2317, "step": 168150 }, { "epoch": 1.65, "grad_norm": 7.874210357666016, "learning_rate": 4.215059529129058e-06, "loss": 0.2223, "step": 168175 }, { "epoch": 1.65, "grad_norm": 18.82647132873535, "learning_rate": 4.214935406674809e-06, "loss": 0.2492, "step": 168200 }, { "epoch": 1.65, "grad_norm": 6.511189937591553, "learning_rate": 4.21481128422056e-06, "loss": 0.2148, "step": 168225 }, { "epoch": 1.65, "grad_norm": 18.164154052734375, "learning_rate": 4.214687161766312e-06, "loss": 0.2453, "step": 168250 }, { "epoch": 1.65, "grad_norm": 0.5186573266983032, "learning_rate": 4.214563039312064e-06, "loss": 0.2038, "step": 168275 }, { "epoch": 1.65, "grad_norm": 13.573089599609375, "learning_rate": 4.214438916857816e-06, "loss": 0.248, "step": 168300 }, { "epoch": 1.65, "grad_norm": 5.609113693237305, "learning_rate": 4.214314794403567e-06, "loss": 0.2349, "step": 168325 }, { "epoch": 1.66, "grad_norm": 10.476949691772461, "learning_rate": 4.214190671949319e-06, "loss": 0.2205, "step": 168350 }, { "epoch": 1.66, "grad_norm": 2.8072872161865234, "learning_rate": 4.21406654949507e-06, "loss": 0.2328, "step": 168375 }, { "epoch": 1.66, "grad_norm": 15.808810234069824, "learning_rate": 4.213942427040822e-06, "loss": 0.2104, "step": 168400 }, { "epoch": 1.66, "grad_norm": 5.537600040435791, "learning_rate": 4.213818304586573e-06, "loss": 0.2209, "step": 168425 }, { "epoch": 1.66, "grad_norm": 13.558075904846191, "learning_rate": 4.213694182132325e-06, "loss": 0.2294, "step": 168450 }, { "epoch": 1.66, "grad_norm": 5.432122230529785, "learning_rate": 4.213570059678077e-06, "loss": 0.2071, "step": 168475 }, { "epoch": 1.66, "grad_norm": 12.202664375305176, "learning_rate": 4.213445937223828e-06, "loss": 0.2441, "step": 168500 }, { "epoch": 1.66, "grad_norm": 8.530892372131348, "learning_rate": 4.213321814769579e-06, "loss": 0.2015, "step": 168525 }, { "epoch": 1.66, "grad_norm": 18.094987869262695, "learning_rate": 4.213197692315331e-06, "loss": 0.231, "step": 168550 }, { "epoch": 1.66, "grad_norm": 4.457791805267334, "learning_rate": 4.213073569861082e-06, "loss": 0.2385, "step": 168575 }, { "epoch": 1.66, "grad_norm": 18.138452529907227, "learning_rate": 4.2129494474068336e-06, "loss": 0.2144, "step": 168600 }, { "epoch": 1.66, "grad_norm": 5.088150501251221, "learning_rate": 4.212825324952586e-06, "loss": 0.2272, "step": 168625 }, { "epoch": 1.66, "grad_norm": 15.082597732543945, "learning_rate": 4.212701202498337e-06, "loss": 0.2055, "step": 168650 }, { "epoch": 1.66, "grad_norm": 7.910244464874268, "learning_rate": 4.212577080044089e-06, "loss": 0.2076, "step": 168675 }, { "epoch": 1.66, "grad_norm": 13.858024597167969, "learning_rate": 4.21245295758984e-06, "loss": 0.261, "step": 168700 }, { "epoch": 1.66, "grad_norm": 6.163949012756348, "learning_rate": 4.212328835135592e-06, "loss": 0.2335, "step": 168725 }, { "epoch": 1.66, "grad_norm": 12.006817817687988, "learning_rate": 4.212204712681343e-06, "loss": 0.2506, "step": 168750 }, { "epoch": 1.66, "grad_norm": 6.063399791717529, "learning_rate": 4.2120805902270954e-06, "loss": 0.2365, "step": 168775 }, { "epoch": 1.66, "grad_norm": 11.782102584838867, "learning_rate": 4.211956467772847e-06, "loss": 0.179, "step": 168800 }, { "epoch": 1.66, "grad_norm": 7.898497104644775, "learning_rate": 4.211832345318598e-06, "loss": 0.2338, "step": 168825 }, { "epoch": 1.66, "grad_norm": 16.917068481445312, "learning_rate": 4.21170822286435e-06, "loss": 0.2276, "step": 168850 }, { "epoch": 1.66, "grad_norm": 4.745664596557617, "learning_rate": 4.211584100410101e-06, "loss": 0.2269, "step": 168875 }, { "epoch": 1.66, "grad_norm": 12.748806953430176, "learning_rate": 4.211459977955852e-06, "loss": 0.2462, "step": 168900 }, { "epoch": 1.66, "grad_norm": 11.419897079467773, "learning_rate": 4.211340820399774e-06, "loss": 0.235, "step": 168925 }, { "epoch": 1.66, "grad_norm": 13.954353332519531, "learning_rate": 4.2112166979455254e-06, "loss": 0.2748, "step": 168950 }, { "epoch": 1.66, "grad_norm": 5.986327171325684, "learning_rate": 4.211092575491277e-06, "loss": 0.2469, "step": 168975 }, { "epoch": 1.66, "grad_norm": 18.211191177368164, "learning_rate": 4.210968453037029e-06, "loss": 0.2375, "step": 169000 }, { "epoch": 1.66, "grad_norm": 6.629717826843262, "learning_rate": 4.21084433058278e-06, "loss": 0.2073, "step": 169025 }, { "epoch": 1.66, "grad_norm": 17.98729133605957, "learning_rate": 4.210720208128531e-06, "loss": 0.2412, "step": 169050 }, { "epoch": 1.66, "grad_norm": 4.087923049926758, "learning_rate": 4.210596085674283e-06, "loss": 0.228, "step": 169075 }, { "epoch": 1.66, "grad_norm": 15.465596199035645, "learning_rate": 4.210471963220034e-06, "loss": 0.195, "step": 169100 }, { "epoch": 1.66, "grad_norm": 8.544548988342285, "learning_rate": 4.2103478407657865e-06, "loss": 0.17, "step": 169125 }, { "epoch": 1.66, "grad_norm": 12.758626937866211, "learning_rate": 4.210223718311538e-06, "loss": 0.266, "step": 169150 }, { "epoch": 1.66, "grad_norm": 3.207052707672119, "learning_rate": 4.21009959585729e-06, "loss": 0.2447, "step": 169175 }, { "epoch": 1.66, "grad_norm": 10.971911430358887, "learning_rate": 4.209975473403041e-06, "loss": 0.2328, "step": 169200 }, { "epoch": 1.66, "grad_norm": 3.6919758319854736, "learning_rate": 4.209851350948793e-06, "loss": 0.2179, "step": 169225 }, { "epoch": 1.66, "grad_norm": 18.6595401763916, "learning_rate": 4.209727228494544e-06, "loss": 0.2266, "step": 169250 }, { "epoch": 1.66, "grad_norm": 3.16879940032959, "learning_rate": 4.209603106040295e-06, "loss": 0.2175, "step": 169275 }, { "epoch": 1.66, "grad_norm": 15.972143173217773, "learning_rate": 4.2094789835860475e-06, "loss": 0.241, "step": 169300 }, { "epoch": 1.66, "grad_norm": 4.28960657119751, "learning_rate": 4.209354861131799e-06, "loss": 0.2978, "step": 169325 }, { "epoch": 1.67, "grad_norm": 18.11660385131836, "learning_rate": 4.20923073867755e-06, "loss": 0.222, "step": 169350 }, { "epoch": 1.67, "grad_norm": 5.721127986907959, "learning_rate": 4.209106616223302e-06, "loss": 0.1894, "step": 169375 }, { "epoch": 1.67, "grad_norm": 21.84055519104004, "learning_rate": 4.208982493769053e-06, "loss": 0.2765, "step": 169400 }, { "epoch": 1.67, "grad_norm": 6.391488552093506, "learning_rate": 4.208858371314804e-06, "loss": 0.2479, "step": 169425 }, { "epoch": 1.67, "grad_norm": 8.111001968383789, "learning_rate": 4.2087342488605564e-06, "loss": 0.229, "step": 169450 }, { "epoch": 1.67, "grad_norm": 3.665240526199341, "learning_rate": 4.208610126406308e-06, "loss": 0.2423, "step": 169475 }, { "epoch": 1.67, "grad_norm": 21.19858169555664, "learning_rate": 4.208486003952059e-06, "loss": 0.2367, "step": 169500 }, { "epoch": 1.67, "grad_norm": 4.550782203674316, "learning_rate": 4.208361881497811e-06, "loss": 0.2411, "step": 169525 }, { "epoch": 1.67, "grad_norm": 14.094198226928711, "learning_rate": 4.208237759043562e-06, "loss": 0.2378, "step": 169550 }, { "epoch": 1.67, "grad_norm": 5.574525356292725, "learning_rate": 4.208113636589314e-06, "loss": 0.2576, "step": 169575 }, { "epoch": 1.67, "grad_norm": 18.072772979736328, "learning_rate": 4.207989514135065e-06, "loss": 0.1944, "step": 169600 }, { "epoch": 1.67, "grad_norm": 0.08442968130111694, "learning_rate": 4.2078653916808175e-06, "loss": 0.1946, "step": 169625 }, { "epoch": 1.67, "grad_norm": 14.386847496032715, "learning_rate": 4.207741269226569e-06, "loss": 0.2004, "step": 169650 }, { "epoch": 1.67, "grad_norm": 2.8112452030181885, "learning_rate": 4.20761714677232e-06, "loss": 0.204, "step": 169675 }, { "epoch": 1.67, "grad_norm": 11.137802124023438, "learning_rate": 4.207493024318072e-06, "loss": 0.2484, "step": 169700 }, { "epoch": 1.67, "grad_norm": 1.6964786052703857, "learning_rate": 4.207368901863823e-06, "loss": 0.2479, "step": 169725 }, { "epoch": 1.67, "grad_norm": 14.911396026611328, "learning_rate": 4.207244779409574e-06, "loss": 0.2087, "step": 169750 }, { "epoch": 1.67, "grad_norm": 10.53459644317627, "learning_rate": 4.207120656955326e-06, "loss": 0.2546, "step": 169775 }, { "epoch": 1.67, "grad_norm": 14.94627857208252, "learning_rate": 4.206996534501078e-06, "loss": 0.2858, "step": 169800 }, { "epoch": 1.67, "grad_norm": 2.743818759918213, "learning_rate": 4.206872412046829e-06, "loss": 0.2264, "step": 169825 }, { "epoch": 1.67, "grad_norm": 17.448152542114258, "learning_rate": 4.206748289592581e-06, "loss": 0.2342, "step": 169850 }, { "epoch": 1.67, "grad_norm": 4.164576053619385, "learning_rate": 4.206624167138332e-06, "loss": 0.2086, "step": 169875 }, { "epoch": 1.67, "grad_norm": 15.853090286254883, "learning_rate": 4.206500044684083e-06, "loss": 0.221, "step": 169900 }, { "epoch": 1.67, "grad_norm": 4.66028356552124, "learning_rate": 4.206375922229835e-06, "loss": 0.2219, "step": 169925 }, { "epoch": 1.67, "grad_norm": 20.474769592285156, "learning_rate": 4.206251799775587e-06, "loss": 0.2196, "step": 169950 }, { "epoch": 1.67, "grad_norm": 7.010471343994141, "learning_rate": 4.206127677321339e-06, "loss": 0.1967, "step": 169975 }, { "epoch": 1.67, "grad_norm": 18.59964370727539, "learning_rate": 4.20600355486709e-06, "loss": 0.2787, "step": 170000 }, { "epoch": 1.67, "grad_norm": 6.684977054595947, "learning_rate": 4.205879432412842e-06, "loss": 0.24, "step": 170025 }, { "epoch": 1.67, "grad_norm": 15.295269966125488, "learning_rate": 4.205755309958593e-06, "loss": 0.2527, "step": 170050 }, { "epoch": 1.67, "grad_norm": 1.8977223634719849, "learning_rate": 4.205631187504345e-06, "loss": 0.2272, "step": 170075 }, { "epoch": 1.67, "grad_norm": 10.440857887268066, "learning_rate": 4.205507065050096e-06, "loss": 0.2158, "step": 170100 }, { "epoch": 1.67, "grad_norm": 4.36046838760376, "learning_rate": 4.205382942595848e-06, "loss": 0.2165, "step": 170125 }, { "epoch": 1.67, "grad_norm": 12.596887588500977, "learning_rate": 4.2052588201416e-06, "loss": 0.2936, "step": 170150 }, { "epoch": 1.67, "grad_norm": 6.945038795471191, "learning_rate": 4.205134697687351e-06, "loss": 0.2138, "step": 170175 }, { "epoch": 1.67, "grad_norm": 21.258962631225586, "learning_rate": 4.205010575233102e-06, "loss": 0.2566, "step": 170200 }, { "epoch": 1.67, "grad_norm": 0.6875801682472229, "learning_rate": 4.204886452778854e-06, "loss": 0.2496, "step": 170225 }, { "epoch": 1.67, "grad_norm": 17.96680450439453, "learning_rate": 4.204762330324605e-06, "loss": 0.216, "step": 170250 }, { "epoch": 1.67, "grad_norm": 1.5137524604797363, "learning_rate": 4.204638207870357e-06, "loss": 0.2665, "step": 170275 }, { "epoch": 1.67, "grad_norm": 16.234024047851562, "learning_rate": 4.204514085416109e-06, "loss": 0.2863, "step": 170300 }, { "epoch": 1.67, "grad_norm": 3.6437277793884277, "learning_rate": 4.20438996296186e-06, "loss": 0.25, "step": 170325 }, { "epoch": 1.67, "grad_norm": 11.948816299438477, "learning_rate": 4.204265840507611e-06, "loss": 0.1903, "step": 170350 }, { "epoch": 1.68, "grad_norm": 5.53333854675293, "learning_rate": 4.204141718053363e-06, "loss": 0.21, "step": 170375 }, { "epoch": 1.68, "grad_norm": 15.800352096557617, "learning_rate": 4.204017595599114e-06, "loss": 0.2888, "step": 170400 }, { "epoch": 1.68, "grad_norm": 4.648287773132324, "learning_rate": 4.203893473144866e-06, "loss": 0.2669, "step": 170425 }, { "epoch": 1.68, "grad_norm": 8.521036148071289, "learning_rate": 4.203769350690618e-06, "loss": 0.2459, "step": 170450 }, { "epoch": 1.68, "grad_norm": 2.6862592697143555, "learning_rate": 4.20364522823637e-06, "loss": 0.1831, "step": 170475 }, { "epoch": 1.68, "grad_norm": 7.7780585289001465, "learning_rate": 4.203521105782121e-06, "loss": 0.2716, "step": 170500 }, { "epoch": 1.68, "grad_norm": 33.65388488769531, "learning_rate": 4.203396983327872e-06, "loss": 0.1968, "step": 170525 }, { "epoch": 1.68, "grad_norm": 12.200464248657227, "learning_rate": 4.203272860873624e-06, "loss": 0.2458, "step": 170550 }, { "epoch": 1.68, "grad_norm": 4.055016040802002, "learning_rate": 4.203148738419375e-06, "loss": 0.2505, "step": 170575 }, { "epoch": 1.68, "grad_norm": 14.129193305969238, "learning_rate": 4.2030246159651266e-06, "loss": 0.2375, "step": 170600 }, { "epoch": 1.68, "grad_norm": 5.030505180358887, "learning_rate": 4.202900493510879e-06, "loss": 0.2609, "step": 170625 }, { "epoch": 1.68, "grad_norm": 17.530105590820312, "learning_rate": 4.20277637105663e-06, "loss": 0.2162, "step": 170650 }, { "epoch": 1.68, "grad_norm": 1.730349063873291, "learning_rate": 4.202652248602381e-06, "loss": 0.2018, "step": 170675 }, { "epoch": 1.68, "grad_norm": 13.092292785644531, "learning_rate": 4.202528126148133e-06, "loss": 0.1907, "step": 170700 }, { "epoch": 1.68, "grad_norm": 6.972848892211914, "learning_rate": 4.202404003693884e-06, "loss": 0.1812, "step": 170725 }, { "epoch": 1.68, "grad_norm": 14.665081024169922, "learning_rate": 4.2022798812396355e-06, "loss": 0.253, "step": 170750 }, { "epoch": 1.68, "grad_norm": 6.108858108520508, "learning_rate": 4.202155758785388e-06, "loss": 0.2608, "step": 170775 }, { "epoch": 1.68, "grad_norm": 13.796566009521484, "learning_rate": 4.202031636331139e-06, "loss": 0.2484, "step": 170800 }, { "epoch": 1.68, "grad_norm": 9.04323673248291, "learning_rate": 4.201907513876891e-06, "loss": 0.2244, "step": 170825 }, { "epoch": 1.68, "grad_norm": 18.110074996948242, "learning_rate": 4.201783391422642e-06, "loss": 0.2608, "step": 170850 }, { "epoch": 1.68, "grad_norm": 9.36681079864502, "learning_rate": 4.201659268968394e-06, "loss": 0.2484, "step": 170875 }, { "epoch": 1.68, "grad_norm": 11.497395515441895, "learning_rate": 4.201535146514145e-06, "loss": 0.2525, "step": 170900 }, { "epoch": 1.68, "grad_norm": 3.4643521308898926, "learning_rate": 4.201411024059897e-06, "loss": 0.1951, "step": 170925 }, { "epoch": 1.68, "grad_norm": 14.713373184204102, "learning_rate": 4.201286901605649e-06, "loss": 0.2202, "step": 170950 }, { "epoch": 1.68, "grad_norm": 4.231216907501221, "learning_rate": 4.2011627791514e-06, "loss": 0.173, "step": 170975 }, { "epoch": 1.68, "grad_norm": 15.958690643310547, "learning_rate": 4.201038656697152e-06, "loss": 0.2299, "step": 171000 }, { "epoch": 1.68, "grad_norm": 3.236788034439087, "learning_rate": 4.200914534242903e-06, "loss": 0.2078, "step": 171025 }, { "epoch": 1.68, "grad_norm": 15.232629776000977, "learning_rate": 4.200790411788654e-06, "loss": 0.2775, "step": 171050 }, { "epoch": 1.68, "grad_norm": 2.6674978733062744, "learning_rate": 4.200666289334406e-06, "loss": 0.2234, "step": 171075 }, { "epoch": 1.68, "grad_norm": 15.384839057922363, "learning_rate": 4.2005421668801576e-06, "loss": 0.2142, "step": 171100 }, { "epoch": 1.68, "grad_norm": 6.151510238647461, "learning_rate": 4.200423009324079e-06, "loss": 0.2147, "step": 171125 }, { "epoch": 1.68, "grad_norm": 24.638744354248047, "learning_rate": 4.200298886869831e-06, "loss": 0.2286, "step": 171150 }, { "epoch": 1.68, "grad_norm": 7.44077730178833, "learning_rate": 4.200174764415582e-06, "loss": 0.2276, "step": 171175 }, { "epoch": 1.68, "grad_norm": 17.964805603027344, "learning_rate": 4.200050641961333e-06, "loss": 0.2776, "step": 171200 }, { "epoch": 1.68, "grad_norm": 9.195226669311523, "learning_rate": 4.199926519507085e-06, "loss": 0.2531, "step": 171225 }, { "epoch": 1.68, "grad_norm": 21.161685943603516, "learning_rate": 4.199802397052836e-06, "loss": 0.2346, "step": 171250 }, { "epoch": 1.68, "grad_norm": 10.80451774597168, "learning_rate": 4.199678274598588e-06, "loss": 0.2105, "step": 171275 }, { "epoch": 1.68, "grad_norm": 12.552309036254883, "learning_rate": 4.19955415214434e-06, "loss": 0.2389, "step": 171300 }, { "epoch": 1.68, "grad_norm": 3.7033276557922363, "learning_rate": 4.199430029690092e-06, "loss": 0.2, "step": 171325 }, { "epoch": 1.68, "grad_norm": 16.77930450439453, "learning_rate": 4.199305907235843e-06, "loss": 0.2529, "step": 171350 }, { "epoch": 1.68, "grad_norm": 6.158016204833984, "learning_rate": 4.199181784781595e-06, "loss": 0.216, "step": 171375 }, { "epoch": 1.69, "grad_norm": 16.425579071044922, "learning_rate": 4.199057662327346e-06, "loss": 0.2537, "step": 171400 }, { "epoch": 1.69, "grad_norm": 5.551018714904785, "learning_rate": 4.198933539873097e-06, "loss": 0.259, "step": 171425 }, { "epoch": 1.69, "grad_norm": 21.70393180847168, "learning_rate": 4.1988094174188494e-06, "loss": 0.2379, "step": 171450 }, { "epoch": 1.69, "grad_norm": 6.582533836364746, "learning_rate": 4.198685294964601e-06, "loss": 0.195, "step": 171475 }, { "epoch": 1.69, "grad_norm": 16.186809539794922, "learning_rate": 4.198561172510352e-06, "loss": 0.246, "step": 171500 }, { "epoch": 1.69, "grad_norm": 7.409328937530518, "learning_rate": 4.198437050056104e-06, "loss": 0.2275, "step": 171525 }, { "epoch": 1.69, "grad_norm": 12.007125854492188, "learning_rate": 4.198312927601855e-06, "loss": 0.287, "step": 171550 }, { "epoch": 1.69, "grad_norm": 6.969616413116455, "learning_rate": 4.198188805147606e-06, "loss": 0.2092, "step": 171575 }, { "epoch": 1.69, "grad_norm": 7.787749767303467, "learning_rate": 4.198064682693358e-06, "loss": 0.281, "step": 171600 }, { "epoch": 1.69, "grad_norm": 4.362039089202881, "learning_rate": 4.19794056023911e-06, "loss": 0.2504, "step": 171625 }, { "epoch": 1.69, "grad_norm": 11.032361030578613, "learning_rate": 4.197816437784862e-06, "loss": 0.1904, "step": 171650 }, { "epoch": 1.69, "grad_norm": 5.973587512969971, "learning_rate": 4.197692315330613e-06, "loss": 0.2425, "step": 171675 }, { "epoch": 1.69, "grad_norm": 9.542643547058105, "learning_rate": 4.197568192876365e-06, "loss": 0.2491, "step": 171700 }, { "epoch": 1.69, "grad_norm": 3.89640474319458, "learning_rate": 4.197444070422116e-06, "loss": 0.185, "step": 171725 }, { "epoch": 1.69, "grad_norm": 16.707048416137695, "learning_rate": 4.197319947967868e-06, "loss": 0.2399, "step": 171750 }, { "epoch": 1.69, "grad_norm": 4.780830383300781, "learning_rate": 4.197195825513619e-06, "loss": 0.2618, "step": 171775 }, { "epoch": 1.69, "grad_norm": 20.645727157592773, "learning_rate": 4.197071703059371e-06, "loss": 0.2568, "step": 171800 }, { "epoch": 1.69, "grad_norm": 5.501124382019043, "learning_rate": 4.196947580605122e-06, "loss": 0.2218, "step": 171825 }, { "epoch": 1.69, "grad_norm": 21.310972213745117, "learning_rate": 4.196823458150874e-06, "loss": 0.2719, "step": 171850 }, { "epoch": 1.69, "grad_norm": 10.576058387756348, "learning_rate": 4.196699335696625e-06, "loss": 0.195, "step": 171875 }, { "epoch": 1.69, "grad_norm": 11.301678657531738, "learning_rate": 4.196575213242376e-06, "loss": 0.2112, "step": 171900 }, { "epoch": 1.69, "grad_norm": 0.10856977105140686, "learning_rate": 4.196451090788128e-06, "loss": 0.228, "step": 171925 }, { "epoch": 1.69, "grad_norm": 16.50242805480957, "learning_rate": 4.19632696833388e-06, "loss": 0.2823, "step": 171950 }, { "epoch": 1.69, "grad_norm": 6.713362216949463, "learning_rate": 4.196202845879631e-06, "loss": 0.2393, "step": 171975 }, { "epoch": 1.69, "grad_norm": 13.690503120422363, "learning_rate": 4.196078723425383e-06, "loss": 0.2064, "step": 172000 }, { "epoch": 1.69, "grad_norm": 4.058726787567139, "learning_rate": 4.195954600971134e-06, "loss": 0.206, "step": 172025 }, { "epoch": 1.69, "grad_norm": 11.65870189666748, "learning_rate": 4.195830478516886e-06, "loss": 0.2021, "step": 172050 }, { "epoch": 1.69, "grad_norm": 4.757433891296387, "learning_rate": 4.195706356062637e-06, "loss": 0.2795, "step": 172075 }, { "epoch": 1.69, "grad_norm": 12.085593223571777, "learning_rate": 4.195582233608389e-06, "loss": 0.2106, "step": 172100 }, { "epoch": 1.69, "grad_norm": 6.176605701446533, "learning_rate": 4.195458111154141e-06, "loss": 0.2363, "step": 172125 }, { "epoch": 1.69, "grad_norm": 15.2699556350708, "learning_rate": 4.195333988699893e-06, "loss": 0.2462, "step": 172150 }, { "epoch": 1.69, "grad_norm": 3.2318480014801025, "learning_rate": 4.195209866245644e-06, "loss": 0.2099, "step": 172175 }, { "epoch": 1.69, "grad_norm": 16.111427307128906, "learning_rate": 4.195085743791395e-06, "loss": 0.1868, "step": 172200 }, { "epoch": 1.69, "grad_norm": 2.568753957748413, "learning_rate": 4.194961621337147e-06, "loss": 0.206, "step": 172225 }, { "epoch": 1.69, "grad_norm": 9.755260467529297, "learning_rate": 4.194837498882898e-06, "loss": 0.2309, "step": 172250 }, { "epoch": 1.69, "grad_norm": 2.8675005435943604, "learning_rate": 4.1947133764286496e-06, "loss": 0.1914, "step": 172275 }, { "epoch": 1.69, "grad_norm": 14.344614028930664, "learning_rate": 4.194589253974402e-06, "loss": 0.2135, "step": 172300 }, { "epoch": 1.69, "grad_norm": 4.589351654052734, "learning_rate": 4.194465131520153e-06, "loss": 0.191, "step": 172325 }, { "epoch": 1.69, "grad_norm": 10.408957481384277, "learning_rate": 4.194341009065904e-06, "loss": 0.2282, "step": 172350 }, { "epoch": 1.69, "grad_norm": 3.500000238418579, "learning_rate": 4.194216886611656e-06, "loss": 0.2267, "step": 172375 }, { "epoch": 1.7, "grad_norm": 15.260454177856445, "learning_rate": 4.194092764157407e-06, "loss": 0.2598, "step": 172400 }, { "epoch": 1.7, "grad_norm": 5.701524257659912, "learning_rate": 4.1939686417031585e-06, "loss": 0.2447, "step": 172425 }, { "epoch": 1.7, "grad_norm": 17.6025333404541, "learning_rate": 4.193844519248911e-06, "loss": 0.2186, "step": 172450 }, { "epoch": 1.7, "grad_norm": 3.0573678016662598, "learning_rate": 4.193720396794662e-06, "loss": 0.2574, "step": 172475 }, { "epoch": 1.7, "grad_norm": 11.673125267028809, "learning_rate": 4.193596274340414e-06, "loss": 0.2455, "step": 172500 }, { "epoch": 1.7, "grad_norm": 5.9224419593811035, "learning_rate": 4.193472151886165e-06, "loss": 0.2583, "step": 172525 }, { "epoch": 1.7, "grad_norm": 9.26230239868164, "learning_rate": 4.193348029431917e-06, "loss": 0.2064, "step": 172550 }, { "epoch": 1.7, "grad_norm": 0.3403245210647583, "learning_rate": 4.193223906977668e-06, "loss": 0.1929, "step": 172575 }, { "epoch": 1.7, "grad_norm": 14.296487808227539, "learning_rate": 4.19309978452342e-06, "loss": 0.2671, "step": 172600 }, { "epoch": 1.7, "grad_norm": 7.1649627685546875, "learning_rate": 4.192975662069172e-06, "loss": 0.2524, "step": 172625 }, { "epoch": 1.7, "grad_norm": 12.748209953308105, "learning_rate": 4.192851539614923e-06, "loss": 0.2536, "step": 172650 }, { "epoch": 1.7, "grad_norm": 5.832851886749268, "learning_rate": 4.192727417160674e-06, "loss": 0.2132, "step": 172675 }, { "epoch": 1.7, "grad_norm": 12.956161499023438, "learning_rate": 4.192603294706426e-06, "loss": 0.2312, "step": 172700 }, { "epoch": 1.7, "grad_norm": 1.2467690706253052, "learning_rate": 4.192479172252177e-06, "loss": 0.2127, "step": 172725 }, { "epoch": 1.7, "grad_norm": 8.852383613586426, "learning_rate": 4.1923550497979285e-06, "loss": 0.2677, "step": 172750 }, { "epoch": 1.7, "grad_norm": 3.7809855937957764, "learning_rate": 4.1922309273436806e-06, "loss": 0.213, "step": 172775 }, { "epoch": 1.7, "grad_norm": 14.515302658081055, "learning_rate": 4.192106804889432e-06, "loss": 0.2399, "step": 172800 }, { "epoch": 1.7, "grad_norm": 16.703556060791016, "learning_rate": 4.191982682435183e-06, "loss": 0.1884, "step": 172825 }, { "epoch": 1.7, "grad_norm": 15.032468795776367, "learning_rate": 4.191858559980935e-06, "loss": 0.2402, "step": 172850 }, { "epoch": 1.7, "grad_norm": 6.910752296447754, "learning_rate": 4.191734437526686e-06, "loss": 0.2289, "step": 172875 }, { "epoch": 1.7, "grad_norm": 11.327834129333496, "learning_rate": 4.191610315072438e-06, "loss": 0.2188, "step": 172900 }, { "epoch": 1.7, "grad_norm": 7.4749531745910645, "learning_rate": 4.1914861926181895e-06, "loss": 0.2139, "step": 172925 }, { "epoch": 1.7, "grad_norm": 9.916000366210938, "learning_rate": 4.191362070163942e-06, "loss": 0.2267, "step": 172950 }, { "epoch": 1.7, "grad_norm": 5.967417240142822, "learning_rate": 4.191237947709693e-06, "loss": 0.2045, "step": 172975 }, { "epoch": 1.7, "grad_norm": 12.488153457641602, "learning_rate": 4.191113825255445e-06, "loss": 0.2282, "step": 173000 }, { "epoch": 1.7, "grad_norm": 8.806806564331055, "learning_rate": 4.190989702801196e-06, "loss": 0.2201, "step": 173025 }, { "epoch": 1.7, "grad_norm": 12.598759651184082, "learning_rate": 4.190865580346947e-06, "loss": 0.2315, "step": 173050 }, { "epoch": 1.7, "grad_norm": 11.225105285644531, "learning_rate": 4.190741457892699e-06, "loss": 0.1835, "step": 173075 }, { "epoch": 1.7, "grad_norm": 10.933603286743164, "learning_rate": 4.1906173354384506e-06, "loss": 0.2177, "step": 173100 }, { "epoch": 1.7, "grad_norm": 4.095961570739746, "learning_rate": 4.1904981778823724e-06, "loss": 0.2261, "step": 173125 }, { "epoch": 1.7, "grad_norm": 10.61336612701416, "learning_rate": 4.190374055428124e-06, "loss": 0.2158, "step": 173150 }, { "epoch": 1.7, "grad_norm": 9.00421142578125, "learning_rate": 4.190249932973875e-06, "loss": 0.2552, "step": 173175 }, { "epoch": 1.7, "grad_norm": 15.648290634155273, "learning_rate": 4.190125810519627e-06, "loss": 0.2966, "step": 173200 }, { "epoch": 1.7, "grad_norm": 8.607791900634766, "learning_rate": 4.190001688065378e-06, "loss": 0.2681, "step": 173225 }, { "epoch": 1.7, "grad_norm": 12.605850219726562, "learning_rate": 4.189877565611129e-06, "loss": 0.2446, "step": 173250 }, { "epoch": 1.7, "grad_norm": 4.888160228729248, "learning_rate": 4.1897534431568805e-06, "loss": 0.2203, "step": 173275 }, { "epoch": 1.7, "grad_norm": 7.902602672576904, "learning_rate": 4.189629320702633e-06, "loss": 0.2129, "step": 173300 }, { "epoch": 1.7, "grad_norm": 6.267287254333496, "learning_rate": 4.189505198248384e-06, "loss": 0.2028, "step": 173325 }, { "epoch": 1.7, "grad_norm": 13.574358940124512, "learning_rate": 4.189381075794136e-06, "loss": 0.2108, "step": 173350 }, { "epoch": 1.7, "grad_norm": 4.811686038970947, "learning_rate": 4.189256953339887e-06, "loss": 0.2152, "step": 173375 }, { "epoch": 1.7, "grad_norm": 14.276673316955566, "learning_rate": 4.189132830885639e-06, "loss": 0.2753, "step": 173400 }, { "epoch": 1.71, "grad_norm": 1.8178104162216187, "learning_rate": 4.18900870843139e-06, "loss": 0.2182, "step": 173425 }, { "epoch": 1.71, "grad_norm": 10.835060119628906, "learning_rate": 4.188884585977142e-06, "loss": 0.2303, "step": 173450 }, { "epoch": 1.71, "grad_norm": 5.806607246398926, "learning_rate": 4.188760463522894e-06, "loss": 0.1985, "step": 173475 }, { "epoch": 1.71, "grad_norm": 12.991730690002441, "learning_rate": 4.188636341068645e-06, "loss": 0.2301, "step": 173500 }, { "epoch": 1.71, "grad_norm": 3.1376430988311768, "learning_rate": 4.188512218614397e-06, "loss": 0.1978, "step": 173525 }, { "epoch": 1.71, "grad_norm": 10.937779426574707, "learning_rate": 4.188388096160148e-06, "loss": 0.2026, "step": 173550 }, { "epoch": 1.71, "grad_norm": 6.426458358764648, "learning_rate": 4.188263973705899e-06, "loss": 0.2485, "step": 173575 }, { "epoch": 1.71, "grad_norm": 25.27817726135254, "learning_rate": 4.188139851251651e-06, "loss": 0.2858, "step": 173600 }, { "epoch": 1.71, "grad_norm": 8.553464889526367, "learning_rate": 4.188015728797403e-06, "loss": 0.2199, "step": 173625 }, { "epoch": 1.71, "grad_norm": 13.334441184997559, "learning_rate": 4.187891606343154e-06, "loss": 0.2781, "step": 173650 }, { "epoch": 1.71, "grad_norm": 2.5065410137176514, "learning_rate": 4.187767483888906e-06, "loss": 0.2292, "step": 173675 }, { "epoch": 1.71, "grad_norm": 13.255143165588379, "learning_rate": 4.187643361434657e-06, "loss": 0.2822, "step": 173700 }, { "epoch": 1.71, "grad_norm": 6.534243106842041, "learning_rate": 4.187519238980408e-06, "loss": 0.201, "step": 173725 }, { "epoch": 1.71, "grad_norm": 12.564863204956055, "learning_rate": 4.18739511652616e-06, "loss": 0.2518, "step": 173750 }, { "epoch": 1.71, "grad_norm": 2.381565809249878, "learning_rate": 4.1872709940719116e-06, "loss": 0.1839, "step": 173775 }, { "epoch": 1.71, "grad_norm": 15.414847373962402, "learning_rate": 4.187146871617664e-06, "loss": 0.2844, "step": 173800 }, { "epoch": 1.71, "grad_norm": 0.5220584869384766, "learning_rate": 4.187022749163415e-06, "loss": 0.2103, "step": 173825 }, { "epoch": 1.71, "grad_norm": 14.86107063293457, "learning_rate": 4.186898626709167e-06, "loss": 0.2222, "step": 173850 }, { "epoch": 1.71, "grad_norm": 6.981709957122803, "learning_rate": 4.186774504254918e-06, "loss": 0.2216, "step": 173875 }, { "epoch": 1.71, "grad_norm": 12.361085891723633, "learning_rate": 4.18665038180067e-06, "loss": 0.2186, "step": 173900 }, { "epoch": 1.71, "grad_norm": 7.532336711883545, "learning_rate": 4.186526259346421e-06, "loss": 0.2169, "step": 173925 }, { "epoch": 1.71, "grad_norm": 14.053210258483887, "learning_rate": 4.1864021368921726e-06, "loss": 0.2184, "step": 173950 }, { "epoch": 1.71, "grad_norm": 6.612510681152344, "learning_rate": 4.186278014437925e-06, "loss": 0.1874, "step": 173975 }, { "epoch": 1.71, "grad_norm": 12.074161529541016, "learning_rate": 4.186153891983676e-06, "loss": 0.2355, "step": 174000 }, { "epoch": 1.71, "grad_norm": 4.589795112609863, "learning_rate": 4.186029769529427e-06, "loss": 0.2509, "step": 174025 }, { "epoch": 1.71, "grad_norm": 12.417339324951172, "learning_rate": 4.185905647075179e-06, "loss": 0.2158, "step": 174050 }, { "epoch": 1.71, "grad_norm": 4.819154739379883, "learning_rate": 4.18578152462093e-06, "loss": 0.2172, "step": 174075 }, { "epoch": 1.71, "grad_norm": 12.520519256591797, "learning_rate": 4.1856574021666815e-06, "loss": 0.2351, "step": 174100 }, { "epoch": 1.71, "grad_norm": 4.458657264709473, "learning_rate": 4.185533279712433e-06, "loss": 0.2121, "step": 174125 }, { "epoch": 1.71, "grad_norm": 7.749428749084473, "learning_rate": 4.185409157258185e-06, "loss": 0.187, "step": 174150 }, { "epoch": 1.71, "grad_norm": 3.032400131225586, "learning_rate": 4.185285034803936e-06, "loss": 0.226, "step": 174175 }, { "epoch": 1.71, "grad_norm": 12.260418891906738, "learning_rate": 4.185160912349688e-06, "loss": 0.2435, "step": 174200 }, { "epoch": 1.71, "grad_norm": 5.8541717529296875, "learning_rate": 4.185036789895439e-06, "loss": 0.2197, "step": 174225 }, { "epoch": 1.71, "grad_norm": 12.140885353088379, "learning_rate": 4.184912667441191e-06, "loss": 0.2221, "step": 174250 }, { "epoch": 1.71, "grad_norm": 9.132375717163086, "learning_rate": 4.1847885449869426e-06, "loss": 0.2437, "step": 174275 }, { "epoch": 1.71, "grad_norm": 21.728561401367188, "learning_rate": 4.184664422532695e-06, "loss": 0.1973, "step": 174300 }, { "epoch": 1.71, "grad_norm": 8.501897811889648, "learning_rate": 4.184540300078446e-06, "loss": 0.2154, "step": 174325 }, { "epoch": 1.71, "grad_norm": 15.18875503540039, "learning_rate": 4.184416177624197e-06, "loss": 0.2352, "step": 174350 }, { "epoch": 1.71, "grad_norm": 7.15988302230835, "learning_rate": 4.184292055169949e-06, "loss": 0.249, "step": 174375 }, { "epoch": 1.71, "grad_norm": 12.16435718536377, "learning_rate": 4.1841679327157e-06, "loss": 0.2464, "step": 174400 }, { "epoch": 1.71, "grad_norm": 8.718165397644043, "learning_rate": 4.1840438102614515e-06, "loss": 0.2121, "step": 174425 }, { "epoch": 1.72, "grad_norm": 12.928572654724121, "learning_rate": 4.183919687807204e-06, "loss": 0.2385, "step": 174450 }, { "epoch": 1.72, "grad_norm": 7.599174976348877, "learning_rate": 4.183795565352955e-06, "loss": 0.3036, "step": 174475 }, { "epoch": 1.72, "grad_norm": 9.783994674682617, "learning_rate": 4.183671442898706e-06, "loss": 0.2402, "step": 174500 }, { "epoch": 1.72, "grad_norm": 6.249093055725098, "learning_rate": 4.183547320444458e-06, "loss": 0.2089, "step": 174525 }, { "epoch": 1.72, "grad_norm": 14.391213417053223, "learning_rate": 4.183423197990209e-06, "loss": 0.258, "step": 174550 }, { "epoch": 1.72, "grad_norm": 4.603264331817627, "learning_rate": 4.183299075535961e-06, "loss": 0.2646, "step": 174575 }, { "epoch": 1.72, "grad_norm": 16.275310516357422, "learning_rate": 4.1831749530817125e-06, "loss": 0.2294, "step": 174600 }, { "epoch": 1.72, "grad_norm": 2.9562745094299316, "learning_rate": 4.183050830627465e-06, "loss": 0.2451, "step": 174625 }, { "epoch": 1.72, "grad_norm": 19.262941360473633, "learning_rate": 4.182926708173216e-06, "loss": 0.2552, "step": 174650 }, { "epoch": 1.72, "grad_norm": 4.120140075683594, "learning_rate": 4.182802585718968e-06, "loss": 0.2423, "step": 174675 }, { "epoch": 1.72, "grad_norm": 15.061219215393066, "learning_rate": 4.182678463264719e-06, "loss": 0.2157, "step": 174700 }, { "epoch": 1.72, "grad_norm": 7.830382823944092, "learning_rate": 4.18255434081047e-06, "loss": 0.2673, "step": 174725 }, { "epoch": 1.72, "grad_norm": 18.67448616027832, "learning_rate": 4.182430218356222e-06, "loss": 0.2563, "step": 174750 }, { "epoch": 1.72, "grad_norm": 4.331458568572998, "learning_rate": 4.1823060959019736e-06, "loss": 0.1916, "step": 174775 }, { "epoch": 1.72, "grad_norm": 19.419641494750977, "learning_rate": 4.182181973447725e-06, "loss": 0.2559, "step": 174800 }, { "epoch": 1.72, "grad_norm": 4.6557183265686035, "learning_rate": 4.182057850993477e-06, "loss": 0.218, "step": 174825 }, { "epoch": 1.72, "grad_norm": 14.689213752746582, "learning_rate": 4.181933728539228e-06, "loss": 0.1978, "step": 174850 }, { "epoch": 1.72, "grad_norm": 3.6947405338287354, "learning_rate": 4.181809606084979e-06, "loss": 0.2511, "step": 174875 }, { "epoch": 1.72, "grad_norm": 9.744742393493652, "learning_rate": 4.181685483630731e-06, "loss": 0.208, "step": 174900 }, { "epoch": 1.72, "grad_norm": 10.193831443786621, "learning_rate": 4.1815613611764825e-06, "loss": 0.2407, "step": 174925 }, { "epoch": 1.72, "grad_norm": 10.578340530395508, "learning_rate": 4.181437238722234e-06, "loss": 0.2424, "step": 174950 }, { "epoch": 1.72, "grad_norm": 3.8016457557678223, "learning_rate": 4.181313116267986e-06, "loss": 0.2148, "step": 174975 }, { "epoch": 1.72, "grad_norm": 18.701847076416016, "learning_rate": 4.181188993813737e-06, "loss": 0.2267, "step": 175000 }, { "epoch": 1.72, "grad_norm": 7.468899726867676, "learning_rate": 4.181064871359489e-06, "loss": 0.2245, "step": 175025 }, { "epoch": 1.72, "grad_norm": 15.469891548156738, "learning_rate": 4.18094074890524e-06, "loss": 0.2631, "step": 175050 }, { "epoch": 1.72, "grad_norm": 8.301180839538574, "learning_rate": 4.180816626450992e-06, "loss": 0.2235, "step": 175075 }, { "epoch": 1.72, "grad_norm": 15.263486862182617, "learning_rate": 4.1806925039967435e-06, "loss": 0.2576, "step": 175100 }, { "epoch": 1.72, "grad_norm": 4.749359130859375, "learning_rate": 4.180568381542495e-06, "loss": 0.2475, "step": 175125 }, { "epoch": 1.72, "grad_norm": 21.183565139770508, "learning_rate": 4.180444259088247e-06, "loss": 0.2295, "step": 175150 }, { "epoch": 1.72, "grad_norm": 7.104905128479004, "learning_rate": 4.180320136633998e-06, "loss": 0.2573, "step": 175175 }, { "epoch": 1.72, "grad_norm": 8.49376106262207, "learning_rate": 4.180196014179749e-06, "loss": 0.1696, "step": 175200 }, { "epoch": 1.72, "grad_norm": 4.336923122406006, "learning_rate": 4.180071891725501e-06, "loss": 0.2256, "step": 175225 }, { "epoch": 1.72, "grad_norm": 16.42073631286621, "learning_rate": 4.1799477692712525e-06, "loss": 0.2483, "step": 175250 }, { "epoch": 1.72, "grad_norm": 5.474428176879883, "learning_rate": 4.179823646817004e-06, "loss": 0.2224, "step": 175275 }, { "epoch": 1.72, "grad_norm": 15.171175003051758, "learning_rate": 4.179699524362756e-06, "loss": 0.2652, "step": 175300 }, { "epoch": 1.72, "grad_norm": 8.221902847290039, "learning_rate": 4.179575401908507e-06, "loss": 0.2196, "step": 175325 }, { "epoch": 1.72, "grad_norm": 14.079784393310547, "learning_rate": 4.179451279454258e-06, "loss": 0.1991, "step": 175350 }, { "epoch": 1.72, "grad_norm": 4.977658271789551, "learning_rate": 4.17932715700001e-06, "loss": 0.1983, "step": 175375 }, { "epoch": 1.72, "grad_norm": 14.311833381652832, "learning_rate": 4.1792030345457615e-06, "loss": 0.2456, "step": 175400 }, { "epoch": 1.72, "grad_norm": 0.11407702416181564, "learning_rate": 4.179083876989683e-06, "loss": 0.2169, "step": 175425 }, { "epoch": 1.73, "grad_norm": 8.35644817352295, "learning_rate": 4.1789597545354346e-06, "loss": 0.2606, "step": 175450 }, { "epoch": 1.73, "grad_norm": 3.9788601398468018, "learning_rate": 4.178835632081187e-06, "loss": 0.1704, "step": 175475 }, { "epoch": 1.73, "grad_norm": 9.057153701782227, "learning_rate": 4.178711509626938e-06, "loss": 0.2151, "step": 175500 }, { "epoch": 1.73, "grad_norm": 5.097064018249512, "learning_rate": 4.17858738717269e-06, "loss": 0.2249, "step": 175525 }, { "epoch": 1.73, "grad_norm": 12.10925006866455, "learning_rate": 4.178463264718441e-06, "loss": 0.2564, "step": 175550 }, { "epoch": 1.73, "grad_norm": 6.9299397468566895, "learning_rate": 4.178339142264192e-06, "loss": 0.2027, "step": 175575 }, { "epoch": 1.73, "grad_norm": 13.842397689819336, "learning_rate": 4.178215019809944e-06, "loss": 0.2676, "step": 175600 }, { "epoch": 1.73, "grad_norm": 9.329523086547852, "learning_rate": 4.178090897355696e-06, "loss": 0.2571, "step": 175625 }, { "epoch": 1.73, "grad_norm": 11.62110710144043, "learning_rate": 4.177966774901447e-06, "loss": 0.1829, "step": 175650 }, { "epoch": 1.73, "grad_norm": 6.281414985656738, "learning_rate": 4.177842652447199e-06, "loss": 0.2235, "step": 175675 }, { "epoch": 1.73, "grad_norm": 14.432574272155762, "learning_rate": 4.17771852999295e-06, "loss": 0.2833, "step": 175700 }, { "epoch": 1.73, "grad_norm": 3.880016803741455, "learning_rate": 4.177594407538701e-06, "loss": 0.26, "step": 175725 }, { "epoch": 1.73, "grad_norm": 12.538910865783691, "learning_rate": 4.177470285084453e-06, "loss": 0.242, "step": 175750 }, { "epoch": 1.73, "grad_norm": 4.09771203994751, "learning_rate": 4.1773461626302045e-06, "loss": 0.2256, "step": 175775 }, { "epoch": 1.73, "grad_norm": 15.32519817352295, "learning_rate": 4.177222040175956e-06, "loss": 0.2827, "step": 175800 }, { "epoch": 1.73, "grad_norm": 2.117445707321167, "learning_rate": 4.177097917721708e-06, "loss": 0.2513, "step": 175825 }, { "epoch": 1.73, "grad_norm": 13.87445068359375, "learning_rate": 4.176973795267459e-06, "loss": 0.2036, "step": 175850 }, { "epoch": 1.73, "grad_norm": 6.080634117126465, "learning_rate": 4.176849672813211e-06, "loss": 0.1915, "step": 175875 }, { "epoch": 1.73, "grad_norm": 12.778924942016602, "learning_rate": 4.176725550358962e-06, "loss": 0.2191, "step": 175900 }, { "epoch": 1.73, "grad_norm": 6.762461185455322, "learning_rate": 4.176601427904714e-06, "loss": 0.2519, "step": 175925 }, { "epoch": 1.73, "grad_norm": 15.022372245788574, "learning_rate": 4.1764773054504656e-06, "loss": 0.2379, "step": 175950 }, { "epoch": 1.73, "grad_norm": 6.157121658325195, "learning_rate": 4.176353182996218e-06, "loss": 0.1832, "step": 175975 }, { "epoch": 1.73, "grad_norm": 15.819411277770996, "learning_rate": 4.176229060541969e-06, "loss": 0.243, "step": 176000 }, { "epoch": 1.73, "grad_norm": 1.6866639852523804, "learning_rate": 4.17610493808772e-06, "loss": 0.2199, "step": 176025 }, { "epoch": 1.73, "grad_norm": 8.949325561523438, "learning_rate": 4.175980815633472e-06, "loss": 0.2513, "step": 176050 }, { "epoch": 1.73, "grad_norm": 13.56167221069336, "learning_rate": 4.175856693179223e-06, "loss": 0.2178, "step": 176075 }, { "epoch": 1.73, "grad_norm": 16.967327117919922, "learning_rate": 4.1757325707249745e-06, "loss": 0.2446, "step": 176100 }, { "epoch": 1.73, "grad_norm": 9.116889953613281, "learning_rate": 4.175608448270727e-06, "loss": 0.2017, "step": 176125 }, { "epoch": 1.73, "grad_norm": 13.172561645507812, "learning_rate": 4.175484325816478e-06, "loss": 0.2011, "step": 176150 }, { "epoch": 1.73, "grad_norm": 5.629402160644531, "learning_rate": 4.175360203362229e-06, "loss": 0.2289, "step": 176175 }, { "epoch": 1.73, "grad_norm": 9.647383689880371, "learning_rate": 4.175236080907981e-06, "loss": 0.1878, "step": 176200 }, { "epoch": 1.73, "grad_norm": 7.8059797286987305, "learning_rate": 4.175111958453732e-06, "loss": 0.2237, "step": 176225 }, { "epoch": 1.73, "grad_norm": 13.156599044799805, "learning_rate": 4.1749878359994835e-06, "loss": 0.1931, "step": 176250 }, { "epoch": 1.73, "grad_norm": 7.6915154457092285, "learning_rate": 4.1748637135452356e-06, "loss": 0.2434, "step": 176275 }, { "epoch": 1.73, "grad_norm": 16.929637908935547, "learning_rate": 4.174739591090987e-06, "loss": 0.2609, "step": 176300 }, { "epoch": 1.73, "grad_norm": 5.1640849113464355, "learning_rate": 4.174615468636739e-06, "loss": 0.2109, "step": 176325 }, { "epoch": 1.73, "grad_norm": 17.43756675720215, "learning_rate": 4.17449134618249e-06, "loss": 0.2613, "step": 176350 }, { "epoch": 1.73, "grad_norm": 3.8583974838256836, "learning_rate": 4.174367223728242e-06, "loss": 0.2228, "step": 176375 }, { "epoch": 1.73, "grad_norm": 33.55613708496094, "learning_rate": 4.174243101273993e-06, "loss": 0.2492, "step": 176400 }, { "epoch": 1.73, "grad_norm": 3.1542065143585205, "learning_rate": 4.1741189788197445e-06, "loss": 0.2065, "step": 176425 }, { "epoch": 1.73, "grad_norm": 8.083502769470215, "learning_rate": 4.1739948563654966e-06, "loss": 0.2547, "step": 176450 }, { "epoch": 1.74, "grad_norm": 4.435540676116943, "learning_rate": 4.173870733911248e-06, "loss": 0.1918, "step": 176475 }, { "epoch": 1.74, "grad_norm": 12.110350608825684, "learning_rate": 4.173746611456999e-06, "loss": 0.2474, "step": 176500 }, { "epoch": 1.74, "grad_norm": 3.1111605167388916, "learning_rate": 4.173622489002751e-06, "loss": 0.2344, "step": 176525 }, { "epoch": 1.74, "grad_norm": 19.262165069580078, "learning_rate": 4.173498366548502e-06, "loss": 0.1909, "step": 176550 }, { "epoch": 1.74, "grad_norm": 1.3531376123428345, "learning_rate": 4.1733742440942535e-06, "loss": 0.1841, "step": 176575 }, { "epoch": 1.74, "grad_norm": 22.911582946777344, "learning_rate": 4.1732501216400055e-06, "loss": 0.2414, "step": 176600 }, { "epoch": 1.74, "grad_norm": 3.2206900119781494, "learning_rate": 4.173125999185757e-06, "loss": 0.2073, "step": 176625 }, { "epoch": 1.74, "grad_norm": 18.49068832397461, "learning_rate": 4.173001876731508e-06, "loss": 0.2618, "step": 176650 }, { "epoch": 1.74, "grad_norm": 6.901815414428711, "learning_rate": 4.17287775427726e-06, "loss": 0.2445, "step": 176675 }, { "epoch": 1.74, "grad_norm": 16.02075958251953, "learning_rate": 4.172753631823011e-06, "loss": 0.2158, "step": 176700 }, { "epoch": 1.74, "grad_norm": 7.402349472045898, "learning_rate": 4.172629509368763e-06, "loss": 0.2087, "step": 176725 }, { "epoch": 1.74, "grad_norm": 7.568716049194336, "learning_rate": 4.1725053869145145e-06, "loss": 0.3066, "step": 176750 }, { "epoch": 1.74, "grad_norm": 5.497699737548828, "learning_rate": 4.1723812644602666e-06, "loss": 0.1943, "step": 176775 }, { "epoch": 1.74, "grad_norm": 15.279176712036133, "learning_rate": 4.172257142006018e-06, "loss": 0.2498, "step": 176800 }, { "epoch": 1.74, "grad_norm": 5.650694847106934, "learning_rate": 4.17213301955177e-06, "loss": 0.2258, "step": 176825 }, { "epoch": 1.74, "grad_norm": 13.633115768432617, "learning_rate": 4.172008897097521e-06, "loss": 0.2136, "step": 176850 }, { "epoch": 1.74, "grad_norm": 3.3529887199401855, "learning_rate": 4.171884774643272e-06, "loss": 0.1816, "step": 176875 }, { "epoch": 1.74, "grad_norm": 22.75724983215332, "learning_rate": 4.171760652189024e-06, "loss": 0.2617, "step": 176900 }, { "epoch": 1.74, "grad_norm": 2.8145527839660645, "learning_rate": 4.1716365297347755e-06, "loss": 0.2293, "step": 176925 }, { "epoch": 1.74, "grad_norm": 9.501679420471191, "learning_rate": 4.171512407280527e-06, "loss": 0.2478, "step": 176950 }, { "epoch": 1.74, "grad_norm": 6.552928447723389, "learning_rate": 4.171388284826279e-06, "loss": 0.1818, "step": 176975 }, { "epoch": 1.74, "grad_norm": 14.457135200500488, "learning_rate": 4.17126416237203e-06, "loss": 0.3014, "step": 177000 }, { "epoch": 1.74, "grad_norm": 6.970794200897217, "learning_rate": 4.171140039917781e-06, "loss": 0.2249, "step": 177025 }, { "epoch": 1.74, "grad_norm": 17.876752853393555, "learning_rate": 4.171015917463533e-06, "loss": 0.2549, "step": 177050 }, { "epoch": 1.74, "grad_norm": 5.583348751068115, "learning_rate": 4.1708917950092845e-06, "loss": 0.193, "step": 177075 }, { "epoch": 1.74, "grad_norm": 12.27364444732666, "learning_rate": 4.1707676725550365e-06, "loss": 0.2035, "step": 177100 }, { "epoch": 1.74, "grad_norm": 6.9019670486450195, "learning_rate": 4.170643550100788e-06, "loss": 0.2015, "step": 177125 }, { "epoch": 1.74, "grad_norm": 20.48147201538086, "learning_rate": 4.17051942764654e-06, "loss": 0.238, "step": 177150 }, { "epoch": 1.74, "grad_norm": 6.792764186859131, "learning_rate": 4.170395305192291e-06, "loss": 0.2051, "step": 177175 }, { "epoch": 1.74, "grad_norm": 11.21169376373291, "learning_rate": 4.170271182738042e-06, "loss": 0.2374, "step": 177200 }, { "epoch": 1.74, "grad_norm": 6.691202640533447, "learning_rate": 4.170147060283794e-06, "loss": 0.249, "step": 177225 }, { "epoch": 1.74, "grad_norm": 13.022902488708496, "learning_rate": 4.1700229378295455e-06, "loss": 0.2297, "step": 177250 }, { "epoch": 1.74, "grad_norm": 6.7370100021362305, "learning_rate": 4.169898815375297e-06, "loss": 0.2052, "step": 177275 }, { "epoch": 1.74, "grad_norm": 18.515949249267578, "learning_rate": 4.169774692921049e-06, "loss": 0.2553, "step": 177300 }, { "epoch": 1.74, "grad_norm": 6.137365341186523, "learning_rate": 4.1696505704668e-06, "loss": 0.2048, "step": 177325 }, { "epoch": 1.74, "grad_norm": 13.39863109588623, "learning_rate": 4.169526448012551e-06, "loss": 0.2106, "step": 177350 }, { "epoch": 1.74, "grad_norm": 9.21934700012207, "learning_rate": 4.169402325558303e-06, "loss": 0.2258, "step": 177375 }, { "epoch": 1.74, "grad_norm": 18.69388771057129, "learning_rate": 4.1692782031040545e-06, "loss": 0.2554, "step": 177400 }, { "epoch": 1.74, "grad_norm": 3.1297245025634766, "learning_rate": 4.169154080649806e-06, "loss": 0.2035, "step": 177425 }, { "epoch": 1.74, "grad_norm": 13.832818031311035, "learning_rate": 4.169029958195558e-06, "loss": 0.2537, "step": 177450 }, { "epoch": 1.74, "grad_norm": 1.6572014093399048, "learning_rate": 4.168905835741309e-06, "loss": 0.2565, "step": 177475 }, { "epoch": 1.75, "grad_norm": 11.777176856994629, "learning_rate": 4.168781713287061e-06, "loss": 0.1919, "step": 177500 }, { "epoch": 1.75, "grad_norm": 2.708091974258423, "learning_rate": 4.168657590832812e-06, "loss": 0.2206, "step": 177525 }, { "epoch": 1.75, "grad_norm": 19.18946647644043, "learning_rate": 4.168538433276734e-06, "loss": 0.2206, "step": 177550 }, { "epoch": 1.75, "grad_norm": 6.162364482879639, "learning_rate": 4.168414310822485e-06, "loss": 0.2267, "step": 177575 }, { "epoch": 1.75, "grad_norm": 12.327737808227539, "learning_rate": 4.168290188368237e-06, "loss": 0.1762, "step": 177600 }, { "epoch": 1.75, "grad_norm": 8.541921615600586, "learning_rate": 4.1681660659139886e-06, "loss": 0.2355, "step": 177625 }, { "epoch": 1.75, "grad_norm": 12.963032722473145, "learning_rate": 4.168041943459741e-06, "loss": 0.2141, "step": 177650 }, { "epoch": 1.75, "grad_norm": 6.322623252868652, "learning_rate": 4.167917821005492e-06, "loss": 0.1723, "step": 177675 }, { "epoch": 1.75, "grad_norm": 28.651081085205078, "learning_rate": 4.167793698551243e-06, "loss": 0.2216, "step": 177700 }, { "epoch": 1.75, "grad_norm": 6.8391876220703125, "learning_rate": 4.167669576096995e-06, "loss": 0.2442, "step": 177725 }, { "epoch": 1.75, "grad_norm": 14.655774116516113, "learning_rate": 4.167545453642746e-06, "loss": 0.2645, "step": 177750 }, { "epoch": 1.75, "grad_norm": 6.144460201263428, "learning_rate": 4.1674213311884975e-06, "loss": 0.2054, "step": 177775 }, { "epoch": 1.75, "grad_norm": 15.342538833618164, "learning_rate": 4.16729720873425e-06, "loss": 0.2805, "step": 177800 }, { "epoch": 1.75, "grad_norm": 3.5913453102111816, "learning_rate": 4.167173086280001e-06, "loss": 0.2347, "step": 177825 }, { "epoch": 1.75, "grad_norm": 12.037142753601074, "learning_rate": 4.167048963825752e-06, "loss": 0.2687, "step": 177850 }, { "epoch": 1.75, "grad_norm": 6.215919494628906, "learning_rate": 4.166924841371503e-06, "loss": 0.1947, "step": 177875 }, { "epoch": 1.75, "grad_norm": 17.624820709228516, "learning_rate": 4.166800718917255e-06, "loss": 0.2681, "step": 177900 }, { "epoch": 1.75, "grad_norm": 6.694762706756592, "learning_rate": 4.1666765964630065e-06, "loss": 0.1977, "step": 177925 }, { "epoch": 1.75, "grad_norm": 15.982779502868652, "learning_rate": 4.1665524740087586e-06, "loss": 0.2256, "step": 177950 }, { "epoch": 1.75, "grad_norm": 0.7219848036766052, "learning_rate": 4.16642835155451e-06, "loss": 0.1865, "step": 177975 }, { "epoch": 1.75, "grad_norm": 9.544222831726074, "learning_rate": 4.166304229100262e-06, "loss": 0.2493, "step": 178000 }, { "epoch": 1.75, "grad_norm": 7.056037425994873, "learning_rate": 4.166180106646013e-06, "loss": 0.2146, "step": 178025 }, { "epoch": 1.75, "grad_norm": 9.982569694519043, "learning_rate": 4.166055984191765e-06, "loss": 0.231, "step": 178050 }, { "epoch": 1.75, "grad_norm": 16.199670791625977, "learning_rate": 4.165931861737516e-06, "loss": 0.2002, "step": 178075 }, { "epoch": 1.75, "grad_norm": 16.57938575744629, "learning_rate": 4.1658077392832675e-06, "loss": 0.1865, "step": 178100 }, { "epoch": 1.75, "grad_norm": 5.987547397613525, "learning_rate": 4.16568361682902e-06, "loss": 0.2774, "step": 178125 }, { "epoch": 1.75, "grad_norm": 14.36285400390625, "learning_rate": 4.165559494374771e-06, "loss": 0.1934, "step": 178150 }, { "epoch": 1.75, "grad_norm": 7.834503650665283, "learning_rate": 4.165435371920522e-06, "loss": 0.2174, "step": 178175 }, { "epoch": 1.75, "grad_norm": 12.1676607131958, "learning_rate": 4.165311249466274e-06, "loss": 0.2303, "step": 178200 }, { "epoch": 1.75, "grad_norm": 3.878225564956665, "learning_rate": 4.165187127012025e-06, "loss": 0.2324, "step": 178225 }, { "epoch": 1.75, "grad_norm": 18.876325607299805, "learning_rate": 4.1650630045577765e-06, "loss": 0.287, "step": 178250 }, { "epoch": 1.75, "grad_norm": 3.9845030307769775, "learning_rate": 4.1649388821035285e-06, "loss": 0.1936, "step": 178275 }, { "epoch": 1.75, "grad_norm": 10.77422046661377, "learning_rate": 4.16481475964928e-06, "loss": 0.2506, "step": 178300 }, { "epoch": 1.75, "grad_norm": 3.019547462463379, "learning_rate": 4.164690637195031e-06, "loss": 0.2066, "step": 178325 }, { "epoch": 1.75, "grad_norm": 13.890077590942383, "learning_rate": 4.164566514740783e-06, "loss": 0.2706, "step": 178350 }, { "epoch": 1.75, "grad_norm": 3.420685052871704, "learning_rate": 4.164442392286534e-06, "loss": 0.2619, "step": 178375 }, { "epoch": 1.75, "grad_norm": 15.955480575561523, "learning_rate": 4.164318269832286e-06, "loss": 0.3176, "step": 178400 }, { "epoch": 1.75, "grad_norm": 7.861671447753906, "learning_rate": 4.1641941473780375e-06, "loss": 0.2394, "step": 178425 }, { "epoch": 1.75, "grad_norm": 12.750142097473145, "learning_rate": 4.1640700249237896e-06, "loss": 0.2454, "step": 178450 }, { "epoch": 1.75, "grad_norm": 2.9533939361572266, "learning_rate": 4.163945902469541e-06, "loss": 0.2181, "step": 178475 }, { "epoch": 1.76, "grad_norm": 18.852365493774414, "learning_rate": 4.163821780015293e-06, "loss": 0.2453, "step": 178500 }, { "epoch": 1.76, "grad_norm": 23.250120162963867, "learning_rate": 4.163697657561044e-06, "loss": 0.2103, "step": 178525 }, { "epoch": 1.76, "grad_norm": 12.443967819213867, "learning_rate": 4.163573535106795e-06, "loss": 0.2459, "step": 178550 }, { "epoch": 1.76, "grad_norm": 6.2176432609558105, "learning_rate": 4.163449412652547e-06, "loss": 0.215, "step": 178575 }, { "epoch": 1.76, "grad_norm": 16.314311981201172, "learning_rate": 4.1633252901982985e-06, "loss": 0.2462, "step": 178600 }, { "epoch": 1.76, "grad_norm": 3.455533504486084, "learning_rate": 4.16320116774405e-06, "loss": 0.2853, "step": 178625 }, { "epoch": 1.76, "grad_norm": 32.21551513671875, "learning_rate": 4.163077045289802e-06, "loss": 0.2904, "step": 178650 }, { "epoch": 1.76, "grad_norm": 7.2196431159973145, "learning_rate": 4.162952922835553e-06, "loss": 0.2333, "step": 178675 }, { "epoch": 1.76, "grad_norm": 11.292970657348633, "learning_rate": 4.162828800381304e-06, "loss": 0.1872, "step": 178700 }, { "epoch": 1.76, "grad_norm": 8.847391128540039, "learning_rate": 4.1627046779270554e-06, "loss": 0.211, "step": 178725 }, { "epoch": 1.76, "grad_norm": 17.8010311126709, "learning_rate": 4.1625805554728075e-06, "loss": 0.2139, "step": 178750 }, { "epoch": 1.76, "grad_norm": 5.352287769317627, "learning_rate": 4.162456433018559e-06, "loss": 0.2036, "step": 178775 }, { "epoch": 1.76, "grad_norm": 8.75120735168457, "learning_rate": 4.162332310564311e-06, "loss": 0.2425, "step": 178800 }, { "epoch": 1.76, "grad_norm": 9.279842376708984, "learning_rate": 4.162208188110062e-06, "loss": 0.213, "step": 178825 }, { "epoch": 1.76, "grad_norm": 21.472148895263672, "learning_rate": 4.162084065655814e-06, "loss": 0.2653, "step": 178850 }, { "epoch": 1.76, "grad_norm": 10.61827564239502, "learning_rate": 4.161959943201565e-06, "loss": 0.2485, "step": 178875 }, { "epoch": 1.76, "grad_norm": 7.84374475479126, "learning_rate": 4.161835820747317e-06, "loss": 0.2198, "step": 178900 }, { "epoch": 1.76, "grad_norm": 2.17606520652771, "learning_rate": 4.1617116982930685e-06, "loss": 0.2198, "step": 178925 }, { "epoch": 1.76, "grad_norm": 16.19835662841797, "learning_rate": 4.16158757583882e-06, "loss": 0.2174, "step": 178950 }, { "epoch": 1.76, "grad_norm": 8.820435523986816, "learning_rate": 4.161463453384572e-06, "loss": 0.1983, "step": 178975 }, { "epoch": 1.76, "grad_norm": 16.530920028686523, "learning_rate": 4.161339330930323e-06, "loss": 0.2811, "step": 179000 }, { "epoch": 1.76, "grad_norm": 3.882810354232788, "learning_rate": 4.161215208476074e-06, "loss": 0.216, "step": 179025 }, { "epoch": 1.76, "grad_norm": 16.966541290283203, "learning_rate": 4.161091086021826e-06, "loss": 0.2644, "step": 179050 }, { "epoch": 1.76, "grad_norm": 7.432717323303223, "learning_rate": 4.1609669635675775e-06, "loss": 0.2237, "step": 179075 }, { "epoch": 1.76, "grad_norm": 13.569820404052734, "learning_rate": 4.160842841113329e-06, "loss": 0.2127, "step": 179100 }, { "epoch": 1.76, "grad_norm": 4.607607364654541, "learning_rate": 4.160718718659081e-06, "loss": 0.2309, "step": 179125 }, { "epoch": 1.76, "grad_norm": 22.11627769470215, "learning_rate": 4.160594596204832e-06, "loss": 0.2445, "step": 179150 }, { "epoch": 1.76, "grad_norm": 7.317708492279053, "learning_rate": 4.160470473750583e-06, "loss": 0.2108, "step": 179175 }, { "epoch": 1.76, "grad_norm": 15.417474746704102, "learning_rate": 4.160346351296335e-06, "loss": 0.238, "step": 179200 }, { "epoch": 1.76, "grad_norm": 4.690154552459717, "learning_rate": 4.1602222288420864e-06, "loss": 0.2535, "step": 179225 }, { "epoch": 1.76, "grad_norm": 21.63469886779785, "learning_rate": 4.1600981063878385e-06, "loss": 0.238, "step": 179250 }, { "epoch": 1.76, "grad_norm": 7.167932510375977, "learning_rate": 4.15997398393359e-06, "loss": 0.1864, "step": 179275 }, { "epoch": 1.76, "grad_norm": 9.706254959106445, "learning_rate": 4.159849861479342e-06, "loss": 0.2217, "step": 179300 }, { "epoch": 1.76, "grad_norm": 5.681267738342285, "learning_rate": 4.159725739025093e-06, "loss": 0.2403, "step": 179325 }, { "epoch": 1.76, "grad_norm": 15.010942459106445, "learning_rate": 4.159601616570845e-06, "loss": 0.2788, "step": 179350 }, { "epoch": 1.76, "grad_norm": 10.679976463317871, "learning_rate": 4.159477494116596e-06, "loss": 0.2163, "step": 179375 }, { "epoch": 1.76, "grad_norm": 24.61672019958496, "learning_rate": 4.1593533716623475e-06, "loss": 0.2211, "step": 179400 }, { "epoch": 1.76, "grad_norm": 5.452969074249268, "learning_rate": 4.1592292492080995e-06, "loss": 0.2642, "step": 179425 }, { "epoch": 1.76, "grad_norm": 24.157197952270508, "learning_rate": 4.159105126753851e-06, "loss": 0.3041, "step": 179450 }, { "epoch": 1.76, "grad_norm": 28.250015258789062, "learning_rate": 4.158981004299602e-06, "loss": 0.2107, "step": 179475 }, { "epoch": 1.76, "grad_norm": 9.276224136352539, "learning_rate": 4.158856881845354e-06, "loss": 0.2141, "step": 179500 }, { "epoch": 1.77, "grad_norm": 6.256712913513184, "learning_rate": 4.158732759391105e-06, "loss": 0.2116, "step": 179525 }, { "epoch": 1.77, "grad_norm": 12.127355575561523, "learning_rate": 4.158608636936856e-06, "loss": 0.2134, "step": 179550 }, { "epoch": 1.77, "grad_norm": 4.71666145324707, "learning_rate": 4.158484514482608e-06, "loss": 0.2373, "step": 179575 }, { "epoch": 1.77, "grad_norm": 10.42551040649414, "learning_rate": 4.15836039202836e-06, "loss": 0.2407, "step": 179600 }, { "epoch": 1.77, "grad_norm": 4.164745807647705, "learning_rate": 4.158236269574111e-06, "loss": 0.2378, "step": 179625 }, { "epoch": 1.77, "grad_norm": 18.91021728515625, "learning_rate": 4.158112147119863e-06, "loss": 0.2122, "step": 179650 }, { "epoch": 1.77, "grad_norm": 3.644023895263672, "learning_rate": 4.157988024665614e-06, "loss": 0.2042, "step": 179675 }, { "epoch": 1.77, "grad_norm": 9.400744438171387, "learning_rate": 4.157863902211366e-06, "loss": 0.2131, "step": 179700 }, { "epoch": 1.77, "grad_norm": 4.664267063140869, "learning_rate": 4.157744744655287e-06, "loss": 0.2221, "step": 179725 }, { "epoch": 1.77, "grad_norm": 21.856361389160156, "learning_rate": 4.157620622201039e-06, "loss": 0.2221, "step": 179750 }, { "epoch": 1.77, "grad_norm": 4.504908561706543, "learning_rate": 4.1574964997467905e-06, "loss": 0.2637, "step": 179775 }, { "epoch": 1.77, "grad_norm": 14.158611297607422, "learning_rate": 4.157372377292543e-06, "loss": 0.2414, "step": 179800 }, { "epoch": 1.77, "grad_norm": 8.103801727294922, "learning_rate": 4.157248254838294e-06, "loss": 0.1974, "step": 179825 }, { "epoch": 1.77, "grad_norm": 14.644357681274414, "learning_rate": 4.157124132384045e-06, "loss": 0.254, "step": 179850 }, { "epoch": 1.77, "grad_norm": 4.743893146514893, "learning_rate": 4.157000009929797e-06, "loss": 0.234, "step": 179875 }, { "epoch": 1.77, "grad_norm": 14.025588035583496, "learning_rate": 4.156875887475548e-06, "loss": 0.236, "step": 179900 }, { "epoch": 1.77, "grad_norm": 5.231332302093506, "learning_rate": 4.1567517650212995e-06, "loss": 0.2233, "step": 179925 }, { "epoch": 1.77, "grad_norm": 18.416458129882812, "learning_rate": 4.1566276425670515e-06, "loss": 0.2021, "step": 179950 }, { "epoch": 1.77, "grad_norm": 5.263070106506348, "learning_rate": 4.156503520112803e-06, "loss": 0.2376, "step": 179975 }, { "epoch": 1.77, "grad_norm": 12.877582550048828, "learning_rate": 4.156379397658554e-06, "loss": 0.2078, "step": 180000 }, { "epoch": 1.77, "eval_loss": 0.508732795715332, "eval_runtime": 6052.1629, "eval_samples_per_second": 1.564, "eval_steps_per_second": 0.196, "eval_wer": 0.12912830747440932, "step": 180000 }, { "epoch": 1.77, "grad_norm": 4.159947395324707, "learning_rate": 4.156255275204306e-06, "loss": 0.17, "step": 180025 }, { "epoch": 1.77, "grad_norm": 19.904155731201172, "learning_rate": 4.156131152750057e-06, "loss": 0.2636, "step": 180050 }, { "epoch": 1.77, "grad_norm": 6.832714080810547, "learning_rate": 4.1560070302958085e-06, "loss": 0.2318, "step": 180075 }, { "epoch": 1.77, "grad_norm": 10.097146987915039, "learning_rate": 4.1558829078415605e-06, "loss": 0.2134, "step": 180100 }, { "epoch": 1.77, "grad_norm": 1.4959766864776611, "learning_rate": 4.155758785387312e-06, "loss": 0.2204, "step": 180125 }, { "epoch": 1.77, "grad_norm": 19.764629364013672, "learning_rate": 4.155634662933064e-06, "loss": 0.1675, "step": 180150 }, { "epoch": 1.77, "grad_norm": 3.5356860160827637, "learning_rate": 4.155510540478815e-06, "loss": 0.2108, "step": 180175 }, { "epoch": 1.77, "grad_norm": 10.60684871673584, "learning_rate": 4.155386418024567e-06, "loss": 0.2239, "step": 180200 }, { "epoch": 1.77, "grad_norm": 7.037903308868408, "learning_rate": 4.155262295570318e-06, "loss": 0.2274, "step": 180225 }, { "epoch": 1.77, "grad_norm": 11.01896858215332, "learning_rate": 4.1551381731160695e-06, "loss": 0.2008, "step": 180250 }, { "epoch": 1.77, "grad_norm": 2.210740089416504, "learning_rate": 4.1550140506618215e-06, "loss": 0.2119, "step": 180275 }, { "epoch": 1.77, "grad_norm": 11.982646942138672, "learning_rate": 4.154889928207573e-06, "loss": 0.194, "step": 180300 }, { "epoch": 1.77, "grad_norm": 8.537399291992188, "learning_rate": 4.154765805753324e-06, "loss": 0.269, "step": 180325 }, { "epoch": 1.77, "grad_norm": 14.936304092407227, "learning_rate": 4.154641683299076e-06, "loss": 0.2558, "step": 180350 }, { "epoch": 1.77, "grad_norm": 0.19127649068832397, "learning_rate": 4.154517560844827e-06, "loss": 0.214, "step": 180375 }, { "epoch": 1.77, "grad_norm": 16.060617446899414, "learning_rate": 4.1543934383905784e-06, "loss": 0.211, "step": 180400 }, { "epoch": 1.77, "grad_norm": 4.419489860534668, "learning_rate": 4.1542693159363305e-06, "loss": 0.2134, "step": 180425 }, { "epoch": 1.77, "grad_norm": 10.159207344055176, "learning_rate": 4.154145193482082e-06, "loss": 0.2412, "step": 180450 }, { "epoch": 1.77, "grad_norm": 5.074368000030518, "learning_rate": 4.154021071027834e-06, "loss": 0.1863, "step": 180475 }, { "epoch": 1.77, "grad_norm": 18.8772029876709, "learning_rate": 4.153896948573585e-06, "loss": 0.2681, "step": 180500 }, { "epoch": 1.77, "grad_norm": 3.7172415256500244, "learning_rate": 4.153772826119337e-06, "loss": 0.2169, "step": 180525 }, { "epoch": 1.78, "grad_norm": 15.509276390075684, "learning_rate": 4.153648703665088e-06, "loss": 0.2332, "step": 180550 }, { "epoch": 1.78, "grad_norm": 2.4453816413879395, "learning_rate": 4.15352458121084e-06, "loss": 0.2035, "step": 180575 }, { "epoch": 1.78, "grad_norm": 10.91254711151123, "learning_rate": 4.1534004587565915e-06, "loss": 0.2301, "step": 180600 }, { "epoch": 1.78, "grad_norm": 5.652781963348389, "learning_rate": 4.153276336302343e-06, "loss": 0.2222, "step": 180625 }, { "epoch": 1.78, "grad_norm": 11.583903312683105, "learning_rate": 4.153152213848095e-06, "loss": 0.2405, "step": 180650 }, { "epoch": 1.78, "grad_norm": 7.460647106170654, "learning_rate": 4.153028091393846e-06, "loss": 0.2294, "step": 180675 }, { "epoch": 1.78, "grad_norm": 10.134230613708496, "learning_rate": 4.152903968939597e-06, "loss": 0.2262, "step": 180700 }, { "epoch": 1.78, "grad_norm": 7.822766304016113, "learning_rate": 4.152779846485349e-06, "loss": 0.2792, "step": 180725 }, { "epoch": 1.78, "grad_norm": 12.34929084777832, "learning_rate": 4.1526557240311005e-06, "loss": 0.275, "step": 180750 }, { "epoch": 1.78, "grad_norm": 5.137584686279297, "learning_rate": 4.152531601576852e-06, "loss": 0.248, "step": 180775 }, { "epoch": 1.78, "grad_norm": 15.95208740234375, "learning_rate": 4.152407479122604e-06, "loss": 0.2425, "step": 180800 }, { "epoch": 1.78, "grad_norm": 18.311542510986328, "learning_rate": 4.152283356668355e-06, "loss": 0.2377, "step": 180825 }, { "epoch": 1.78, "grad_norm": 16.384868621826172, "learning_rate": 4.152159234214106e-06, "loss": 0.2574, "step": 180850 }, { "epoch": 1.78, "grad_norm": 3.229015588760376, "learning_rate": 4.152035111759858e-06, "loss": 0.2263, "step": 180875 }, { "epoch": 1.78, "grad_norm": 6.411686420440674, "learning_rate": 4.1519109893056094e-06, "loss": 0.2355, "step": 180900 }, { "epoch": 1.78, "grad_norm": 3.31042742729187, "learning_rate": 4.1517868668513615e-06, "loss": 0.181, "step": 180925 }, { "epoch": 1.78, "grad_norm": 5.4627838134765625, "learning_rate": 4.151662744397113e-06, "loss": 0.2367, "step": 180950 }, { "epoch": 1.78, "grad_norm": 8.69710922241211, "learning_rate": 4.151538621942865e-06, "loss": 0.2084, "step": 180975 }, { "epoch": 1.78, "grad_norm": 11.989889144897461, "learning_rate": 4.151414499488616e-06, "loss": 0.2735, "step": 181000 }, { "epoch": 1.78, "grad_norm": 4.984213352203369, "learning_rate": 4.151290377034367e-06, "loss": 0.2252, "step": 181025 }, { "epoch": 1.78, "grad_norm": 11.934013366699219, "learning_rate": 4.151166254580119e-06, "loss": 0.2415, "step": 181050 }, { "epoch": 1.78, "grad_norm": 2.6084372997283936, "learning_rate": 4.1510421321258705e-06, "loss": 0.1973, "step": 181075 }, { "epoch": 1.78, "grad_norm": 7.868055820465088, "learning_rate": 4.150918009671622e-06, "loss": 0.2239, "step": 181100 }, { "epoch": 1.78, "grad_norm": 12.329483985900879, "learning_rate": 4.150793887217374e-06, "loss": 0.2406, "step": 181125 }, { "epoch": 1.78, "grad_norm": 11.86671257019043, "learning_rate": 4.150669764763125e-06, "loss": 0.2448, "step": 181150 }, { "epoch": 1.78, "grad_norm": 7.017637729644775, "learning_rate": 4.150545642308876e-06, "loss": 0.2084, "step": 181175 }, { "epoch": 1.78, "grad_norm": 8.756844520568848, "learning_rate": 4.150421519854628e-06, "loss": 0.2462, "step": 181200 }, { "epoch": 1.78, "grad_norm": 3.0532875061035156, "learning_rate": 4.1502973974003794e-06, "loss": 0.2626, "step": 181225 }, { "epoch": 1.78, "grad_norm": 12.40062427520752, "learning_rate": 4.150173274946131e-06, "loss": 0.2474, "step": 181250 }, { "epoch": 1.78, "grad_norm": 8.548690795898438, "learning_rate": 4.150049152491883e-06, "loss": 0.203, "step": 181275 }, { "epoch": 1.78, "grad_norm": 9.76732349395752, "learning_rate": 4.149925030037634e-06, "loss": 0.2373, "step": 181300 }, { "epoch": 1.78, "grad_norm": 5.947532653808594, "learning_rate": 4.149800907583386e-06, "loss": 0.2005, "step": 181325 }, { "epoch": 1.78, "grad_norm": 12.976624488830566, "learning_rate": 4.149676785129137e-06, "loss": 0.2227, "step": 181350 }, { "epoch": 1.78, "grad_norm": 4.768967628479004, "learning_rate": 4.149552662674889e-06, "loss": 0.2396, "step": 181375 }, { "epoch": 1.78, "grad_norm": 14.568801879882812, "learning_rate": 4.1494285402206404e-06, "loss": 0.1818, "step": 181400 }, { "epoch": 1.78, "grad_norm": 5.011138916015625, "learning_rate": 4.1493044177663925e-06, "loss": 0.258, "step": 181425 }, { "epoch": 1.78, "grad_norm": 14.838680267333984, "learning_rate": 4.149180295312144e-06, "loss": 0.2613, "step": 181450 }, { "epoch": 1.78, "grad_norm": 0.9332171082496643, "learning_rate": 4.149056172857895e-06, "loss": 0.2144, "step": 181475 }, { "epoch": 1.78, "grad_norm": 15.631319046020508, "learning_rate": 4.148932050403647e-06, "loss": 0.2416, "step": 181500 }, { "epoch": 1.78, "grad_norm": 6.905598163604736, "learning_rate": 4.148807927949398e-06, "loss": 0.2383, "step": 181525 }, { "epoch": 1.79, "grad_norm": 15.764376640319824, "learning_rate": 4.148683805495149e-06, "loss": 0.2287, "step": 181550 }, { "epoch": 1.79, "grad_norm": 5.248910903930664, "learning_rate": 4.1485596830409015e-06, "loss": 0.2657, "step": 181575 }, { "epoch": 1.79, "grad_norm": 14.53784465789795, "learning_rate": 4.148435560586653e-06, "loss": 0.2698, "step": 181600 }, { "epoch": 1.79, "grad_norm": 3.26945161819458, "learning_rate": 4.148311438132404e-06, "loss": 0.2307, "step": 181625 }, { "epoch": 1.79, "grad_norm": 10.634373664855957, "learning_rate": 4.148187315678156e-06, "loss": 0.2387, "step": 181650 }, { "epoch": 1.79, "grad_norm": 4.548434257507324, "learning_rate": 4.148063193223907e-06, "loss": 0.2554, "step": 181675 }, { "epoch": 1.79, "grad_norm": 12.830912590026855, "learning_rate": 4.147939070769658e-06, "loss": 0.2671, "step": 181700 }, { "epoch": 1.79, "grad_norm": 7.9748406410217285, "learning_rate": 4.1478149483154104e-06, "loss": 0.2025, "step": 181725 }, { "epoch": 1.79, "grad_norm": 21.09305763244629, "learning_rate": 4.147690825861162e-06, "loss": 0.2723, "step": 181750 }, { "epoch": 1.79, "grad_norm": 4.513723850250244, "learning_rate": 4.147566703406914e-06, "loss": 0.2339, "step": 181775 }, { "epoch": 1.79, "grad_norm": 9.666593551635742, "learning_rate": 4.147442580952665e-06, "loss": 0.2497, "step": 181800 }, { "epoch": 1.79, "grad_norm": 3.249957799911499, "learning_rate": 4.147318458498417e-06, "loss": 0.2032, "step": 181825 }, { "epoch": 1.79, "grad_norm": 18.056453704833984, "learning_rate": 4.147194336044168e-06, "loss": 0.248, "step": 181850 }, { "epoch": 1.79, "grad_norm": 3.451622247695923, "learning_rate": 4.14707517848809e-06, "loss": 0.2374, "step": 181875 }, { "epoch": 1.79, "grad_norm": 22.3433895111084, "learning_rate": 4.146951056033841e-06, "loss": 0.3131, "step": 181900 }, { "epoch": 1.79, "grad_norm": 3.9103357791900635, "learning_rate": 4.1468269335795925e-06, "loss": 0.2542, "step": 181925 }, { "epoch": 1.79, "grad_norm": 12.638370513916016, "learning_rate": 4.1467028111253445e-06, "loss": 0.243, "step": 181950 }, { "epoch": 1.79, "grad_norm": 5.686283588409424, "learning_rate": 4.146578688671096e-06, "loss": 0.2453, "step": 181975 }, { "epoch": 1.79, "grad_norm": 18.46466064453125, "learning_rate": 4.146454566216847e-06, "loss": 0.2593, "step": 182000 }, { "epoch": 1.79, "grad_norm": 7.117757797241211, "learning_rate": 4.146330443762599e-06, "loss": 0.215, "step": 182025 }, { "epoch": 1.79, "grad_norm": 21.274669647216797, "learning_rate": 4.14620632130835e-06, "loss": 0.2115, "step": 182050 }, { "epoch": 1.79, "grad_norm": 8.040074348449707, "learning_rate": 4.1460821988541014e-06, "loss": 0.2225, "step": 182075 }, { "epoch": 1.79, "grad_norm": 11.973113059997559, "learning_rate": 4.1459580763998535e-06, "loss": 0.2336, "step": 182100 }, { "epoch": 1.79, "grad_norm": 8.191410064697266, "learning_rate": 4.145833953945605e-06, "loss": 0.2358, "step": 182125 }, { "epoch": 1.79, "grad_norm": 10.515642166137695, "learning_rate": 4.145709831491356e-06, "loss": 0.263, "step": 182150 }, { "epoch": 1.79, "grad_norm": 29.863996505737305, "learning_rate": 4.145585709037108e-06, "loss": 0.2339, "step": 182175 }, { "epoch": 1.79, "grad_norm": 15.761070251464844, "learning_rate": 4.145461586582859e-06, "loss": 0.2341, "step": 182200 }, { "epoch": 1.79, "grad_norm": 4.858307838439941, "learning_rate": 4.145337464128611e-06, "loss": 0.2426, "step": 182225 }, { "epoch": 1.79, "grad_norm": 15.529865264892578, "learning_rate": 4.1452133416743625e-06, "loss": 0.1981, "step": 182250 }, { "epoch": 1.79, "grad_norm": 10.584206581115723, "learning_rate": 4.1450892192201145e-06, "loss": 0.258, "step": 182275 }, { "epoch": 1.79, "grad_norm": 14.14629077911377, "learning_rate": 4.144965096765866e-06, "loss": 0.2526, "step": 182300 }, { "epoch": 1.79, "grad_norm": 7.093653678894043, "learning_rate": 4.144840974311618e-06, "loss": 0.2316, "step": 182325 }, { "epoch": 1.79, "grad_norm": 14.442866325378418, "learning_rate": 4.144716851857369e-06, "loss": 0.24, "step": 182350 }, { "epoch": 1.79, "grad_norm": 5.084959030151367, "learning_rate": 4.14459272940312e-06, "loss": 0.239, "step": 182375 }, { "epoch": 1.79, "grad_norm": 16.504005432128906, "learning_rate": 4.1444686069488714e-06, "loss": 0.2395, "step": 182400 }, { "epoch": 1.79, "grad_norm": 6.164929389953613, "learning_rate": 4.1443444844946235e-06, "loss": 0.2603, "step": 182425 }, { "epoch": 1.79, "grad_norm": 10.946980476379395, "learning_rate": 4.144220362040375e-06, "loss": 0.2496, "step": 182450 }, { "epoch": 1.79, "grad_norm": 6.729987621307373, "learning_rate": 4.144096239586126e-06, "loss": 0.23, "step": 182475 }, { "epoch": 1.79, "grad_norm": 13.373886108398438, "learning_rate": 4.143972117131878e-06, "loss": 0.2393, "step": 182500 }, { "epoch": 1.79, "grad_norm": 7.092830181121826, "learning_rate": 4.143847994677629e-06, "loss": 0.2769, "step": 182525 }, { "epoch": 1.79, "grad_norm": 12.715496063232422, "learning_rate": 4.14372387222338e-06, "loss": 0.239, "step": 182550 }, { "epoch": 1.8, "grad_norm": 0.503757655620575, "learning_rate": 4.1435997497691324e-06, "loss": 0.2346, "step": 182575 }, { "epoch": 1.8, "grad_norm": 7.452907085418701, "learning_rate": 4.143475627314884e-06, "loss": 0.1929, "step": 182600 }, { "epoch": 1.8, "grad_norm": 2.2435672283172607, "learning_rate": 4.143351504860636e-06, "loss": 0.2345, "step": 182625 }, { "epoch": 1.8, "grad_norm": 10.693780899047852, "learning_rate": 4.143227382406387e-06, "loss": 0.2452, "step": 182650 }, { "epoch": 1.8, "grad_norm": 1.0088058710098267, "learning_rate": 4.143103259952139e-06, "loss": 0.1956, "step": 182675 }, { "epoch": 1.8, "grad_norm": 15.329100608825684, "learning_rate": 4.14297913749789e-06, "loss": 0.2224, "step": 182700 }, { "epoch": 1.8, "grad_norm": 13.244329452514648, "learning_rate": 4.142855015043642e-06, "loss": 0.2626, "step": 182725 }, { "epoch": 1.8, "grad_norm": 14.572138786315918, "learning_rate": 4.1427308925893935e-06, "loss": 0.2439, "step": 182750 }, { "epoch": 1.8, "grad_norm": 4.156318664550781, "learning_rate": 4.142606770135145e-06, "loss": 0.1886, "step": 182775 }, { "epoch": 1.8, "grad_norm": 8.714751243591309, "learning_rate": 4.142482647680897e-06, "loss": 0.2382, "step": 182800 }, { "epoch": 1.8, "grad_norm": 7.1273674964904785, "learning_rate": 4.142358525226648e-06, "loss": 0.2238, "step": 182825 }, { "epoch": 1.8, "grad_norm": 14.96950912475586, "learning_rate": 4.142234402772399e-06, "loss": 0.2195, "step": 182850 }, { "epoch": 1.8, "grad_norm": 6.331822395324707, "learning_rate": 4.142110280318151e-06, "loss": 0.1851, "step": 182875 }, { "epoch": 1.8, "grad_norm": 20.14883804321289, "learning_rate": 4.1419861578639024e-06, "loss": 0.2414, "step": 182900 }, { "epoch": 1.8, "grad_norm": 3.107630968093872, "learning_rate": 4.141862035409654e-06, "loss": 0.2784, "step": 182925 }, { "epoch": 1.8, "grad_norm": 12.10906982421875, "learning_rate": 4.141737912955406e-06, "loss": 0.2532, "step": 182950 }, { "epoch": 1.8, "grad_norm": 6.954058647155762, "learning_rate": 4.141613790501157e-06, "loss": 0.2436, "step": 182975 }, { "epoch": 1.8, "grad_norm": 19.232187271118164, "learning_rate": 4.141489668046909e-06, "loss": 0.2228, "step": 183000 }, { "epoch": 1.8, "grad_norm": 2.7458932399749756, "learning_rate": 4.14136554559266e-06, "loss": 0.1907, "step": 183025 }, { "epoch": 1.8, "grad_norm": 16.790328979492188, "learning_rate": 4.141241423138412e-06, "loss": 0.2657, "step": 183050 }, { "epoch": 1.8, "grad_norm": 5.456855297088623, "learning_rate": 4.1411173006841635e-06, "loss": 0.2289, "step": 183075 }, { "epoch": 1.8, "grad_norm": 8.952136039733887, "learning_rate": 4.1409931782299155e-06, "loss": 0.266, "step": 183100 }, { "epoch": 1.8, "grad_norm": 5.117611885070801, "learning_rate": 4.140869055775667e-06, "loss": 0.2173, "step": 183125 }, { "epoch": 1.8, "grad_norm": 15.140619277954102, "learning_rate": 4.140744933321418e-06, "loss": 0.2556, "step": 183150 }, { "epoch": 1.8, "grad_norm": 2.725956678390503, "learning_rate": 4.14062081086717e-06, "loss": 0.2382, "step": 183175 }, { "epoch": 1.8, "grad_norm": 9.39852237701416, "learning_rate": 4.140496688412921e-06, "loss": 0.2053, "step": 183200 }, { "epoch": 1.8, "grad_norm": 5.267068862915039, "learning_rate": 4.140372565958672e-06, "loss": 0.2198, "step": 183225 }, { "epoch": 1.8, "grad_norm": 18.62930679321289, "learning_rate": 4.140248443504424e-06, "loss": 0.2683, "step": 183250 }, { "epoch": 1.8, "grad_norm": 5.305161952972412, "learning_rate": 4.140124321050176e-06, "loss": 0.2283, "step": 183275 }, { "epoch": 1.8, "grad_norm": 16.286598205566406, "learning_rate": 4.140000198595927e-06, "loss": 0.2445, "step": 183300 }, { "epoch": 1.8, "grad_norm": 5.222745895385742, "learning_rate": 4.139876076141678e-06, "loss": 0.2212, "step": 183325 }, { "epoch": 1.8, "grad_norm": 18.20534896850586, "learning_rate": 4.13975195368743e-06, "loss": 0.2408, "step": 183350 }, { "epoch": 1.8, "grad_norm": 2.8553848266601562, "learning_rate": 4.139627831233181e-06, "loss": 0.2158, "step": 183375 }, { "epoch": 1.8, "grad_norm": 14.478964805603027, "learning_rate": 4.1395037087789334e-06, "loss": 0.2232, "step": 183400 }, { "epoch": 1.8, "grad_norm": 6.9517693519592285, "learning_rate": 4.139379586324685e-06, "loss": 0.2242, "step": 183425 }, { "epoch": 1.8, "grad_norm": 10.902273178100586, "learning_rate": 4.139255463870437e-06, "loss": 0.234, "step": 183450 }, { "epoch": 1.8, "grad_norm": 5.745944499969482, "learning_rate": 4.139131341416188e-06, "loss": 0.182, "step": 183475 }, { "epoch": 1.8, "grad_norm": 12.19802474975586, "learning_rate": 4.13900721896194e-06, "loss": 0.2158, "step": 183500 }, { "epoch": 1.8, "grad_norm": 2.403404951095581, "learning_rate": 4.138883096507691e-06, "loss": 0.2059, "step": 183525 }, { "epoch": 1.8, "grad_norm": 13.965312957763672, "learning_rate": 4.138758974053442e-06, "loss": 0.2199, "step": 183550 }, { "epoch": 1.8, "grad_norm": 4.493752956390381, "learning_rate": 4.1386348515991945e-06, "loss": 0.1864, "step": 183575 }, { "epoch": 1.81, "grad_norm": 17.77336311340332, "learning_rate": 4.138510729144946e-06, "loss": 0.2672, "step": 183600 }, { "epoch": 1.81, "grad_norm": 1.4739705324172974, "learning_rate": 4.138386606690697e-06, "loss": 0.2125, "step": 183625 }, { "epoch": 1.81, "grad_norm": 11.826635360717773, "learning_rate": 4.138262484236449e-06, "loss": 0.2186, "step": 183650 }, { "epoch": 1.81, "grad_norm": 8.173539161682129, "learning_rate": 4.1381383617822e-06, "loss": 0.2273, "step": 183675 }, { "epoch": 1.81, "grad_norm": 13.794312477111816, "learning_rate": 4.138014239327951e-06, "loss": 0.2241, "step": 183700 }, { "epoch": 1.81, "grad_norm": 5.665229320526123, "learning_rate": 4.137890116873703e-06, "loss": 0.2167, "step": 183725 }, { "epoch": 1.81, "grad_norm": 6.254683971405029, "learning_rate": 4.137765994419455e-06, "loss": 0.1912, "step": 183750 }, { "epoch": 1.81, "grad_norm": 1.3212981224060059, "learning_rate": 4.137641871965206e-06, "loss": 0.2066, "step": 183775 }, { "epoch": 1.81, "grad_norm": 8.733804702758789, "learning_rate": 4.137517749510958e-06, "loss": 0.2423, "step": 183800 }, { "epoch": 1.81, "grad_norm": 0.4852055311203003, "learning_rate": 4.137393627056709e-06, "loss": 0.2749, "step": 183825 }, { "epoch": 1.81, "grad_norm": 15.91127872467041, "learning_rate": 4.137269504602461e-06, "loss": 0.2386, "step": 183850 }, { "epoch": 1.81, "grad_norm": 7.40143346786499, "learning_rate": 4.137145382148212e-06, "loss": 0.239, "step": 183875 }, { "epoch": 1.81, "grad_norm": 8.948089599609375, "learning_rate": 4.1370212596939644e-06, "loss": 0.2555, "step": 183900 }, { "epoch": 1.81, "grad_norm": 5.796048164367676, "learning_rate": 4.136897137239716e-06, "loss": 0.2361, "step": 183925 }, { "epoch": 1.81, "grad_norm": 18.90922737121582, "learning_rate": 4.136773014785468e-06, "loss": 0.3102, "step": 183950 }, { "epoch": 1.81, "grad_norm": 3.693444013595581, "learning_rate": 4.136648892331219e-06, "loss": 0.2046, "step": 183975 }, { "epoch": 1.81, "grad_norm": 11.073644638061523, "learning_rate": 4.13652476987697e-06, "loss": 0.2062, "step": 184000 }, { "epoch": 1.81, "grad_norm": 3.6498382091522217, "learning_rate": 4.136400647422722e-06, "loss": 0.2088, "step": 184025 }, { "epoch": 1.81, "grad_norm": 15.03309440612793, "learning_rate": 4.136276524968473e-06, "loss": 0.2838, "step": 184050 }, { "epoch": 1.81, "grad_norm": 5.8107500076293945, "learning_rate": 4.136152402514225e-06, "loss": 0.213, "step": 184075 }, { "epoch": 1.81, "grad_norm": 29.62744140625, "learning_rate": 4.136028280059976e-06, "loss": 0.197, "step": 184100 }, { "epoch": 1.81, "grad_norm": 5.226640224456787, "learning_rate": 4.135904157605728e-06, "loss": 0.2559, "step": 184125 }, { "epoch": 1.81, "grad_norm": 12.986268997192383, "learning_rate": 4.135780035151479e-06, "loss": 0.2209, "step": 184150 }, { "epoch": 1.81, "grad_norm": 8.210693359375, "learning_rate": 4.13565591269723e-06, "loss": 0.2009, "step": 184175 }, { "epoch": 1.81, "grad_norm": 17.184616088867188, "learning_rate": 4.135531790242982e-06, "loss": 0.2527, "step": 184200 }, { "epoch": 1.81, "grad_norm": 6.335649490356445, "learning_rate": 4.135407667788734e-06, "loss": 0.2131, "step": 184225 }, { "epoch": 1.81, "grad_norm": 19.530994415283203, "learning_rate": 4.135283545334486e-06, "loss": 0.2456, "step": 184250 }, { "epoch": 1.81, "grad_norm": 3.638468027114868, "learning_rate": 4.135164387778407e-06, "loss": 0.2042, "step": 184275 }, { "epoch": 1.81, "grad_norm": 10.192561149597168, "learning_rate": 4.135040265324159e-06, "loss": 0.2211, "step": 184300 }, { "epoch": 1.81, "grad_norm": 9.164064407348633, "learning_rate": 4.13491614286991e-06, "loss": 0.2003, "step": 184325 }, { "epoch": 1.81, "grad_norm": 11.385080337524414, "learning_rate": 4.134792020415662e-06, "loss": 0.2756, "step": 184350 }, { "epoch": 1.81, "grad_norm": 11.443388938903809, "learning_rate": 4.134667897961413e-06, "loss": 0.2599, "step": 184375 }, { "epoch": 1.81, "grad_norm": 12.692510604858398, "learning_rate": 4.134543775507165e-06, "loss": 0.2457, "step": 184400 }, { "epoch": 1.81, "grad_norm": 5.510866165161133, "learning_rate": 4.1344196530529165e-06, "loss": 0.2458, "step": 184425 }, { "epoch": 1.81, "grad_norm": 10.953751564025879, "learning_rate": 4.134295530598668e-06, "loss": 0.213, "step": 184450 }, { "epoch": 1.81, "grad_norm": 7.5775346755981445, "learning_rate": 4.13417140814442e-06, "loss": 0.2342, "step": 184475 }, { "epoch": 1.81, "grad_norm": 10.079689979553223, "learning_rate": 4.134047285690171e-06, "loss": 0.2793, "step": 184500 }, { "epoch": 1.81, "grad_norm": 6.563985824584961, "learning_rate": 4.133923163235922e-06, "loss": 0.2187, "step": 184525 }, { "epoch": 1.81, "grad_norm": 11.563149452209473, "learning_rate": 4.133799040781674e-06, "loss": 0.1909, "step": 184550 }, { "epoch": 1.81, "grad_norm": 6.261585712432861, "learning_rate": 4.1336749183274254e-06, "loss": 0.2075, "step": 184575 }, { "epoch": 1.82, "grad_norm": 20.508752822875977, "learning_rate": 4.133550795873177e-06, "loss": 0.2277, "step": 184600 }, { "epoch": 1.82, "grad_norm": 4.815083026885986, "learning_rate": 4.133426673418929e-06, "loss": 0.2064, "step": 184625 }, { "epoch": 1.82, "grad_norm": 15.831868171691895, "learning_rate": 4.13330255096468e-06, "loss": 0.3045, "step": 184650 }, { "epoch": 1.82, "grad_norm": 3.0103766918182373, "learning_rate": 4.133178428510431e-06, "loss": 0.2479, "step": 184675 }, { "epoch": 1.82, "grad_norm": 16.08658790588379, "learning_rate": 4.133054306056183e-06, "loss": 0.2053, "step": 184700 }, { "epoch": 1.82, "grad_norm": 8.095017433166504, "learning_rate": 4.132930183601934e-06, "loss": 0.1808, "step": 184725 }, { "epoch": 1.82, "grad_norm": 14.199189186096191, "learning_rate": 4.1328060611476865e-06, "loss": 0.2588, "step": 184750 }, { "epoch": 1.82, "grad_norm": 3.626000165939331, "learning_rate": 4.132681938693438e-06, "loss": 0.2116, "step": 184775 }, { "epoch": 1.82, "grad_norm": 14.137678146362305, "learning_rate": 4.13255781623919e-06, "loss": 0.2626, "step": 184800 }, { "epoch": 1.82, "grad_norm": 1.6442112922668457, "learning_rate": 4.132433693784941e-06, "loss": 0.2205, "step": 184825 }, { "epoch": 1.82, "grad_norm": 7.498824119567871, "learning_rate": 4.132309571330692e-06, "loss": 0.2225, "step": 184850 }, { "epoch": 1.82, "grad_norm": 2.9370458126068115, "learning_rate": 4.132185448876444e-06, "loss": 0.2314, "step": 184875 }, { "epoch": 1.82, "grad_norm": 13.909088134765625, "learning_rate": 4.132061326422195e-06, "loss": 0.1719, "step": 184900 }, { "epoch": 1.82, "grad_norm": 1.5486310720443726, "learning_rate": 4.131937203967947e-06, "loss": 0.2077, "step": 184925 }, { "epoch": 1.82, "grad_norm": 27.34657859802246, "learning_rate": 4.131813081513699e-06, "loss": 0.2791, "step": 184950 }, { "epoch": 1.82, "grad_norm": 3.085280418395996, "learning_rate": 4.13168895905945e-06, "loss": 0.1981, "step": 184975 }, { "epoch": 1.82, "grad_norm": 9.06177043914795, "learning_rate": 4.131564836605201e-06, "loss": 0.2136, "step": 185000 }, { "epoch": 1.82, "grad_norm": 5.619743824005127, "learning_rate": 4.131440714150953e-06, "loss": 0.2035, "step": 185025 }, { "epoch": 1.82, "grad_norm": 11.96342658996582, "learning_rate": 4.131316591696704e-06, "loss": 0.2283, "step": 185050 }, { "epoch": 1.82, "grad_norm": 2.8708624839782715, "learning_rate": 4.131192469242456e-06, "loss": 0.2552, "step": 185075 }, { "epoch": 1.82, "grad_norm": 12.675752639770508, "learning_rate": 4.131068346788208e-06, "loss": 0.231, "step": 185100 }, { "epoch": 1.82, "grad_norm": 4.030893325805664, "learning_rate": 4.130944224333959e-06, "loss": 0.2275, "step": 185125 }, { "epoch": 1.82, "grad_norm": 13.928082466125488, "learning_rate": 4.130820101879711e-06, "loss": 0.2446, "step": 185150 }, { "epoch": 1.82, "grad_norm": 6.862747669219971, "learning_rate": 4.130695979425462e-06, "loss": 0.2258, "step": 185175 }, { "epoch": 1.82, "grad_norm": 14.96605110168457, "learning_rate": 4.130571856971214e-06, "loss": 0.1987, "step": 185200 }, { "epoch": 1.82, "grad_norm": 5.687107086181641, "learning_rate": 4.130447734516965e-06, "loss": 0.2133, "step": 185225 }, { "epoch": 1.82, "grad_norm": 13.418437004089355, "learning_rate": 4.1303236120627175e-06, "loss": 0.2478, "step": 185250 }, { "epoch": 1.82, "grad_norm": 7.4611430168151855, "learning_rate": 4.130199489608469e-06, "loss": 0.2029, "step": 185275 }, { "epoch": 1.82, "grad_norm": 17.265941619873047, "learning_rate": 4.13007536715422e-06, "loss": 0.2456, "step": 185300 }, { "epoch": 1.82, "grad_norm": 4.1456685066223145, "learning_rate": 4.129951244699972e-06, "loss": 0.2396, "step": 185325 }, { "epoch": 1.82, "grad_norm": 24.818923950195312, "learning_rate": 4.129827122245723e-06, "loss": 0.2294, "step": 185350 }, { "epoch": 1.82, "grad_norm": 4.801368236541748, "learning_rate": 4.129702999791474e-06, "loss": 0.181, "step": 185375 }, { "epoch": 1.82, "grad_norm": 12.918869018554688, "learning_rate": 4.1295788773372264e-06, "loss": 0.2307, "step": 185400 }, { "epoch": 1.82, "grad_norm": 7.587204933166504, "learning_rate": 4.129454754882978e-06, "loss": 0.248, "step": 185425 }, { "epoch": 1.82, "grad_norm": 21.65411376953125, "learning_rate": 4.129330632428729e-06, "loss": 0.2594, "step": 185450 }, { "epoch": 1.82, "grad_norm": 4.778591156005859, "learning_rate": 4.129206509974481e-06, "loss": 0.2523, "step": 185475 }, { "epoch": 1.82, "grad_norm": 16.72121810913086, "learning_rate": 4.129082387520232e-06, "loss": 0.2492, "step": 185500 }, { "epoch": 1.82, "grad_norm": 6.12026309967041, "learning_rate": 4.128958265065983e-06, "loss": 0.195, "step": 185525 }, { "epoch": 1.82, "grad_norm": 13.29797077178955, "learning_rate": 4.128834142611735e-06, "loss": 0.258, "step": 185550 }, { "epoch": 1.82, "grad_norm": 2.3203787803649902, "learning_rate": 4.128710020157487e-06, "loss": 0.2971, "step": 185575 }, { "epoch": 1.82, "grad_norm": 11.8778715133667, "learning_rate": 4.128585897703239e-06, "loss": 0.2412, "step": 185600 }, { "epoch": 1.83, "grad_norm": 10.28148365020752, "learning_rate": 4.12846177524899e-06, "loss": 0.1912, "step": 185625 }, { "epoch": 1.83, "grad_norm": 13.246026039123535, "learning_rate": 4.128337652794742e-06, "loss": 0.268, "step": 185650 }, { "epoch": 1.83, "grad_norm": 7.312665939331055, "learning_rate": 4.128213530340493e-06, "loss": 0.2637, "step": 185675 }, { "epoch": 1.83, "grad_norm": 23.66227912902832, "learning_rate": 4.128089407886244e-06, "loss": 0.2169, "step": 185700 }, { "epoch": 1.83, "grad_norm": 3.308100938796997, "learning_rate": 4.127965285431996e-06, "loss": 0.2346, "step": 185725 }, { "epoch": 1.83, "grad_norm": 17.39434242248535, "learning_rate": 4.127841162977748e-06, "loss": 0.2556, "step": 185750 }, { "epoch": 1.83, "grad_norm": 4.669917583465576, "learning_rate": 4.127717040523499e-06, "loss": 0.2614, "step": 185775 }, { "epoch": 1.83, "grad_norm": 11.158219337463379, "learning_rate": 4.127592918069251e-06, "loss": 0.2388, "step": 185800 }, { "epoch": 1.83, "grad_norm": 1.9504178762435913, "learning_rate": 4.127468795615002e-06, "loss": 0.2385, "step": 185825 }, { "epoch": 1.83, "grad_norm": 15.30204963684082, "learning_rate": 4.127344673160753e-06, "loss": 0.2567, "step": 185850 }, { "epoch": 1.83, "grad_norm": 5.69773530960083, "learning_rate": 4.127220550706505e-06, "loss": 0.2151, "step": 185875 }, { "epoch": 1.83, "grad_norm": 13.339089393615723, "learning_rate": 4.127096428252257e-06, "loss": 0.2544, "step": 185900 }, { "epoch": 1.83, "grad_norm": 5.602242946624756, "learning_rate": 4.126972305798009e-06, "loss": 0.2045, "step": 185925 }, { "epoch": 1.83, "grad_norm": 9.11212158203125, "learning_rate": 4.12684818334376e-06, "loss": 0.1917, "step": 185950 }, { "epoch": 1.83, "grad_norm": 4.59731912612915, "learning_rate": 4.126724060889512e-06, "loss": 0.2512, "step": 185975 }, { "epoch": 1.83, "grad_norm": 12.890869140625, "learning_rate": 4.126599938435263e-06, "loss": 0.2078, "step": 186000 }, { "epoch": 1.83, "grad_norm": 7.2102742195129395, "learning_rate": 4.126475815981015e-06, "loss": 0.2484, "step": 186025 }, { "epoch": 1.83, "grad_norm": 18.246749877929688, "learning_rate": 4.126351693526766e-06, "loss": 0.246, "step": 186050 }, { "epoch": 1.83, "grad_norm": 5.6904096603393555, "learning_rate": 4.126227571072518e-06, "loss": 0.1963, "step": 186075 }, { "epoch": 1.83, "grad_norm": 5.179117202758789, "learning_rate": 4.12610344861827e-06, "loss": 0.1844, "step": 186100 }, { "epoch": 1.83, "grad_norm": 6.8474578857421875, "learning_rate": 4.125979326164021e-06, "loss": 0.2054, "step": 186125 }, { "epoch": 1.83, "grad_norm": 6.526378154754639, "learning_rate": 4.125855203709772e-06, "loss": 0.1937, "step": 186150 }, { "epoch": 1.83, "grad_norm": 8.31670093536377, "learning_rate": 4.125731081255524e-06, "loss": 0.2471, "step": 186175 }, { "epoch": 1.83, "grad_norm": 4.297574996948242, "learning_rate": 4.125606958801275e-06, "loss": 0.2968, "step": 186200 }, { "epoch": 1.83, "grad_norm": 3.910676956176758, "learning_rate": 4.1254828363470266e-06, "loss": 0.2147, "step": 186225 }, { "epoch": 1.83, "grad_norm": 21.06086540222168, "learning_rate": 4.125358713892779e-06, "loss": 0.2694, "step": 186250 }, { "epoch": 1.83, "grad_norm": 0.5667106509208679, "learning_rate": 4.12523459143853e-06, "loss": 0.1864, "step": 186275 }, { "epoch": 1.83, "grad_norm": 7.429973125457764, "learning_rate": 4.125110468984281e-06, "loss": 0.2145, "step": 186300 }, { "epoch": 1.83, "grad_norm": 3.6015007495880127, "learning_rate": 4.124986346530033e-06, "loss": 0.188, "step": 186325 }, { "epoch": 1.83, "grad_norm": 13.964130401611328, "learning_rate": 4.124862224075784e-06, "loss": 0.2597, "step": 186350 }, { "epoch": 1.83, "grad_norm": 1.4345096349716187, "learning_rate": 4.124738101621536e-06, "loss": 0.1709, "step": 186375 }, { "epoch": 1.83, "grad_norm": 8.45447826385498, "learning_rate": 4.124613979167288e-06, "loss": 0.2446, "step": 186400 }, { "epoch": 1.83, "grad_norm": 3.1514956951141357, "learning_rate": 4.12448985671304e-06, "loss": 0.1973, "step": 186425 }, { "epoch": 1.83, "grad_norm": 22.357147216796875, "learning_rate": 4.124365734258791e-06, "loss": 0.2877, "step": 186450 }, { "epoch": 1.83, "grad_norm": 4.414636611938477, "learning_rate": 4.124241611804542e-06, "loss": 0.2296, "step": 186475 }, { "epoch": 1.83, "grad_norm": 14.691603660583496, "learning_rate": 4.124117489350294e-06, "loss": 0.3037, "step": 186500 }, { "epoch": 1.83, "grad_norm": 8.392091751098633, "learning_rate": 4.123993366896045e-06, "loss": 0.2897, "step": 186525 }, { "epoch": 1.83, "grad_norm": 32.433189392089844, "learning_rate": 4.1238692444417966e-06, "loss": 0.2397, "step": 186550 }, { "epoch": 1.83, "grad_norm": 3.494432210922241, "learning_rate": 4.123745121987549e-06, "loss": 0.1971, "step": 186575 }, { "epoch": 1.83, "grad_norm": 10.63371467590332, "learning_rate": 4.1236209995333e-06, "loss": 0.2511, "step": 186600 }, { "epoch": 1.83, "grad_norm": 7.588599681854248, "learning_rate": 4.123496877079051e-06, "loss": 0.2317, "step": 186625 }, { "epoch": 1.84, "grad_norm": 14.320483207702637, "learning_rate": 4.123372754624803e-06, "loss": 0.1933, "step": 186650 }, { "epoch": 1.84, "grad_norm": 3.7566769123077393, "learning_rate": 4.123248632170554e-06, "loss": 0.1991, "step": 186675 }, { "epoch": 1.84, "grad_norm": 10.260422706604004, "learning_rate": 4.1231245097163055e-06, "loss": 0.2747, "step": 186700 }, { "epoch": 1.84, "grad_norm": 9.573831558227539, "learning_rate": 4.1230003872620576e-06, "loss": 0.1994, "step": 186725 }, { "epoch": 1.84, "grad_norm": 18.771175384521484, "learning_rate": 4.122876264807809e-06, "loss": 0.2194, "step": 186750 }, { "epoch": 1.84, "grad_norm": 5.8344645500183105, "learning_rate": 4.122752142353561e-06, "loss": 0.1972, "step": 186775 }, { "epoch": 1.84, "grad_norm": 18.91372299194336, "learning_rate": 4.122632984797482e-06, "loss": 0.2562, "step": 186800 }, { "epoch": 1.84, "grad_norm": 7.800936222076416, "learning_rate": 4.122508862343234e-06, "loss": 0.2682, "step": 186825 }, { "epoch": 1.84, "grad_norm": 18.89600944519043, "learning_rate": 4.122384739888985e-06, "loss": 0.2416, "step": 186850 }, { "epoch": 1.84, "grad_norm": 1.0808801651000977, "learning_rate": 4.122260617434737e-06, "loss": 0.1872, "step": 186875 }, { "epoch": 1.84, "grad_norm": 13.055283546447754, "learning_rate": 4.122136494980488e-06, "loss": 0.2384, "step": 186900 }, { "epoch": 1.84, "grad_norm": 6.677107810974121, "learning_rate": 4.1220123725262405e-06, "loss": 0.1949, "step": 186925 }, { "epoch": 1.84, "grad_norm": 26.11070442199707, "learning_rate": 4.121888250071992e-06, "loss": 0.2648, "step": 186950 }, { "epoch": 1.84, "grad_norm": 3.7111055850982666, "learning_rate": 4.121764127617743e-06, "loss": 0.195, "step": 186975 }, { "epoch": 1.84, "grad_norm": 13.807583808898926, "learning_rate": 4.121640005163494e-06, "loss": 0.2402, "step": 187000 }, { "epoch": 1.84, "grad_norm": 1.5478392839431763, "learning_rate": 4.121515882709246e-06, "loss": 0.2072, "step": 187025 }, { "epoch": 1.84, "grad_norm": 10.949018478393555, "learning_rate": 4.121391760254997e-06, "loss": 0.1805, "step": 187050 }, { "epoch": 1.84, "grad_norm": 10.304705619812012, "learning_rate": 4.121267637800749e-06, "loss": 0.2362, "step": 187075 }, { "epoch": 1.84, "grad_norm": 14.818578720092773, "learning_rate": 4.121143515346501e-06, "loss": 0.2101, "step": 187100 }, { "epoch": 1.84, "grad_norm": 6.8027544021606445, "learning_rate": 4.121019392892252e-06, "loss": 0.189, "step": 187125 }, { "epoch": 1.84, "grad_norm": 16.412614822387695, "learning_rate": 4.120895270438003e-06, "loss": 0.2638, "step": 187150 }, { "epoch": 1.84, "grad_norm": 6.601423740386963, "learning_rate": 4.120771147983755e-06, "loss": 0.2744, "step": 187175 }, { "epoch": 1.84, "grad_norm": 15.785601615905762, "learning_rate": 4.120647025529506e-06, "loss": 0.2104, "step": 187200 }, { "epoch": 1.84, "grad_norm": 2.2813711166381836, "learning_rate": 4.120522903075258e-06, "loss": 0.2354, "step": 187225 }, { "epoch": 1.84, "grad_norm": 10.042261123657227, "learning_rate": 4.12039878062101e-06, "loss": 0.2436, "step": 187250 }, { "epoch": 1.84, "grad_norm": 4.2942328453063965, "learning_rate": 4.120274658166762e-06, "loss": 0.2365, "step": 187275 }, { "epoch": 1.84, "grad_norm": 14.67687702178955, "learning_rate": 4.120150535712513e-06, "loss": 0.2865, "step": 187300 }, { "epoch": 1.84, "grad_norm": 9.514043807983398, "learning_rate": 4.120026413258265e-06, "loss": 0.1923, "step": 187325 }, { "epoch": 1.84, "grad_norm": 13.358392715454102, "learning_rate": 4.119902290804016e-06, "loss": 0.2829, "step": 187350 }, { "epoch": 1.84, "grad_norm": 10.284952163696289, "learning_rate": 4.119778168349767e-06, "loss": 0.2106, "step": 187375 }, { "epoch": 1.84, "grad_norm": 13.424773216247559, "learning_rate": 4.119654045895519e-06, "loss": 0.2641, "step": 187400 }, { "epoch": 1.84, "grad_norm": 4.573482513427734, "learning_rate": 4.119529923441271e-06, "loss": 0.2082, "step": 187425 }, { "epoch": 1.84, "grad_norm": 11.466211318969727, "learning_rate": 4.119405800987022e-06, "loss": 0.2512, "step": 187450 }, { "epoch": 1.84, "grad_norm": 4.848443031311035, "learning_rate": 4.119281678532774e-06, "loss": 0.2343, "step": 187475 }, { "epoch": 1.84, "grad_norm": 14.762880325317383, "learning_rate": 4.119157556078525e-06, "loss": 0.195, "step": 187500 }, { "epoch": 1.84, "grad_norm": 1.305351734161377, "learning_rate": 4.119033433624276e-06, "loss": 0.1746, "step": 187525 }, { "epoch": 1.84, "grad_norm": 12.886651992797852, "learning_rate": 4.118909311170028e-06, "loss": 0.2385, "step": 187550 }, { "epoch": 1.84, "grad_norm": 4.273967742919922, "learning_rate": 4.11878518871578e-06, "loss": 0.2201, "step": 187575 }, { "epoch": 1.84, "grad_norm": 12.108542442321777, "learning_rate": 4.118661066261531e-06, "loss": 0.2057, "step": 187600 }, { "epoch": 1.84, "grad_norm": 5.880010604858398, "learning_rate": 4.118536943807283e-06, "loss": 0.2566, "step": 187625 }, { "epoch": 1.85, "grad_norm": 12.004857063293457, "learning_rate": 4.118412821353034e-06, "loss": 0.2196, "step": 187650 }, { "epoch": 1.85, "grad_norm": 6.189697265625, "learning_rate": 4.118288698898786e-06, "loss": 0.2622, "step": 187675 }, { "epoch": 1.85, "grad_norm": 13.369511604309082, "learning_rate": 4.118164576444537e-06, "loss": 0.2388, "step": 187700 }, { "epoch": 1.85, "grad_norm": 5.653712272644043, "learning_rate": 4.118040453990289e-06, "loss": 0.1926, "step": 187725 }, { "epoch": 1.85, "grad_norm": 18.41616439819336, "learning_rate": 4.117916331536041e-06, "loss": 0.2277, "step": 187750 }, { "epoch": 1.85, "grad_norm": 6.549982070922852, "learning_rate": 4.117792209081793e-06, "loss": 0.1801, "step": 187775 }, { "epoch": 1.85, "grad_norm": 35.86118698120117, "learning_rate": 4.117668086627544e-06, "loss": 0.2167, "step": 187800 }, { "epoch": 1.85, "grad_norm": 4.632813930511475, "learning_rate": 4.117543964173295e-06, "loss": 0.2286, "step": 187825 }, { "epoch": 1.85, "grad_norm": 13.546357154846191, "learning_rate": 4.117419841719046e-06, "loss": 0.2485, "step": 187850 }, { "epoch": 1.85, "grad_norm": 3.1819422245025635, "learning_rate": 4.117295719264798e-06, "loss": 0.2193, "step": 187875 }, { "epoch": 1.85, "grad_norm": 17.36139678955078, "learning_rate": 4.11717159681055e-06, "loss": 0.2185, "step": 187900 }, { "epoch": 1.85, "grad_norm": 10.098389625549316, "learning_rate": 4.117047474356301e-06, "loss": 0.2315, "step": 187925 }, { "epoch": 1.85, "grad_norm": 4.677242755889893, "learning_rate": 4.116923351902053e-06, "loss": 0.2193, "step": 187950 }, { "epoch": 1.85, "grad_norm": 5.777592182159424, "learning_rate": 4.116799229447804e-06, "loss": 0.2364, "step": 187975 }, { "epoch": 1.85, "grad_norm": 14.103791236877441, "learning_rate": 4.116675106993555e-06, "loss": 0.2455, "step": 188000 }, { "epoch": 1.85, "grad_norm": 3.0443310737609863, "learning_rate": 4.116550984539307e-06, "loss": 0.2102, "step": 188025 }, { "epoch": 1.85, "grad_norm": 11.096477508544922, "learning_rate": 4.1164268620850585e-06, "loss": 0.1988, "step": 188050 }, { "epoch": 1.85, "grad_norm": 7.133997917175293, "learning_rate": 4.116302739630811e-06, "loss": 0.2418, "step": 188075 }, { "epoch": 1.85, "grad_norm": 11.225248336791992, "learning_rate": 4.116178617176562e-06, "loss": 0.2817, "step": 188100 }, { "epoch": 1.85, "grad_norm": 5.464417457580566, "learning_rate": 4.116054494722314e-06, "loss": 0.1458, "step": 188125 }, { "epoch": 1.85, "grad_norm": 14.684135437011719, "learning_rate": 4.115930372268065e-06, "loss": 0.2928, "step": 188150 }, { "epoch": 1.85, "grad_norm": 2.684753179550171, "learning_rate": 4.115806249813817e-06, "loss": 0.2325, "step": 188175 }, { "epoch": 1.85, "grad_norm": 15.303905487060547, "learning_rate": 4.115682127359568e-06, "loss": 0.1973, "step": 188200 }, { "epoch": 1.85, "grad_norm": 3.923550844192505, "learning_rate": 4.1155580049053196e-06, "loss": 0.194, "step": 188225 }, { "epoch": 1.85, "grad_norm": 15.865768432617188, "learning_rate": 4.115433882451072e-06, "loss": 0.2805, "step": 188250 }, { "epoch": 1.85, "grad_norm": 7.006770610809326, "learning_rate": 4.115309759996823e-06, "loss": 0.2207, "step": 188275 }, { "epoch": 1.85, "grad_norm": 18.153661727905273, "learning_rate": 4.115185637542574e-06, "loss": 0.2233, "step": 188300 }, { "epoch": 1.85, "grad_norm": 6.0647993087768555, "learning_rate": 4.115061515088326e-06, "loss": 0.2214, "step": 188325 }, { "epoch": 1.85, "grad_norm": 9.809272766113281, "learning_rate": 4.114937392634077e-06, "loss": 0.2534, "step": 188350 }, { "epoch": 1.85, "grad_norm": 4.888190746307373, "learning_rate": 4.1148132701798285e-06, "loss": 0.2056, "step": 188375 }, { "epoch": 1.85, "grad_norm": 11.73151683807373, "learning_rate": 4.114689147725581e-06, "loss": 0.2248, "step": 188400 }, { "epoch": 1.85, "grad_norm": 7.787087440490723, "learning_rate": 4.114565025271332e-06, "loss": 0.204, "step": 188425 }, { "epoch": 1.85, "grad_norm": 17.834075927734375, "learning_rate": 4.114440902817083e-06, "loss": 0.2177, "step": 188450 }, { "epoch": 1.85, "grad_norm": 3.515202283859253, "learning_rate": 4.114316780362835e-06, "loss": 0.217, "step": 188475 }, { "epoch": 1.85, "grad_norm": 18.55071449279785, "learning_rate": 4.114192657908586e-06, "loss": 0.2462, "step": 188500 }, { "epoch": 1.85, "grad_norm": 3.1979799270629883, "learning_rate": 4.114068535454338e-06, "loss": 0.2245, "step": 188525 }, { "epoch": 1.85, "grad_norm": 18.88746452331543, "learning_rate": 4.1139444130000895e-06, "loss": 0.2482, "step": 188550 }, { "epoch": 1.85, "grad_norm": 5.7725677490234375, "learning_rate": 4.113820290545842e-06, "loss": 0.2432, "step": 188575 }, { "epoch": 1.85, "grad_norm": 9.63758659362793, "learning_rate": 4.113696168091593e-06, "loss": 0.2388, "step": 188600 }, { "epoch": 1.85, "grad_norm": 3.669585943222046, "learning_rate": 4.113572045637345e-06, "loss": 0.2241, "step": 188625 }, { "epoch": 1.85, "grad_norm": 33.38948440551758, "learning_rate": 4.113447923183096e-06, "loss": 0.271, "step": 188650 }, { "epoch": 1.86, "grad_norm": 2.366847276687622, "learning_rate": 4.113323800728847e-06, "loss": 0.1916, "step": 188675 }, { "epoch": 1.86, "grad_norm": 18.01955223083496, "learning_rate": 4.1131996782745985e-06, "loss": 0.2364, "step": 188700 }, { "epoch": 1.86, "grad_norm": 5.082732677459717, "learning_rate": 4.1130755558203506e-06, "loss": 0.2338, "step": 188725 }, { "epoch": 1.86, "grad_norm": 17.04936981201172, "learning_rate": 4.112951433366102e-06, "loss": 0.2815, "step": 188750 }, { "epoch": 1.86, "grad_norm": 4.245606422424316, "learning_rate": 4.112827310911853e-06, "loss": 0.1911, "step": 188775 }, { "epoch": 1.86, "grad_norm": 19.624204635620117, "learning_rate": 4.112703188457605e-06, "loss": 0.2125, "step": 188800 }, { "epoch": 1.86, "grad_norm": 6.676952362060547, "learning_rate": 4.112579066003356e-06, "loss": 0.2711, "step": 188825 }, { "epoch": 1.86, "grad_norm": 19.221559524536133, "learning_rate": 4.112454943549108e-06, "loss": 0.2477, "step": 188850 }, { "epoch": 1.86, "grad_norm": 5.00746488571167, "learning_rate": 4.1123308210948595e-06, "loss": 0.2223, "step": 188875 }, { "epoch": 1.86, "grad_norm": 16.02627944946289, "learning_rate": 4.112206698640612e-06, "loss": 0.2378, "step": 188900 }, { "epoch": 1.86, "grad_norm": 5.662039756774902, "learning_rate": 4.112087541084533e-06, "loss": 0.2147, "step": 188925 }, { "epoch": 1.86, "grad_norm": 15.051515579223633, "learning_rate": 4.111963418630285e-06, "loss": 0.2308, "step": 188950 }, { "epoch": 1.86, "grad_norm": 9.032541275024414, "learning_rate": 4.111839296176036e-06, "loss": 0.1984, "step": 188975 }, { "epoch": 1.86, "grad_norm": 12.372365951538086, "learning_rate": 4.111715173721788e-06, "loss": 0.2449, "step": 189000 }, { "epoch": 1.86, "grad_norm": 7.038572788238525, "learning_rate": 4.111591051267539e-06, "loss": 0.2561, "step": 189025 }, { "epoch": 1.86, "grad_norm": 9.73936653137207, "learning_rate": 4.11146692881329e-06, "loss": 0.2054, "step": 189050 }, { "epoch": 1.86, "grad_norm": 8.193735122680664, "learning_rate": 4.1113428063590424e-06, "loss": 0.2207, "step": 189075 }, { "epoch": 1.86, "grad_norm": 12.595097541809082, "learning_rate": 4.111218683904794e-06, "loss": 0.1974, "step": 189100 }, { "epoch": 1.86, "grad_norm": 6.800817489624023, "learning_rate": 4.111094561450545e-06, "loss": 0.2402, "step": 189125 }, { "epoch": 1.86, "grad_norm": 14.97314167022705, "learning_rate": 4.110970438996297e-06, "loss": 0.2712, "step": 189150 }, { "epoch": 1.86, "grad_norm": 3.8741073608398438, "learning_rate": 4.110846316542048e-06, "loss": 0.2532, "step": 189175 }, { "epoch": 1.86, "grad_norm": 9.758520126342773, "learning_rate": 4.110722194087799e-06, "loss": 0.1938, "step": 189200 }, { "epoch": 1.86, "grad_norm": 4.632801532745361, "learning_rate": 4.110598071633551e-06, "loss": 0.2028, "step": 189225 }, { "epoch": 1.86, "grad_norm": 12.016132354736328, "learning_rate": 4.110473949179303e-06, "loss": 0.2196, "step": 189250 }, { "epoch": 1.86, "grad_norm": 2.8240723609924316, "learning_rate": 4.110349826725054e-06, "loss": 0.274, "step": 189275 }, { "epoch": 1.86, "grad_norm": 12.206131935119629, "learning_rate": 4.110225704270806e-06, "loss": 0.2261, "step": 189300 }, { "epoch": 1.86, "grad_norm": 6.4105353355407715, "learning_rate": 4.110101581816557e-06, "loss": 0.2364, "step": 189325 }, { "epoch": 1.86, "grad_norm": 13.013449668884277, "learning_rate": 4.109977459362309e-06, "loss": 0.2665, "step": 189350 }, { "epoch": 1.86, "grad_norm": 11.647180557250977, "learning_rate": 4.10985333690806e-06, "loss": 0.1827, "step": 189375 }, { "epoch": 1.86, "grad_norm": 15.057178497314453, "learning_rate": 4.109729214453812e-06, "loss": 0.222, "step": 189400 }, { "epoch": 1.86, "grad_norm": 12.204084396362305, "learning_rate": 4.109605091999564e-06, "loss": 0.24, "step": 189425 }, { "epoch": 1.86, "grad_norm": 15.854443550109863, "learning_rate": 4.109480969545315e-06, "loss": 0.1703, "step": 189450 }, { "epoch": 1.86, "grad_norm": 3.91530704498291, "learning_rate": 4.109356847091067e-06, "loss": 0.217, "step": 189475 }, { "epoch": 1.86, "grad_norm": 11.126153945922852, "learning_rate": 4.109232724636818e-06, "loss": 0.2079, "step": 189500 }, { "epoch": 1.86, "grad_norm": 9.474380493164062, "learning_rate": 4.109108602182569e-06, "loss": 0.259, "step": 189525 }, { "epoch": 1.86, "grad_norm": 13.43142318725586, "learning_rate": 4.108984479728321e-06, "loss": 0.2534, "step": 189550 }, { "epoch": 1.86, "grad_norm": 7.738866329193115, "learning_rate": 4.108860357274073e-06, "loss": 0.2258, "step": 189575 }, { "epoch": 1.86, "grad_norm": 15.610886573791504, "learning_rate": 4.108736234819824e-06, "loss": 0.2475, "step": 189600 }, { "epoch": 1.86, "grad_norm": 3.4854483604431152, "learning_rate": 4.108612112365576e-06, "loss": 0.1879, "step": 189625 }, { "epoch": 1.86, "grad_norm": 15.32413101196289, "learning_rate": 4.108487989911327e-06, "loss": 0.2531, "step": 189650 }, { "epoch": 1.86, "grad_norm": 4.48888635635376, "learning_rate": 4.108363867457078e-06, "loss": 0.2365, "step": 189675 }, { "epoch": 1.87, "grad_norm": 13.558401107788086, "learning_rate": 4.10823974500283e-06, "loss": 0.2522, "step": 189700 }, { "epoch": 1.87, "grad_norm": 4.765639781951904, "learning_rate": 4.1081156225485815e-06, "loss": 0.1978, "step": 189725 }, { "epoch": 1.87, "grad_norm": 6.69694185256958, "learning_rate": 4.107991500094334e-06, "loss": 0.2367, "step": 189750 }, { "epoch": 1.87, "grad_norm": 8.76771354675293, "learning_rate": 4.107867377640085e-06, "loss": 0.234, "step": 189775 }, { "epoch": 1.87, "grad_norm": 13.10852336883545, "learning_rate": 4.107743255185837e-06, "loss": 0.2458, "step": 189800 }, { "epoch": 1.87, "grad_norm": 0.8074016571044922, "learning_rate": 4.107619132731588e-06, "loss": 0.2163, "step": 189825 }, { "epoch": 1.87, "grad_norm": 13.524922370910645, "learning_rate": 4.10749501027734e-06, "loss": 0.2146, "step": 189850 }, { "epoch": 1.87, "grad_norm": 6.657472133636475, "learning_rate": 4.107370887823091e-06, "loss": 0.2516, "step": 189875 }, { "epoch": 1.87, "grad_norm": 9.62025260925293, "learning_rate": 4.1072467653688426e-06, "loss": 0.2362, "step": 189900 }, { "epoch": 1.87, "grad_norm": 2.4364123344421387, "learning_rate": 4.107122642914595e-06, "loss": 0.2347, "step": 189925 }, { "epoch": 1.87, "grad_norm": 9.7640962600708, "learning_rate": 4.106998520460346e-06, "loss": 0.192, "step": 189950 }, { "epoch": 1.87, "grad_norm": 4.859470844268799, "learning_rate": 4.106874398006097e-06, "loss": 0.1964, "step": 189975 }, { "epoch": 1.87, "grad_norm": 22.347993850708008, "learning_rate": 4.106750275551849e-06, "loss": 0.2365, "step": 190000 }, { "epoch": 1.87, "grad_norm": 1.080145001411438, "learning_rate": 4.1066261530976e-06, "loss": 0.1882, "step": 190025 }, { "epoch": 1.87, "grad_norm": 13.666328430175781, "learning_rate": 4.1065020306433515e-06, "loss": 0.2192, "step": 190050 }, { "epoch": 1.87, "grad_norm": 2.260122299194336, "learning_rate": 4.106377908189104e-06, "loss": 0.2561, "step": 190075 }, { "epoch": 1.87, "grad_norm": 13.173775672912598, "learning_rate": 4.106253785734855e-06, "loss": 0.2768, "step": 190100 }, { "epoch": 1.87, "grad_norm": 6.001003265380859, "learning_rate": 4.106129663280606e-06, "loss": 0.2149, "step": 190125 }, { "epoch": 1.87, "grad_norm": 19.94662094116211, "learning_rate": 4.106005540826358e-06, "loss": 0.2233, "step": 190150 }, { "epoch": 1.87, "grad_norm": 5.501991271972656, "learning_rate": 4.105881418372109e-06, "loss": 0.2333, "step": 190175 }, { "epoch": 1.87, "grad_norm": 18.678144454956055, "learning_rate": 4.105757295917861e-06, "loss": 0.2687, "step": 190200 }, { "epoch": 1.87, "grad_norm": 5.963401794433594, "learning_rate": 4.1056331734636126e-06, "loss": 0.2143, "step": 190225 }, { "epoch": 1.87, "grad_norm": 8.059005737304688, "learning_rate": 4.105509051009365e-06, "loss": 0.2835, "step": 190250 }, { "epoch": 1.87, "grad_norm": 3.8197271823883057, "learning_rate": 4.105384928555116e-06, "loss": 0.1737, "step": 190275 }, { "epoch": 1.87, "grad_norm": 8.2926664352417, "learning_rate": 4.105260806100867e-06, "loss": 0.1787, "step": 190300 }, { "epoch": 1.87, "grad_norm": 5.7133965492248535, "learning_rate": 4.105136683646619e-06, "loss": 0.2071, "step": 190325 }, { "epoch": 1.87, "grad_norm": 16.071849822998047, "learning_rate": 4.10501256119237e-06, "loss": 0.3058, "step": 190350 }, { "epoch": 1.87, "grad_norm": 2.700498580932617, "learning_rate": 4.1048884387381215e-06, "loss": 0.2257, "step": 190375 }, { "epoch": 1.87, "grad_norm": 17.56485939025879, "learning_rate": 4.1047643162838736e-06, "loss": 0.2994, "step": 190400 }, { "epoch": 1.87, "grad_norm": 9.47178840637207, "learning_rate": 4.104640193829625e-06, "loss": 0.1889, "step": 190425 }, { "epoch": 1.87, "grad_norm": 13.670602798461914, "learning_rate": 4.104516071375376e-06, "loss": 0.2237, "step": 190450 }, { "epoch": 1.87, "grad_norm": 6.250015735626221, "learning_rate": 4.104391948921128e-06, "loss": 0.2311, "step": 190475 }, { "epoch": 1.87, "grad_norm": 6.397035121917725, "learning_rate": 4.104267826466879e-06, "loss": 0.2598, "step": 190500 }, { "epoch": 1.87, "grad_norm": 3.7107796669006348, "learning_rate": 4.1041437040126305e-06, "loss": 0.225, "step": 190525 }, { "epoch": 1.87, "grad_norm": 16.305591583251953, "learning_rate": 4.1040195815583825e-06, "loss": 0.274, "step": 190550 }, { "epoch": 1.87, "grad_norm": 6.21029806137085, "learning_rate": 4.103895459104134e-06, "loss": 0.2075, "step": 190575 }, { "epoch": 1.87, "grad_norm": 11.697105407714844, "learning_rate": 4.103771336649886e-06, "loss": 0.2593, "step": 190600 }, { "epoch": 1.87, "grad_norm": 6.095695495605469, "learning_rate": 4.103647214195637e-06, "loss": 0.2389, "step": 190625 }, { "epoch": 1.87, "grad_norm": 21.597970962524414, "learning_rate": 4.103523091741389e-06, "loss": 0.2149, "step": 190650 }, { "epoch": 1.87, "grad_norm": 1.8822599649429321, "learning_rate": 4.10339896928714e-06, "loss": 0.2388, "step": 190675 }, { "epoch": 1.87, "grad_norm": 7.802606105804443, "learning_rate": 4.103274846832892e-06, "loss": 0.2846, "step": 190700 }, { "epoch": 1.88, "grad_norm": 5.904186725616455, "learning_rate": 4.1031507243786436e-06, "loss": 0.1801, "step": 190725 }, { "epoch": 1.88, "grad_norm": 12.684443473815918, "learning_rate": 4.103026601924395e-06, "loss": 0.2431, "step": 190750 }, { "epoch": 1.88, "grad_norm": 5.395748138427734, "learning_rate": 4.102902479470147e-06, "loss": 0.183, "step": 190775 }, { "epoch": 1.88, "grad_norm": 12.630407333374023, "learning_rate": 4.102778357015898e-06, "loss": 0.2695, "step": 190800 }, { "epoch": 1.88, "grad_norm": 7.145798683166504, "learning_rate": 4.102654234561649e-06, "loss": 0.1966, "step": 190825 }, { "epoch": 1.88, "grad_norm": 14.381424903869629, "learning_rate": 4.102530112107401e-06, "loss": 0.2546, "step": 190850 }, { "epoch": 1.88, "grad_norm": 2.660912275314331, "learning_rate": 4.1024059896531525e-06, "loss": 0.2234, "step": 190875 }, { "epoch": 1.88, "grad_norm": 18.39583396911621, "learning_rate": 4.102281867198904e-06, "loss": 0.2246, "step": 190900 }, { "epoch": 1.88, "grad_norm": 7.589085578918457, "learning_rate": 4.102157744744656e-06, "loss": 0.2281, "step": 190925 }, { "epoch": 1.88, "grad_norm": 13.160082817077637, "learning_rate": 4.102033622290407e-06, "loss": 0.2541, "step": 190950 }, { "epoch": 1.88, "grad_norm": 5.162865161895752, "learning_rate": 4.101909499836158e-06, "loss": 0.1987, "step": 190975 }, { "epoch": 1.88, "grad_norm": 15.352046012878418, "learning_rate": 4.10178537738191e-06, "loss": 0.2002, "step": 191000 }, { "epoch": 1.88, "grad_norm": 25.313953399658203, "learning_rate": 4.1016612549276615e-06, "loss": 0.2114, "step": 191025 }, { "epoch": 1.88, "grad_norm": 13.345174789428711, "learning_rate": 4.1015371324734135e-06, "loss": 0.2079, "step": 191050 }, { "epoch": 1.88, "grad_norm": 7.89125394821167, "learning_rate": 4.101413010019165e-06, "loss": 0.2445, "step": 191075 }, { "epoch": 1.88, "grad_norm": 9.551661491394043, "learning_rate": 4.101288887564917e-06, "loss": 0.2834, "step": 191100 }, { "epoch": 1.88, "grad_norm": 4.303422451019287, "learning_rate": 4.101169730008838e-06, "loss": 0.1984, "step": 191125 }, { "epoch": 1.88, "grad_norm": 15.420343399047852, "learning_rate": 4.10104560755459e-06, "loss": 0.1989, "step": 191150 }, { "epoch": 1.88, "grad_norm": 3.5025722980499268, "learning_rate": 4.100921485100341e-06, "loss": 0.2185, "step": 191175 }, { "epoch": 1.88, "grad_norm": 7.035303592681885, "learning_rate": 4.100797362646092e-06, "loss": 0.2232, "step": 191200 }, { "epoch": 1.88, "grad_norm": 2.5100257396698, "learning_rate": 4.100673240191844e-06, "loss": 0.1951, "step": 191225 }, { "epoch": 1.88, "grad_norm": 15.111530303955078, "learning_rate": 4.100549117737596e-06, "loss": 0.2498, "step": 191250 }, { "epoch": 1.88, "grad_norm": 7.199565410614014, "learning_rate": 4.100424995283347e-06, "loss": 0.1759, "step": 191275 }, { "epoch": 1.88, "grad_norm": 12.557143211364746, "learning_rate": 4.100300872829099e-06, "loss": 0.2066, "step": 191300 }, { "epoch": 1.88, "grad_norm": 10.188578605651855, "learning_rate": 4.10017675037485e-06, "loss": 0.2182, "step": 191325 }, { "epoch": 1.88, "grad_norm": 18.202470779418945, "learning_rate": 4.100052627920601e-06, "loss": 0.2563, "step": 191350 }, { "epoch": 1.88, "grad_norm": 9.723051071166992, "learning_rate": 4.099928505466353e-06, "loss": 0.1992, "step": 191375 }, { "epoch": 1.88, "grad_norm": 16.418020248413086, "learning_rate": 4.0998043830121046e-06, "loss": 0.2543, "step": 191400 }, { "epoch": 1.88, "grad_norm": 5.747104167938232, "learning_rate": 4.099680260557856e-06, "loss": 0.2484, "step": 191425 }, { "epoch": 1.88, "grad_norm": 15.444944381713867, "learning_rate": 4.099556138103608e-06, "loss": 0.3054, "step": 191450 }, { "epoch": 1.88, "grad_norm": 6.005319595336914, "learning_rate": 4.099432015649359e-06, "loss": 0.1934, "step": 191475 }, { "epoch": 1.88, "grad_norm": 16.68064308166504, "learning_rate": 4.099307893195111e-06, "loss": 0.1845, "step": 191500 }, { "epoch": 1.88, "grad_norm": 10.31750774383545, "learning_rate": 4.099183770740862e-06, "loss": 0.1749, "step": 191525 }, { "epoch": 1.88, "grad_norm": 9.42218017578125, "learning_rate": 4.099059648286614e-06, "loss": 0.2284, "step": 191550 }, { "epoch": 1.88, "grad_norm": 5.774571418762207, "learning_rate": 4.0989355258323656e-06, "loss": 0.1944, "step": 191575 }, { "epoch": 1.88, "grad_norm": 11.878005981445312, "learning_rate": 4.098811403378117e-06, "loss": 0.2417, "step": 191600 }, { "epoch": 1.88, "grad_norm": 7.205840110778809, "learning_rate": 4.098687280923869e-06, "loss": 0.2002, "step": 191625 }, { "epoch": 1.88, "grad_norm": 8.173971176147461, "learning_rate": 4.09856315846962e-06, "loss": 0.196, "step": 191650 }, { "epoch": 1.88, "grad_norm": 7.575217247009277, "learning_rate": 4.098439036015371e-06, "loss": 0.1893, "step": 191675 }, { "epoch": 1.88, "grad_norm": 10.747322082519531, "learning_rate": 4.098314913561123e-06, "loss": 0.2334, "step": 191700 }, { "epoch": 1.89, "grad_norm": 4.870541572570801, "learning_rate": 4.0981907911068745e-06, "loss": 0.2498, "step": 191725 }, { "epoch": 1.89, "grad_norm": 18.533275604248047, "learning_rate": 4.098066668652626e-06, "loss": 0.2134, "step": 191750 }, { "epoch": 1.89, "grad_norm": 4.411901473999023, "learning_rate": 4.097942546198378e-06, "loss": 0.2396, "step": 191775 }, { "epoch": 1.89, "grad_norm": 21.61622428894043, "learning_rate": 4.097818423744129e-06, "loss": 0.2113, "step": 191800 }, { "epoch": 1.89, "grad_norm": 4.601720809936523, "learning_rate": 4.097694301289881e-06, "loss": 0.1769, "step": 191825 }, { "epoch": 1.89, "grad_norm": 7.751491069793701, "learning_rate": 4.097570178835632e-06, "loss": 0.2171, "step": 191850 }, { "epoch": 1.89, "grad_norm": 3.5315630435943604, "learning_rate": 4.097446056381384e-06, "loss": 0.2356, "step": 191875 }, { "epoch": 1.89, "grad_norm": 17.015281677246094, "learning_rate": 4.0973219339271356e-06, "loss": 0.2541, "step": 191900 }, { "epoch": 1.89, "grad_norm": 6.8599443435668945, "learning_rate": 4.097197811472888e-06, "loss": 0.1974, "step": 191925 }, { "epoch": 1.89, "grad_norm": 12.675867080688477, "learning_rate": 4.097073689018639e-06, "loss": 0.2203, "step": 191950 }, { "epoch": 1.89, "grad_norm": 3.539612293243408, "learning_rate": 4.09694956656439e-06, "loss": 0.18, "step": 191975 }, { "epoch": 1.89, "grad_norm": 14.140581130981445, "learning_rate": 4.096825444110142e-06, "loss": 0.2507, "step": 192000 }, { "epoch": 1.89, "grad_norm": 6.346844673156738, "learning_rate": 4.096701321655893e-06, "loss": 0.2415, "step": 192025 }, { "epoch": 1.89, "grad_norm": 16.330379486083984, "learning_rate": 4.0965771992016445e-06, "loss": 0.2682, "step": 192050 }, { "epoch": 1.89, "grad_norm": 2.1564481258392334, "learning_rate": 4.096453076747397e-06, "loss": 0.1977, "step": 192075 }, { "epoch": 1.89, "grad_norm": 11.6528959274292, "learning_rate": 4.096328954293148e-06, "loss": 0.1912, "step": 192100 }, { "epoch": 1.89, "grad_norm": 4.863301753997803, "learning_rate": 4.096204831838899e-06, "loss": 0.2053, "step": 192125 }, { "epoch": 1.89, "grad_norm": 9.788936614990234, "learning_rate": 4.096080709384651e-06, "loss": 0.2204, "step": 192150 }, { "epoch": 1.89, "grad_norm": 3.7208001613616943, "learning_rate": 4.095956586930402e-06, "loss": 0.2226, "step": 192175 }, { "epoch": 1.89, "grad_norm": 11.827136993408203, "learning_rate": 4.0958324644761535e-06, "loss": 0.2716, "step": 192200 }, { "epoch": 1.89, "grad_norm": 0.8641197085380554, "learning_rate": 4.0957083420219055e-06, "loss": 0.1935, "step": 192225 }, { "epoch": 1.89, "grad_norm": 17.708293914794922, "learning_rate": 4.095584219567657e-06, "loss": 0.2445, "step": 192250 }, { "epoch": 1.89, "grad_norm": 1.847046136856079, "learning_rate": 4.095460097113409e-06, "loss": 0.1783, "step": 192275 }, { "epoch": 1.89, "grad_norm": 5.821805477142334, "learning_rate": 4.09533597465916e-06, "loss": 0.1945, "step": 192300 }, { "epoch": 1.89, "grad_norm": 6.520950794219971, "learning_rate": 4.095211852204912e-06, "loss": 0.1647, "step": 192325 }, { "epoch": 1.89, "grad_norm": 11.530355453491211, "learning_rate": 4.095087729750663e-06, "loss": 0.2473, "step": 192350 }, { "epoch": 1.89, "grad_norm": 6.573638916015625, "learning_rate": 4.094963607296415e-06, "loss": 0.2345, "step": 192375 }, { "epoch": 1.89, "grad_norm": 10.144694328308105, "learning_rate": 4.0948394848421666e-06, "loss": 0.2322, "step": 192400 }, { "epoch": 1.89, "grad_norm": 7.273835182189941, "learning_rate": 4.094715362387918e-06, "loss": 0.2384, "step": 192425 }, { "epoch": 1.89, "grad_norm": 15.792776107788086, "learning_rate": 4.094591239933669e-06, "loss": 0.2556, "step": 192450 }, { "epoch": 1.89, "grad_norm": 7.350399494171143, "learning_rate": 4.094467117479421e-06, "loss": 0.2325, "step": 192475 }, { "epoch": 1.89, "grad_norm": 16.034683227539062, "learning_rate": 4.094342995025172e-06, "loss": 0.1745, "step": 192500 }, { "epoch": 1.89, "grad_norm": 3.3401238918304443, "learning_rate": 4.0942188725709235e-06, "loss": 0.183, "step": 192525 }, { "epoch": 1.89, "grad_norm": 14.692127227783203, "learning_rate": 4.0940947501166755e-06, "loss": 0.1978, "step": 192550 }, { "epoch": 1.89, "grad_norm": 3.2192466259002686, "learning_rate": 4.093970627662427e-06, "loss": 0.2124, "step": 192575 }, { "epoch": 1.89, "grad_norm": 13.623434066772461, "learning_rate": 4.093846505208178e-06, "loss": 0.2594, "step": 192600 }, { "epoch": 1.89, "grad_norm": 1.0205143690109253, "learning_rate": 4.09372238275393e-06, "loss": 0.1799, "step": 192625 }, { "epoch": 1.89, "grad_norm": 14.045831680297852, "learning_rate": 4.093598260299681e-06, "loss": 0.2347, "step": 192650 }, { "epoch": 1.89, "grad_norm": 4.136122226715088, "learning_rate": 4.093474137845433e-06, "loss": 0.2284, "step": 192675 }, { "epoch": 1.89, "grad_norm": 15.234418869018555, "learning_rate": 4.0933500153911845e-06, "loss": 0.2289, "step": 192700 }, { "epoch": 1.89, "grad_norm": 5.126014709472656, "learning_rate": 4.0932258929369365e-06, "loss": 0.2095, "step": 192725 }, { "epoch": 1.9, "grad_norm": 15.450016975402832, "learning_rate": 4.093101770482688e-06, "loss": 0.2434, "step": 192750 }, { "epoch": 1.9, "grad_norm": 3.8818769454956055, "learning_rate": 4.09297764802844e-06, "loss": 0.2192, "step": 192775 }, { "epoch": 1.9, "grad_norm": 13.42275333404541, "learning_rate": 4.092853525574191e-06, "loss": 0.2538, "step": 192800 }, { "epoch": 1.9, "grad_norm": 6.642263412475586, "learning_rate": 4.092729403119942e-06, "loss": 0.2065, "step": 192825 }, { "epoch": 1.9, "grad_norm": 18.197265625, "learning_rate": 4.092605280665694e-06, "loss": 0.2284, "step": 192850 }, { "epoch": 1.9, "grad_norm": 4.742271423339844, "learning_rate": 4.0924811582114455e-06, "loss": 0.2389, "step": 192875 }, { "epoch": 1.9, "grad_norm": 7.625330448150635, "learning_rate": 4.092357035757197e-06, "loss": 0.2715, "step": 192900 }, { "epoch": 1.9, "grad_norm": 5.906366348266602, "learning_rate": 4.092232913302949e-06, "loss": 0.2426, "step": 192925 }, { "epoch": 1.9, "grad_norm": 17.5617618560791, "learning_rate": 4.0921087908487e-06, "loss": 0.208, "step": 192950 }, { "epoch": 1.9, "grad_norm": 5.817713260650635, "learning_rate": 4.091984668394451e-06, "loss": 0.2657, "step": 192975 }, { "epoch": 1.9, "grad_norm": 12.165047645568848, "learning_rate": 4.091860545940203e-06, "loss": 0.2402, "step": 193000 }, { "epoch": 1.9, "grad_norm": 6.157369613647461, "learning_rate": 4.0917364234859545e-06, "loss": 0.2489, "step": 193025 }, { "epoch": 1.9, "grad_norm": 15.62484073638916, "learning_rate": 4.091612301031706e-06, "loss": 0.2273, "step": 193050 }, { "epoch": 1.9, "grad_norm": 3.9123849868774414, "learning_rate": 4.091488178577458e-06, "loss": 0.2388, "step": 193075 }, { "epoch": 1.9, "grad_norm": 7.41642427444458, "learning_rate": 4.091364056123209e-06, "loss": 0.2233, "step": 193100 }, { "epoch": 1.9, "grad_norm": 1.884979486465454, "learning_rate": 4.091239933668961e-06, "loss": 0.2023, "step": 193125 }, { "epoch": 1.9, "grad_norm": 14.966012954711914, "learning_rate": 4.091115811214712e-06, "loss": 0.2225, "step": 193150 }, { "epoch": 1.9, "grad_norm": 10.26023006439209, "learning_rate": 4.090991688760464e-06, "loss": 0.2524, "step": 193175 }, { "epoch": 1.9, "grad_norm": 12.15097427368164, "learning_rate": 4.0908675663062155e-06, "loss": 0.2862, "step": 193200 }, { "epoch": 1.9, "grad_norm": 5.194722652435303, "learning_rate": 4.0907434438519676e-06, "loss": 0.2169, "step": 193225 }, { "epoch": 1.9, "grad_norm": 15.1734619140625, "learning_rate": 4.090619321397719e-06, "loss": 0.2499, "step": 193250 }, { "epoch": 1.9, "grad_norm": 3.7602109909057617, "learning_rate": 4.09049519894347e-06, "loss": 0.2319, "step": 193275 }, { "epoch": 1.9, "grad_norm": 17.836444854736328, "learning_rate": 4.090371076489221e-06, "loss": 0.2533, "step": 193300 }, { "epoch": 1.9, "grad_norm": 7.444974422454834, "learning_rate": 4.090246954034973e-06, "loss": 0.2392, "step": 193325 }, { "epoch": 1.9, "grad_norm": 13.010128021240234, "learning_rate": 4.0901228315807245e-06, "loss": 0.2338, "step": 193350 }, { "epoch": 1.9, "grad_norm": 1.6155354976654053, "learning_rate": 4.090003674024646e-06, "loss": 0.2017, "step": 193375 }, { "epoch": 1.9, "grad_norm": 15.784110069274902, "learning_rate": 4.0898795515703975e-06, "loss": 0.2587, "step": 193400 }, { "epoch": 1.9, "grad_norm": 6.411750793457031, "learning_rate": 4.089755429116149e-06, "loss": 0.2138, "step": 193425 }, { "epoch": 1.9, "grad_norm": 14.29036808013916, "learning_rate": 4.089631306661901e-06, "loss": 0.2593, "step": 193450 }, { "epoch": 1.9, "grad_norm": 3.6018731594085693, "learning_rate": 4.089507184207652e-06, "loss": 0.2096, "step": 193475 }, { "epoch": 1.9, "grad_norm": 17.4273681640625, "learning_rate": 4.089383061753403e-06, "loss": 0.2346, "step": 193500 }, { "epoch": 1.9, "grad_norm": 15.517156600952148, "learning_rate": 4.089258939299155e-06, "loss": 0.2305, "step": 193525 }, { "epoch": 1.9, "grad_norm": 14.322904586791992, "learning_rate": 4.0891348168449065e-06, "loss": 0.2221, "step": 193550 }, { "epoch": 1.9, "grad_norm": 0.49458593130111694, "learning_rate": 4.0890106943906586e-06, "loss": 0.1922, "step": 193575 }, { "epoch": 1.9, "grad_norm": 10.395139694213867, "learning_rate": 4.08888657193641e-06, "loss": 0.2648, "step": 193600 }, { "epoch": 1.9, "grad_norm": 4.298849582672119, "learning_rate": 4.088762449482162e-06, "loss": 0.206, "step": 193625 }, { "epoch": 1.9, "grad_norm": 12.886338233947754, "learning_rate": 4.088638327027913e-06, "loss": 0.2086, "step": 193650 }, { "epoch": 1.9, "grad_norm": 6.576738357543945, "learning_rate": 4.088514204573665e-06, "loss": 0.1884, "step": 193675 }, { "epoch": 1.9, "grad_norm": 12.484156608581543, "learning_rate": 4.088390082119416e-06, "loss": 0.2146, "step": 193700 }, { "epoch": 1.9, "grad_norm": 5.102383613586426, "learning_rate": 4.0882659596651675e-06, "loss": 0.2319, "step": 193725 }, { "epoch": 1.9, "grad_norm": 17.5933895111084, "learning_rate": 4.08814183721092e-06, "loss": 0.2316, "step": 193750 }, { "epoch": 1.91, "grad_norm": 6.528173923492432, "learning_rate": 4.088017714756671e-06, "loss": 0.235, "step": 193775 }, { "epoch": 1.91, "grad_norm": 12.163738250732422, "learning_rate": 4.087893592302422e-06, "loss": 0.2417, "step": 193800 }, { "epoch": 1.91, "grad_norm": 10.632610321044922, "learning_rate": 4.087769469848174e-06, "loss": 0.2692, "step": 193825 }, { "epoch": 1.91, "grad_norm": 12.522893905639648, "learning_rate": 4.087645347393925e-06, "loss": 0.2463, "step": 193850 }, { "epoch": 1.91, "grad_norm": 4.652920246124268, "learning_rate": 4.0875212249396765e-06, "loss": 0.24, "step": 193875 }, { "epoch": 1.91, "grad_norm": 18.380447387695312, "learning_rate": 4.087397102485428e-06, "loss": 0.2092, "step": 193900 }, { "epoch": 1.91, "grad_norm": 0.855620265007019, "learning_rate": 4.08727298003118e-06, "loss": 0.1924, "step": 193925 }, { "epoch": 1.91, "grad_norm": 12.254443168640137, "learning_rate": 4.087148857576931e-06, "loss": 0.2502, "step": 193950 }, { "epoch": 1.91, "grad_norm": 8.252685546875, "learning_rate": 4.087024735122683e-06, "loss": 0.215, "step": 193975 }, { "epoch": 1.91, "grad_norm": 17.90770721435547, "learning_rate": 4.086900612668434e-06, "loss": 0.2554, "step": 194000 }, { "epoch": 1.91, "grad_norm": 5.661880016326904, "learning_rate": 4.086776490214186e-06, "loss": 0.1914, "step": 194025 }, { "epoch": 1.91, "grad_norm": 18.260271072387695, "learning_rate": 4.0866523677599375e-06, "loss": 0.2583, "step": 194050 }, { "epoch": 1.91, "grad_norm": 6.949002265930176, "learning_rate": 4.0865282453056896e-06, "loss": 0.2126, "step": 194075 }, { "epoch": 1.91, "grad_norm": 23.52530860900879, "learning_rate": 4.086404122851441e-06, "loss": 0.293, "step": 194100 }, { "epoch": 1.91, "grad_norm": 5.029995918273926, "learning_rate": 4.086280000397192e-06, "loss": 0.2242, "step": 194125 }, { "epoch": 1.91, "grad_norm": 17.801549911499023, "learning_rate": 4.086155877942944e-06, "loss": 0.2055, "step": 194150 }, { "epoch": 1.91, "grad_norm": 5.634109973907471, "learning_rate": 4.086031755488695e-06, "loss": 0.2431, "step": 194175 }, { "epoch": 1.91, "grad_norm": 17.02985954284668, "learning_rate": 4.0859076330344465e-06, "loss": 0.2587, "step": 194200 }, { "epoch": 1.91, "grad_norm": 8.615287780761719, "learning_rate": 4.0857835105801985e-06, "loss": 0.232, "step": 194225 }, { "epoch": 1.91, "grad_norm": 12.611397743225098, "learning_rate": 4.08565938812595e-06, "loss": 0.2579, "step": 194250 }, { "epoch": 1.91, "grad_norm": 4.404827117919922, "learning_rate": 4.085535265671701e-06, "loss": 0.287, "step": 194275 }, { "epoch": 1.91, "grad_norm": 15.44020938873291, "learning_rate": 4.085411143217453e-06, "loss": 0.2666, "step": 194300 }, { "epoch": 1.91, "grad_norm": 3.810396432876587, "learning_rate": 4.085287020763204e-06, "loss": 0.2067, "step": 194325 }, { "epoch": 1.91, "grad_norm": 13.006507873535156, "learning_rate": 4.0851628983089554e-06, "loss": 0.2579, "step": 194350 }, { "epoch": 1.91, "grad_norm": 4.020456314086914, "learning_rate": 4.0850387758547075e-06, "loss": 0.212, "step": 194375 }, { "epoch": 1.91, "grad_norm": 14.519392013549805, "learning_rate": 4.084914653400459e-06, "loss": 0.2421, "step": 194400 }, { "epoch": 1.91, "grad_norm": 0.43494290113449097, "learning_rate": 4.084790530946211e-06, "loss": 0.1893, "step": 194425 }, { "epoch": 1.91, "grad_norm": 10.133535385131836, "learning_rate": 4.084666408491962e-06, "loss": 0.2534, "step": 194450 }, { "epoch": 1.91, "grad_norm": 3.2326366901397705, "learning_rate": 4.084542286037714e-06, "loss": 0.2231, "step": 194475 }, { "epoch": 1.91, "grad_norm": 11.157730102539062, "learning_rate": 4.084418163583465e-06, "loss": 0.2526, "step": 194500 }, { "epoch": 1.91, "grad_norm": 9.893603324890137, "learning_rate": 4.084294041129217e-06, "loss": 0.2047, "step": 194525 }, { "epoch": 1.91, "grad_norm": 11.256644248962402, "learning_rate": 4.0841699186749685e-06, "loss": 0.2553, "step": 194550 }, { "epoch": 1.91, "grad_norm": 3.7772631645202637, "learning_rate": 4.08404579622072e-06, "loss": 0.2296, "step": 194575 }, { "epoch": 1.91, "grad_norm": 14.83610725402832, "learning_rate": 4.083921673766472e-06, "loss": 0.2196, "step": 194600 }, { "epoch": 1.91, "grad_norm": 3.9419796466827393, "learning_rate": 4.083797551312223e-06, "loss": 0.2339, "step": 194625 }, { "epoch": 1.91, "grad_norm": 17.66415786743164, "learning_rate": 4.083673428857974e-06, "loss": 0.2284, "step": 194650 }, { "epoch": 1.91, "grad_norm": 5.585566997528076, "learning_rate": 4.083549306403726e-06, "loss": 0.1612, "step": 194675 }, { "epoch": 1.91, "grad_norm": 14.039464950561523, "learning_rate": 4.0834251839494775e-06, "loss": 0.2106, "step": 194700 }, { "epoch": 1.91, "grad_norm": 6.3270440101623535, "learning_rate": 4.083301061495229e-06, "loss": 0.2044, "step": 194725 }, { "epoch": 1.91, "grad_norm": 10.703412055969238, "learning_rate": 4.083176939040981e-06, "loss": 0.2392, "step": 194750 }, { "epoch": 1.92, "grad_norm": 20.586849212646484, "learning_rate": 4.083052816586732e-06, "loss": 0.2543, "step": 194775 }, { "epoch": 1.92, "grad_norm": 18.39598274230957, "learning_rate": 4.082928694132484e-06, "loss": 0.2623, "step": 194800 }, { "epoch": 1.92, "grad_norm": 5.271079063415527, "learning_rate": 4.082804571678235e-06, "loss": 0.1902, "step": 194825 }, { "epoch": 1.92, "grad_norm": 13.591976165771484, "learning_rate": 4.082680449223987e-06, "loss": 0.1953, "step": 194850 }, { "epoch": 1.92, "grad_norm": 7.097264289855957, "learning_rate": 4.0825563267697385e-06, "loss": 0.2294, "step": 194875 }, { "epoch": 1.92, "grad_norm": 13.470344543457031, "learning_rate": 4.08243220431549e-06, "loss": 0.2728, "step": 194900 }, { "epoch": 1.92, "grad_norm": 1.722930908203125, "learning_rate": 4.082308081861242e-06, "loss": 0.2155, "step": 194925 }, { "epoch": 1.92, "grad_norm": 17.02869987487793, "learning_rate": 4.082183959406993e-06, "loss": 0.2771, "step": 194950 }, { "epoch": 1.92, "grad_norm": 9.59481143951416, "learning_rate": 4.082059836952744e-06, "loss": 0.1844, "step": 194975 }, { "epoch": 1.92, "grad_norm": 6.7860565185546875, "learning_rate": 4.081935714498496e-06, "loss": 0.1902, "step": 195000 }, { "epoch": 1.92, "grad_norm": 3.7720632553100586, "learning_rate": 4.0818115920442475e-06, "loss": 0.2449, "step": 195025 }, { "epoch": 1.92, "grad_norm": 12.005887985229492, "learning_rate": 4.081687469589999e-06, "loss": 0.2312, "step": 195050 }, { "epoch": 1.92, "grad_norm": 1.9296497106552124, "learning_rate": 4.081563347135751e-06, "loss": 0.1976, "step": 195075 }, { "epoch": 1.92, "grad_norm": 16.439014434814453, "learning_rate": 4.081439224681502e-06, "loss": 0.2194, "step": 195100 }, { "epoch": 1.92, "grad_norm": 7.013436317443848, "learning_rate": 4.081315102227253e-06, "loss": 0.2534, "step": 195125 }, { "epoch": 1.92, "grad_norm": 21.389328002929688, "learning_rate": 4.081190979773005e-06, "loss": 0.2227, "step": 195150 }, { "epoch": 1.92, "grad_norm": 7.8345160484313965, "learning_rate": 4.0810668573187564e-06, "loss": 0.2374, "step": 195175 }, { "epoch": 1.92, "grad_norm": 13.24985122680664, "learning_rate": 4.0809427348645085e-06, "loss": 0.2074, "step": 195200 }, { "epoch": 1.92, "grad_norm": 8.189187049865723, "learning_rate": 4.08081861241026e-06, "loss": 0.2473, "step": 195225 }, { "epoch": 1.92, "grad_norm": 12.802339553833008, "learning_rate": 4.080694489956012e-06, "loss": 0.2881, "step": 195250 }, { "epoch": 1.92, "grad_norm": 9.135278701782227, "learning_rate": 4.080570367501763e-06, "loss": 0.2, "step": 195275 }, { "epoch": 1.92, "grad_norm": 12.00957202911377, "learning_rate": 4.080446245047515e-06, "loss": 0.2117, "step": 195300 }, { "epoch": 1.92, "grad_norm": 11.096861839294434, "learning_rate": 4.080322122593266e-06, "loss": 0.2477, "step": 195325 }, { "epoch": 1.92, "grad_norm": 12.281984329223633, "learning_rate": 4.0801980001390174e-06, "loss": 0.2535, "step": 195350 }, { "epoch": 1.92, "grad_norm": 2.000483989715576, "learning_rate": 4.0800738776847695e-06, "loss": 0.2247, "step": 195375 }, { "epoch": 1.92, "grad_norm": 12.828612327575684, "learning_rate": 4.079949755230521e-06, "loss": 0.2021, "step": 195400 }, { "epoch": 1.92, "grad_norm": 2.5707855224609375, "learning_rate": 4.079825632776272e-06, "loss": 0.2073, "step": 195425 }, { "epoch": 1.92, "grad_norm": 14.588132858276367, "learning_rate": 4.079701510322024e-06, "loss": 0.2713, "step": 195450 }, { "epoch": 1.92, "grad_norm": 5.7022705078125, "learning_rate": 4.079577387867775e-06, "loss": 0.2328, "step": 195475 }, { "epoch": 1.92, "grad_norm": 9.5162992477417, "learning_rate": 4.079453265413526e-06, "loss": 0.269, "step": 195500 }, { "epoch": 1.92, "grad_norm": 5.822170734405518, "learning_rate": 4.0793291429592785e-06, "loss": 0.2111, "step": 195525 }, { "epoch": 1.92, "grad_norm": 15.787132263183594, "learning_rate": 4.07920502050503e-06, "loss": 0.2724, "step": 195550 }, { "epoch": 1.92, "grad_norm": 15.32539176940918, "learning_rate": 4.079080898050781e-06, "loss": 0.2107, "step": 195575 }, { "epoch": 1.92, "grad_norm": 13.178091049194336, "learning_rate": 4.078956775596533e-06, "loss": 0.2182, "step": 195600 }, { "epoch": 1.92, "grad_norm": 7.601516246795654, "learning_rate": 4.078832653142284e-06, "loss": 0.2074, "step": 195625 }, { "epoch": 1.92, "grad_norm": 12.684112548828125, "learning_rate": 4.078708530688036e-06, "loss": 0.2398, "step": 195650 }, { "epoch": 1.92, "grad_norm": 0.030666019767522812, "learning_rate": 4.0785844082337874e-06, "loss": 0.1881, "step": 195675 }, { "epoch": 1.92, "grad_norm": 19.58990478515625, "learning_rate": 4.0784602857795395e-06, "loss": 0.2141, "step": 195700 }, { "epoch": 1.92, "grad_norm": 4.361144542694092, "learning_rate": 4.078336163325291e-06, "loss": 0.2074, "step": 195725 }, { "epoch": 1.92, "grad_norm": 15.305939674377441, "learning_rate": 4.078212040871042e-06, "loss": 0.2481, "step": 195750 }, { "epoch": 1.92, "grad_norm": 8.03589153289795, "learning_rate": 4.078087918416794e-06, "loss": 0.2714, "step": 195775 }, { "epoch": 1.93, "grad_norm": 16.402124404907227, "learning_rate": 4.077963795962545e-06, "loss": 0.2556, "step": 195800 }, { "epoch": 1.93, "grad_norm": 2.3607892990112305, "learning_rate": 4.077839673508296e-06, "loss": 0.2595, "step": 195825 }, { "epoch": 1.93, "grad_norm": 10.09197998046875, "learning_rate": 4.0777155510540485e-06, "loss": 0.2329, "step": 195850 }, { "epoch": 1.93, "grad_norm": 6.515467643737793, "learning_rate": 4.0775914285998e-06, "loss": 0.2308, "step": 195875 }, { "epoch": 1.93, "grad_norm": 7.570314884185791, "learning_rate": 4.077467306145551e-06, "loss": 0.2133, "step": 195900 }, { "epoch": 1.93, "grad_norm": 4.936968803405762, "learning_rate": 4.077343183691303e-06, "loss": 0.2117, "step": 195925 }, { "epoch": 1.93, "grad_norm": 8.256658554077148, "learning_rate": 4.077219061237054e-06, "loss": 0.2646, "step": 195950 }, { "epoch": 1.93, "grad_norm": 5.633825302124023, "learning_rate": 4.077099903680976e-06, "loss": 0.1955, "step": 195975 }, { "epoch": 1.93, "grad_norm": 17.281484603881836, "learning_rate": 4.076975781226727e-06, "loss": 0.2199, "step": 196000 }, { "epoch": 1.93, "grad_norm": 6.838735580444336, "learning_rate": 4.0768516587724784e-06, "loss": 0.2018, "step": 196025 }, { "epoch": 1.93, "grad_norm": 15.333843231201172, "learning_rate": 4.0767275363182305e-06, "loss": 0.2568, "step": 196050 }, { "epoch": 1.93, "grad_norm": 8.311638832092285, "learning_rate": 4.076603413863982e-06, "loss": 0.2088, "step": 196075 }, { "epoch": 1.93, "grad_norm": 14.189513206481934, "learning_rate": 4.076479291409734e-06, "loss": 0.239, "step": 196100 }, { "epoch": 1.93, "grad_norm": 8.221074104309082, "learning_rate": 4.076355168955485e-06, "loss": 0.2119, "step": 196125 }, { "epoch": 1.93, "grad_norm": 14.551076889038086, "learning_rate": 4.076231046501237e-06, "loss": 0.2695, "step": 196150 }, { "epoch": 1.93, "grad_norm": 5.150758266448975, "learning_rate": 4.076106924046988e-06, "loss": 0.1872, "step": 196175 }, { "epoch": 1.93, "grad_norm": 20.385473251342773, "learning_rate": 4.0759828015927395e-06, "loss": 0.2671, "step": 196200 }, { "epoch": 1.93, "grad_norm": 6.443561553955078, "learning_rate": 4.0758586791384915e-06, "loss": 0.2208, "step": 196225 }, { "epoch": 1.93, "grad_norm": 8.432026863098145, "learning_rate": 4.075734556684243e-06, "loss": 0.3214, "step": 196250 }, { "epoch": 1.93, "grad_norm": 5.036821365356445, "learning_rate": 4.075610434229994e-06, "loss": 0.2375, "step": 196275 }, { "epoch": 1.93, "grad_norm": 17.622703552246094, "learning_rate": 4.075486311775746e-06, "loss": 0.2593, "step": 196300 }, { "epoch": 1.93, "grad_norm": 7.413635730743408, "learning_rate": 4.075362189321497e-06, "loss": 0.2179, "step": 196325 }, { "epoch": 1.93, "grad_norm": 11.193950653076172, "learning_rate": 4.0752380668672484e-06, "loss": 0.2689, "step": 196350 }, { "epoch": 1.93, "grad_norm": 11.352072715759277, "learning_rate": 4.0751139444130005e-06, "loss": 0.2327, "step": 196375 }, { "epoch": 1.93, "grad_norm": 10.865532875061035, "learning_rate": 4.074989821958752e-06, "loss": 0.2064, "step": 196400 }, { "epoch": 1.93, "grad_norm": 5.273678779602051, "learning_rate": 4.074865699504503e-06, "loss": 0.3034, "step": 196425 }, { "epoch": 1.93, "grad_norm": 10.055123329162598, "learning_rate": 4.074741577050255e-06, "loss": 0.1911, "step": 196450 }, { "epoch": 1.93, "grad_norm": 0.5967544317245483, "learning_rate": 4.074617454596006e-06, "loss": 0.2377, "step": 196475 }, { "epoch": 1.93, "grad_norm": 7.695674896240234, "learning_rate": 4.074493332141758e-06, "loss": 0.2069, "step": 196500 }, { "epoch": 1.93, "grad_norm": 3.8415908813476562, "learning_rate": 4.0743692096875094e-06, "loss": 0.1831, "step": 196525 }, { "epoch": 1.93, "grad_norm": 8.772043228149414, "learning_rate": 4.0742450872332615e-06, "loss": 0.2438, "step": 196550 }, { "epoch": 1.93, "grad_norm": 9.624236106872559, "learning_rate": 4.074120964779013e-06, "loss": 0.2068, "step": 196575 }, { "epoch": 1.93, "grad_norm": 15.225183486938477, "learning_rate": 4.073996842324765e-06, "loss": 0.2187, "step": 196600 }, { "epoch": 1.93, "grad_norm": 1.14768385887146, "learning_rate": 4.073872719870516e-06, "loss": 0.1978, "step": 196625 }, { "epoch": 1.93, "grad_norm": 12.301800727844238, "learning_rate": 4.073748597416267e-06, "loss": 0.2189, "step": 196650 }, { "epoch": 1.93, "grad_norm": 3.7045137882232666, "learning_rate": 4.073624474962019e-06, "loss": 0.2473, "step": 196675 }, { "epoch": 1.93, "grad_norm": 7.579292297363281, "learning_rate": 4.0735003525077705e-06, "loss": 0.2423, "step": 196700 }, { "epoch": 1.93, "grad_norm": 6.956913471221924, "learning_rate": 4.073376230053522e-06, "loss": 0.2098, "step": 196725 }, { "epoch": 1.93, "grad_norm": 12.920439720153809, "learning_rate": 4.073252107599274e-06, "loss": 0.2092, "step": 196750 }, { "epoch": 1.93, "grad_norm": 4.105006217956543, "learning_rate": 4.073127985145025e-06, "loss": 0.2153, "step": 196775 }, { "epoch": 1.93, "grad_norm": 41.07407760620117, "learning_rate": 4.073003862690776e-06, "loss": 0.2167, "step": 196800 }, { "epoch": 1.94, "grad_norm": 5.888011932373047, "learning_rate": 4.072879740236528e-06, "loss": 0.2037, "step": 196825 }, { "epoch": 1.94, "grad_norm": 15.028337478637695, "learning_rate": 4.0727556177822794e-06, "loss": 0.1758, "step": 196850 }, { "epoch": 1.94, "grad_norm": 13.24609375, "learning_rate": 4.072631495328031e-06, "loss": 0.2209, "step": 196875 }, { "epoch": 1.94, "grad_norm": 9.211795806884766, "learning_rate": 4.072507372873783e-06, "loss": 0.1785, "step": 196900 }, { "epoch": 1.94, "grad_norm": 7.5579304695129395, "learning_rate": 4.072383250419534e-06, "loss": 0.2312, "step": 196925 }, { "epoch": 1.94, "grad_norm": 15.037684440612793, "learning_rate": 4.072259127965286e-06, "loss": 0.1989, "step": 196950 }, { "epoch": 1.94, "grad_norm": 4.277868270874023, "learning_rate": 4.072135005511037e-06, "loss": 0.2157, "step": 196975 }, { "epoch": 1.94, "grad_norm": 6.948558807373047, "learning_rate": 4.072010883056789e-06, "loss": 0.2354, "step": 197000 }, { "epoch": 1.94, "grad_norm": 9.603360176086426, "learning_rate": 4.0718867606025405e-06, "loss": 0.2277, "step": 197025 }, { "epoch": 1.94, "grad_norm": 13.423871994018555, "learning_rate": 4.071762638148292e-06, "loss": 0.2424, "step": 197050 }, { "epoch": 1.94, "grad_norm": 6.922965049743652, "learning_rate": 4.071638515694044e-06, "loss": 0.2267, "step": 197075 }, { "epoch": 1.94, "grad_norm": 11.228058815002441, "learning_rate": 4.071514393239795e-06, "loss": 0.2031, "step": 197100 }, { "epoch": 1.94, "grad_norm": 0.523689329624176, "learning_rate": 4.071390270785546e-06, "loss": 0.2063, "step": 197125 }, { "epoch": 1.94, "grad_norm": 17.650943756103516, "learning_rate": 4.071266148331298e-06, "loss": 0.2716, "step": 197150 }, { "epoch": 1.94, "grad_norm": 7.532177448272705, "learning_rate": 4.071142025877049e-06, "loss": 0.1885, "step": 197175 }, { "epoch": 1.94, "grad_norm": 10.720198631286621, "learning_rate": 4.071017903422801e-06, "loss": 0.2478, "step": 197200 }, { "epoch": 1.94, "grad_norm": 7.372257709503174, "learning_rate": 4.070893780968553e-06, "loss": 0.2264, "step": 197225 }, { "epoch": 1.94, "grad_norm": 12.998869895935059, "learning_rate": 4.070769658514304e-06, "loss": 0.1664, "step": 197250 }, { "epoch": 1.94, "grad_norm": 9.505074501037598, "learning_rate": 4.070645536060055e-06, "loss": 0.201, "step": 197275 }, { "epoch": 1.94, "grad_norm": 17.075881958007812, "learning_rate": 4.070521413605807e-06, "loss": 0.2377, "step": 197300 }, { "epoch": 1.94, "grad_norm": 6.433999538421631, "learning_rate": 4.070397291151558e-06, "loss": 0.2309, "step": 197325 }, { "epoch": 1.94, "grad_norm": 10.737759590148926, "learning_rate": 4.0702731686973104e-06, "loss": 0.2229, "step": 197350 }, { "epoch": 1.94, "grad_norm": 0.9789428114891052, "learning_rate": 4.070149046243062e-06, "loss": 0.1839, "step": 197375 }, { "epoch": 1.94, "grad_norm": 14.643938064575195, "learning_rate": 4.070024923788814e-06, "loss": 0.2783, "step": 197400 }, { "epoch": 1.94, "grad_norm": 5.470950126647949, "learning_rate": 4.069900801334565e-06, "loss": 0.2279, "step": 197425 }, { "epoch": 1.94, "grad_norm": 15.507447242736816, "learning_rate": 4.069776678880317e-06, "loss": 0.2055, "step": 197450 }, { "epoch": 1.94, "grad_norm": 8.940568923950195, "learning_rate": 4.069652556426068e-06, "loss": 0.2021, "step": 197475 }, { "epoch": 1.94, "grad_norm": 10.788300514221191, "learning_rate": 4.069528433971819e-06, "loss": 0.1697, "step": 197500 }, { "epoch": 1.94, "grad_norm": 6.370805263519287, "learning_rate": 4.0694043115175715e-06, "loss": 0.1868, "step": 197525 }, { "epoch": 1.94, "grad_norm": 16.309871673583984, "learning_rate": 4.069280189063323e-06, "loss": 0.243, "step": 197550 }, { "epoch": 1.94, "grad_norm": 5.048270225524902, "learning_rate": 4.069156066609074e-06, "loss": 0.2176, "step": 197575 }, { "epoch": 1.94, "grad_norm": 15.103903770446777, "learning_rate": 4.069031944154826e-06, "loss": 0.2209, "step": 197600 }, { "epoch": 1.94, "grad_norm": 5.712234973907471, "learning_rate": 4.068907821700577e-06, "loss": 0.2124, "step": 197625 }, { "epoch": 1.94, "grad_norm": 15.103394508361816, "learning_rate": 4.068783699246328e-06, "loss": 0.1843, "step": 197650 }, { "epoch": 1.94, "grad_norm": 4.7383880615234375, "learning_rate": 4.06865957679208e-06, "loss": 0.2048, "step": 197675 }, { "epoch": 1.94, "grad_norm": 14.87197208404541, "learning_rate": 4.068535454337832e-06, "loss": 0.2549, "step": 197700 }, { "epoch": 1.94, "grad_norm": 7.992271423339844, "learning_rate": 4.068411331883584e-06, "loss": 0.2602, "step": 197725 }, { "epoch": 1.94, "grad_norm": 28.507173538208008, "learning_rate": 4.068287209429335e-06, "loss": 0.2665, "step": 197750 }, { "epoch": 1.94, "grad_norm": 13.519959449768066, "learning_rate": 4.068163086975087e-06, "loss": 0.2007, "step": 197775 }, { "epoch": 1.94, "grad_norm": 11.232749938964844, "learning_rate": 4.068038964520838e-06, "loss": 0.2459, "step": 197800 }, { "epoch": 1.95, "grad_norm": 7.931975841522217, "learning_rate": 4.06791484206659e-06, "loss": 0.2409, "step": 197825 }, { "epoch": 1.95, "grad_norm": 14.622480392456055, "learning_rate": 4.0677907196123414e-06, "loss": 0.1956, "step": 197850 }, { "epoch": 1.95, "grad_norm": 3.3766932487487793, "learning_rate": 4.067666597158093e-06, "loss": 0.1966, "step": 197875 }, { "epoch": 1.95, "grad_norm": 22.635541915893555, "learning_rate": 4.067542474703844e-06, "loss": 0.2238, "step": 197900 }, { "epoch": 1.95, "grad_norm": 4.10338020324707, "learning_rate": 4.067418352249596e-06, "loss": 0.1925, "step": 197925 }, { "epoch": 1.95, "grad_norm": 14.102805137634277, "learning_rate": 4.067294229795347e-06, "loss": 0.2382, "step": 197950 }, { "epoch": 1.95, "grad_norm": 13.168622970581055, "learning_rate": 4.067170107341098e-06, "loss": 0.2212, "step": 197975 }, { "epoch": 1.95, "grad_norm": 12.649663925170898, "learning_rate": 4.06704598488685e-06, "loss": 0.2615, "step": 198000 }, { "epoch": 1.95, "grad_norm": 6.904087543487549, "learning_rate": 4.0669268273307714e-06, "loss": 0.2396, "step": 198025 }, { "epoch": 1.95, "grad_norm": 8.95108699798584, "learning_rate": 4.0668027048765235e-06, "loss": 0.2133, "step": 198050 }, { "epoch": 1.95, "grad_norm": 4.395055294036865, "learning_rate": 4.066678582422275e-06, "loss": 0.2216, "step": 198075 }, { "epoch": 1.95, "grad_norm": 19.826801300048828, "learning_rate": 4.066554459968026e-06, "loss": 0.2541, "step": 198100 }, { "epoch": 1.95, "grad_norm": 6.095859050750732, "learning_rate": 4.066430337513778e-06, "loss": 0.2412, "step": 198125 }, { "epoch": 1.95, "grad_norm": 11.110677719116211, "learning_rate": 4.066306215059529e-06, "loss": 0.2073, "step": 198150 }, { "epoch": 1.95, "grad_norm": 6.711886882781982, "learning_rate": 4.066182092605281e-06, "loss": 0.2028, "step": 198175 }, { "epoch": 1.95, "grad_norm": 6.73745059967041, "learning_rate": 4.0660579701510325e-06, "loss": 0.2444, "step": 198200 }, { "epoch": 1.95, "grad_norm": 5.710333824157715, "learning_rate": 4.0659338476967845e-06, "loss": 0.1938, "step": 198225 }, { "epoch": 1.95, "grad_norm": 15.29339599609375, "learning_rate": 4.065809725242536e-06, "loss": 0.2776, "step": 198250 }, { "epoch": 1.95, "grad_norm": 2.77449893951416, "learning_rate": 4.065685602788288e-06, "loss": 0.1908, "step": 198275 }, { "epoch": 1.95, "grad_norm": 12.402874946594238, "learning_rate": 4.065561480334039e-06, "loss": 0.2228, "step": 198300 }, { "epoch": 1.95, "grad_norm": 7.996497631072998, "learning_rate": 4.06543735787979e-06, "loss": 0.2602, "step": 198325 }, { "epoch": 1.95, "grad_norm": 15.665895462036133, "learning_rate": 4.065313235425542e-06, "loss": 0.2364, "step": 198350 }, { "epoch": 1.95, "grad_norm": 6.873963356018066, "learning_rate": 4.0651891129712935e-06, "loss": 0.1945, "step": 198375 }, { "epoch": 1.95, "grad_norm": 12.591796875, "learning_rate": 4.065064990517045e-06, "loss": 0.2527, "step": 198400 }, { "epoch": 1.95, "grad_norm": 13.738988876342773, "learning_rate": 4.064940868062797e-06, "loss": 0.2418, "step": 198425 }, { "epoch": 1.95, "grad_norm": 12.029495239257812, "learning_rate": 4.064816745608548e-06, "loss": 0.2211, "step": 198450 }, { "epoch": 1.95, "grad_norm": 4.545374393463135, "learning_rate": 4.064692623154299e-06, "loss": 0.2328, "step": 198475 }, { "epoch": 1.95, "grad_norm": 12.482081413269043, "learning_rate": 4.06456850070005e-06, "loss": 0.2377, "step": 198500 }, { "epoch": 1.95, "grad_norm": 4.132186412811279, "learning_rate": 4.0644443782458024e-06, "loss": 0.2126, "step": 198525 }, { "epoch": 1.95, "grad_norm": 11.948406219482422, "learning_rate": 4.064320255791554e-06, "loss": 0.2329, "step": 198550 }, { "epoch": 1.95, "grad_norm": 3.0975329875946045, "learning_rate": 4.064196133337306e-06, "loss": 0.2013, "step": 198575 }, { "epoch": 1.95, "grad_norm": 15.559049606323242, "learning_rate": 4.064072010883057e-06, "loss": 0.2608, "step": 198600 }, { "epoch": 1.95, "grad_norm": 9.371867179870605, "learning_rate": 4.063947888428809e-06, "loss": 0.1741, "step": 198625 }, { "epoch": 1.95, "grad_norm": 17.33253288269043, "learning_rate": 4.06382376597456e-06, "loss": 0.1943, "step": 198650 }, { "epoch": 1.95, "grad_norm": 4.2885332107543945, "learning_rate": 4.063699643520312e-06, "loss": 0.2174, "step": 198675 }, { "epoch": 1.95, "grad_norm": 16.068517684936523, "learning_rate": 4.0635755210660635e-06, "loss": 0.2187, "step": 198700 }, { "epoch": 1.95, "grad_norm": 3.7677388191223145, "learning_rate": 4.063451398611815e-06, "loss": 0.2227, "step": 198725 }, { "epoch": 1.95, "grad_norm": 15.941574096679688, "learning_rate": 4.063327276157567e-06, "loss": 0.2602, "step": 198750 }, { "epoch": 1.95, "grad_norm": 5.110500335693359, "learning_rate": 4.063203153703318e-06, "loss": 0.1918, "step": 198775 }, { "epoch": 1.95, "grad_norm": 8.452723503112793, "learning_rate": 4.063079031249069e-06, "loss": 0.2197, "step": 198800 }, { "epoch": 1.95, "grad_norm": 9.878830909729004, "learning_rate": 4.062954908794821e-06, "loss": 0.2611, "step": 198825 }, { "epoch": 1.96, "grad_norm": 14.388200759887695, "learning_rate": 4.0628307863405724e-06, "loss": 0.2554, "step": 198850 }, { "epoch": 1.96, "grad_norm": 8.442989349365234, "learning_rate": 4.062706663886324e-06, "loss": 0.2461, "step": 198875 }, { "epoch": 1.96, "grad_norm": 11.544061660766602, "learning_rate": 4.062582541432076e-06, "loss": 0.2475, "step": 198900 }, { "epoch": 1.96, "grad_norm": 3.047581911087036, "learning_rate": 4.062458418977827e-06, "loss": 0.2071, "step": 198925 }, { "epoch": 1.96, "grad_norm": 14.80742073059082, "learning_rate": 4.062334296523578e-06, "loss": 0.2744, "step": 198950 }, { "epoch": 1.96, "grad_norm": 5.119495868682861, "learning_rate": 4.06221017406933e-06, "loss": 0.2063, "step": 198975 }, { "epoch": 1.96, "grad_norm": 19.21347999572754, "learning_rate": 4.062086051615081e-06, "loss": 0.2802, "step": 199000 }, { "epoch": 1.96, "grad_norm": 8.098435401916504, "learning_rate": 4.0619619291608334e-06, "loss": 0.1852, "step": 199025 }, { "epoch": 1.96, "grad_norm": 17.463119506835938, "learning_rate": 4.061837806706585e-06, "loss": 0.2338, "step": 199050 }, { "epoch": 1.96, "grad_norm": 0.847234308719635, "learning_rate": 4.061713684252337e-06, "loss": 0.2228, "step": 199075 }, { "epoch": 1.96, "grad_norm": 11.500375747680664, "learning_rate": 4.061589561798088e-06, "loss": 0.2138, "step": 199100 }, { "epoch": 1.96, "grad_norm": 8.1564302444458, "learning_rate": 4.06146543934384e-06, "loss": 0.1663, "step": 199125 }, { "epoch": 1.96, "grad_norm": 10.314214706420898, "learning_rate": 4.061341316889591e-06, "loss": 0.3055, "step": 199150 }, { "epoch": 1.96, "grad_norm": 7.3760986328125, "learning_rate": 4.061217194435342e-06, "loss": 0.2302, "step": 199175 }, { "epoch": 1.96, "grad_norm": 12.89625072479248, "learning_rate": 4.0610930719810945e-06, "loss": 0.2485, "step": 199200 }, { "epoch": 1.96, "grad_norm": 5.772853851318359, "learning_rate": 4.060968949526846e-06, "loss": 0.23, "step": 199225 }, { "epoch": 1.96, "grad_norm": 12.842764854431152, "learning_rate": 4.060844827072597e-06, "loss": 0.2095, "step": 199250 }, { "epoch": 1.96, "grad_norm": 2.8457682132720947, "learning_rate": 4.060720704618349e-06, "loss": 0.2182, "step": 199275 }, { "epoch": 1.96, "grad_norm": 26.960737228393555, "learning_rate": 4.0605965821641e-06, "loss": 0.1932, "step": 199300 }, { "epoch": 1.96, "grad_norm": 6.177461624145508, "learning_rate": 4.060472459709851e-06, "loss": 0.1976, "step": 199325 }, { "epoch": 1.96, "grad_norm": 19.034011840820312, "learning_rate": 4.060348337255603e-06, "loss": 0.2495, "step": 199350 }, { "epoch": 1.96, "grad_norm": 7.148341178894043, "learning_rate": 4.060224214801355e-06, "loss": 0.1911, "step": 199375 }, { "epoch": 1.96, "grad_norm": 13.29416561126709, "learning_rate": 4.060100092347106e-06, "loss": 0.2237, "step": 199400 }, { "epoch": 1.96, "grad_norm": 0.20475926995277405, "learning_rate": 4.059975969892858e-06, "loss": 0.1809, "step": 199425 }, { "epoch": 1.96, "grad_norm": 12.304292678833008, "learning_rate": 4.059851847438609e-06, "loss": 0.2368, "step": 199450 }, { "epoch": 1.96, "grad_norm": 4.794045448303223, "learning_rate": 4.059727724984361e-06, "loss": 0.2398, "step": 199475 }, { "epoch": 1.96, "grad_norm": 9.526657104492188, "learning_rate": 4.059603602530112e-06, "loss": 0.1975, "step": 199500 }, { "epoch": 1.96, "grad_norm": 4.209308624267578, "learning_rate": 4.0594794800758645e-06, "loss": 0.2323, "step": 199525 }, { "epoch": 1.96, "grad_norm": 18.049392700195312, "learning_rate": 4.059355357621616e-06, "loss": 0.2221, "step": 199550 }, { "epoch": 1.96, "grad_norm": 3.7349495887756348, "learning_rate": 4.059231235167367e-06, "loss": 0.2099, "step": 199575 }, { "epoch": 1.96, "grad_norm": 17.32075309753418, "learning_rate": 4.059107112713119e-06, "loss": 0.2653, "step": 199600 }, { "epoch": 1.96, "grad_norm": 16.48921775817871, "learning_rate": 4.05898299025887e-06, "loss": 0.2038, "step": 199625 }, { "epoch": 1.96, "grad_norm": 14.396713256835938, "learning_rate": 4.058858867804621e-06, "loss": 0.2377, "step": 199650 }, { "epoch": 1.96, "grad_norm": 4.777568340301514, "learning_rate": 4.058734745350373e-06, "loss": 0.2442, "step": 199675 }, { "epoch": 1.96, "grad_norm": 11.16960334777832, "learning_rate": 4.058610622896125e-06, "loss": 0.2596, "step": 199700 }, { "epoch": 1.96, "grad_norm": 6.792009353637695, "learning_rate": 4.058486500441876e-06, "loss": 0.3036, "step": 199725 }, { "epoch": 1.96, "grad_norm": 14.901460647583008, "learning_rate": 4.058362377987628e-06, "loss": 0.2526, "step": 199750 }, { "epoch": 1.96, "grad_norm": 4.730118751525879, "learning_rate": 4.058238255533379e-06, "loss": 0.2056, "step": 199775 }, { "epoch": 1.96, "grad_norm": 18.774028778076172, "learning_rate": 4.05811413307913e-06, "loss": 0.2648, "step": 199800 }, { "epoch": 1.96, "grad_norm": 4.576821804046631, "learning_rate": 4.057990010624882e-06, "loss": 0.2043, "step": 199825 }, { "epoch": 1.96, "grad_norm": 18.808420181274414, "learning_rate": 4.057865888170634e-06, "loss": 0.2333, "step": 199850 }, { "epoch": 1.97, "grad_norm": 4.528355598449707, "learning_rate": 4.057741765716386e-06, "loss": 0.1919, "step": 199875 }, { "epoch": 1.97, "grad_norm": 15.285218238830566, "learning_rate": 4.057617643262137e-06, "loss": 0.2638, "step": 199900 }, { "epoch": 1.97, "grad_norm": 5.981906414031982, "learning_rate": 4.057493520807889e-06, "loss": 0.2054, "step": 199925 }, { "epoch": 1.97, "grad_norm": 4.556251525878906, "learning_rate": 4.05736939835364e-06, "loss": 0.2749, "step": 199950 }, { "epoch": 1.97, "grad_norm": 7.454943656921387, "learning_rate": 4.057245275899392e-06, "loss": 0.2576, "step": 199975 }, { "epoch": 1.97, "grad_norm": 9.278565406799316, "learning_rate": 4.057121153445143e-06, "loss": 0.2138, "step": 200000 }, { "epoch": 1.97, "eval_loss": 0.5201025605201721, "eval_runtime": 6050.9762, "eval_samples_per_second": 1.565, "eval_steps_per_second": 0.196, "eval_wer": 0.1272935041524496, "step": 200000 }, { "epoch": 1.97, "grad_norm": 4.522525310516357, "learning_rate": 4.056997030990895e-06, "loss": 0.1779, "step": 200025 }, { "epoch": 1.97, "grad_norm": 17.310855865478516, "learning_rate": 4.056872908536647e-06, "loss": 0.2608, "step": 200050 }, { "epoch": 1.97, "grad_norm": 7.716192245483398, "learning_rate": 4.056748786082398e-06, "loss": 0.2181, "step": 200075 }, { "epoch": 1.97, "grad_norm": 15.418451309204102, "learning_rate": 4.056624663628149e-06, "loss": 0.2054, "step": 200100 }, { "epoch": 1.97, "grad_norm": 4.536891460418701, "learning_rate": 4.056500541173901e-06, "loss": 0.2165, "step": 200125 }, { "epoch": 1.97, "grad_norm": 9.281734466552734, "learning_rate": 4.056376418719652e-06, "loss": 0.2299, "step": 200150 }, { "epoch": 1.97, "grad_norm": 5.1738152503967285, "learning_rate": 4.0562522962654036e-06, "loss": 0.1827, "step": 200175 }, { "epoch": 1.97, "grad_norm": 12.455413818359375, "learning_rate": 4.056128173811155e-06, "loss": 0.2799, "step": 200200 }, { "epoch": 1.97, "grad_norm": 4.559409141540527, "learning_rate": 4.056004051356907e-06, "loss": 0.2491, "step": 200225 }, { "epoch": 1.97, "grad_norm": 18.327430725097656, "learning_rate": 4.055879928902658e-06, "loss": 0.2582, "step": 200250 }, { "epoch": 1.97, "grad_norm": 5.175022602081299, "learning_rate": 4.05575580644841e-06, "loss": 0.2235, "step": 200275 }, { "epoch": 1.97, "grad_norm": 9.32681655883789, "learning_rate": 4.055631683994161e-06, "loss": 0.2209, "step": 200300 }, { "epoch": 1.97, "grad_norm": 3.190070629119873, "learning_rate": 4.055512526438083e-06, "loss": 0.2214, "step": 200325 }, { "epoch": 1.97, "grad_norm": 6.033173561096191, "learning_rate": 4.055388403983834e-06, "loss": 0.2148, "step": 200350 }, { "epoch": 1.97, "grad_norm": 6.044315814971924, "learning_rate": 4.0552642815295865e-06, "loss": 0.1917, "step": 200375 }, { "epoch": 1.97, "grad_norm": 15.363276481628418, "learning_rate": 4.055140159075338e-06, "loss": 0.2658, "step": 200400 }, { "epoch": 1.97, "grad_norm": 6.606573581695557, "learning_rate": 4.05501603662109e-06, "loss": 0.2045, "step": 200425 }, { "epoch": 1.97, "grad_norm": 14.087454795837402, "learning_rate": 4.054891914166841e-06, "loss": 0.2614, "step": 200450 }, { "epoch": 1.97, "grad_norm": 8.412039756774902, "learning_rate": 4.054767791712592e-06, "loss": 0.2449, "step": 200475 }, { "epoch": 1.97, "grad_norm": 14.039582252502441, "learning_rate": 4.054643669258344e-06, "loss": 0.2412, "step": 200500 }, { "epoch": 1.97, "grad_norm": 1.2167582511901855, "learning_rate": 4.0545195468040954e-06, "loss": 0.1782, "step": 200525 }, { "epoch": 1.97, "grad_norm": 13.781458854675293, "learning_rate": 4.054395424349847e-06, "loss": 0.2732, "step": 200550 }, { "epoch": 1.97, "grad_norm": 7.551527500152588, "learning_rate": 4.054271301895599e-06, "loss": 0.2106, "step": 200575 }, { "epoch": 1.97, "grad_norm": 34.324214935302734, "learning_rate": 4.05414717944135e-06, "loss": 0.23, "step": 200600 }, { "epoch": 1.97, "grad_norm": 5.053399085998535, "learning_rate": 4.054023056987101e-06, "loss": 0.2245, "step": 200625 }, { "epoch": 1.97, "grad_norm": 9.30048656463623, "learning_rate": 4.053898934532853e-06, "loss": 0.2026, "step": 200650 }, { "epoch": 1.97, "grad_norm": 2.4735476970672607, "learning_rate": 4.053774812078604e-06, "loss": 0.2188, "step": 200675 }, { "epoch": 1.97, "grad_norm": 11.090798377990723, "learning_rate": 4.0536506896243565e-06, "loss": 0.2626, "step": 200700 }, { "epoch": 1.97, "grad_norm": 5.639332294464111, "learning_rate": 4.053526567170108e-06, "loss": 0.2536, "step": 200725 }, { "epoch": 1.97, "grad_norm": 15.156982421875, "learning_rate": 4.05340244471586e-06, "loss": 0.2677, "step": 200750 }, { "epoch": 1.97, "grad_norm": 5.852256774902344, "learning_rate": 4.053278322261611e-06, "loss": 0.202, "step": 200775 }, { "epoch": 1.97, "grad_norm": 13.75415325164795, "learning_rate": 4.053154199807362e-06, "loss": 0.2563, "step": 200800 }, { "epoch": 1.97, "grad_norm": 4.1732378005981445, "learning_rate": 4.053030077353114e-06, "loss": 0.2505, "step": 200825 }, { "epoch": 1.97, "grad_norm": 9.762422561645508, "learning_rate": 4.052905954898865e-06, "loss": 0.2495, "step": 200850 }, { "epoch": 1.98, "grad_norm": 1.181623935699463, "learning_rate": 4.052781832444617e-06, "loss": 0.2645, "step": 200875 }, { "epoch": 1.98, "grad_norm": 19.6956729888916, "learning_rate": 4.052657709990369e-06, "loss": 0.2244, "step": 200900 }, { "epoch": 1.98, "grad_norm": 2.7035012245178223, "learning_rate": 4.05253358753612e-06, "loss": 0.2609, "step": 200925 }, { "epoch": 1.98, "grad_norm": 7.333357810974121, "learning_rate": 4.052409465081871e-06, "loss": 0.2433, "step": 200950 }, { "epoch": 1.98, "grad_norm": 4.620270729064941, "learning_rate": 4.052285342627623e-06, "loss": 0.2105, "step": 200975 }, { "epoch": 1.98, "grad_norm": 14.808886528015137, "learning_rate": 4.052161220173374e-06, "loss": 0.2156, "step": 201000 }, { "epoch": 1.98, "grad_norm": 7.3911871910095215, "learning_rate": 4.052037097719126e-06, "loss": 0.2539, "step": 201025 }, { "epoch": 1.98, "grad_norm": 9.630346298217773, "learning_rate": 4.051912975264878e-06, "loss": 0.3099, "step": 201050 }, { "epoch": 1.98, "grad_norm": 10.741307258605957, "learning_rate": 4.051788852810629e-06, "loss": 0.1823, "step": 201075 }, { "epoch": 1.98, "grad_norm": 10.573957443237305, "learning_rate": 4.051664730356381e-06, "loss": 0.234, "step": 201100 }, { "epoch": 1.98, "grad_norm": 5.85478401184082, "learning_rate": 4.051540607902132e-06, "loss": 0.2466, "step": 201125 }, { "epoch": 1.98, "grad_norm": 16.052255630493164, "learning_rate": 4.051416485447884e-06, "loss": 0.2133, "step": 201150 }, { "epoch": 1.98, "grad_norm": 5.7588605880737305, "learning_rate": 4.051292362993635e-06, "loss": 0.2292, "step": 201175 }, { "epoch": 1.98, "grad_norm": 17.460941314697266, "learning_rate": 4.0511682405393875e-06, "loss": 0.2285, "step": 201200 }, { "epoch": 1.98, "grad_norm": 0.44187310338020325, "learning_rate": 4.051044118085139e-06, "loss": 0.2318, "step": 201225 }, { "epoch": 1.98, "grad_norm": 14.34783935546875, "learning_rate": 4.05091999563089e-06, "loss": 0.2754, "step": 201250 }, { "epoch": 1.98, "grad_norm": 10.41730785369873, "learning_rate": 4.050795873176642e-06, "loss": 0.2227, "step": 201275 }, { "epoch": 1.98, "grad_norm": 16.027647018432617, "learning_rate": 4.050671750722393e-06, "loss": 0.236, "step": 201300 }, { "epoch": 1.98, "grad_norm": 2.2689456939697266, "learning_rate": 4.050547628268144e-06, "loss": 0.2332, "step": 201325 }, { "epoch": 1.98, "grad_norm": 18.87653350830078, "learning_rate": 4.050423505813896e-06, "loss": 0.2287, "step": 201350 }, { "epoch": 1.98, "grad_norm": 0.9627447128295898, "learning_rate": 4.050299383359648e-06, "loss": 0.2259, "step": 201375 }, { "epoch": 1.98, "grad_norm": 12.170136451721191, "learning_rate": 4.050175260905399e-06, "loss": 0.2309, "step": 201400 }, { "epoch": 1.98, "grad_norm": 0.8914557695388794, "learning_rate": 4.050051138451151e-06, "loss": 0.2044, "step": 201425 }, { "epoch": 1.98, "grad_norm": 16.11665153503418, "learning_rate": 4.049927015996902e-06, "loss": 0.2362, "step": 201450 }, { "epoch": 1.98, "grad_norm": 4.514730930328369, "learning_rate": 4.049802893542653e-06, "loss": 0.2685, "step": 201475 }, { "epoch": 1.98, "grad_norm": 7.336154460906982, "learning_rate": 4.049678771088405e-06, "loss": 0.253, "step": 201500 }, { "epoch": 1.98, "grad_norm": 5.811469078063965, "learning_rate": 4.049554648634157e-06, "loss": 0.2354, "step": 201525 }, { "epoch": 1.98, "grad_norm": 13.101239204406738, "learning_rate": 4.049430526179909e-06, "loss": 0.2263, "step": 201550 }, { "epoch": 1.98, "grad_norm": 12.948062896728516, "learning_rate": 4.04930640372566e-06, "loss": 0.2742, "step": 201575 }, { "epoch": 1.98, "grad_norm": 5.808104991912842, "learning_rate": 4.049182281271412e-06, "loss": 0.2369, "step": 201600 }, { "epoch": 1.98, "grad_norm": 6.639456748962402, "learning_rate": 4.049058158817163e-06, "loss": 0.24, "step": 201625 }, { "epoch": 1.98, "grad_norm": 14.403810501098633, "learning_rate": 4.048934036362914e-06, "loss": 0.2274, "step": 201650 }, { "epoch": 1.98, "grad_norm": 9.247907638549805, "learning_rate": 4.048809913908666e-06, "loss": 0.2301, "step": 201675 }, { "epoch": 1.98, "grad_norm": 9.376452445983887, "learning_rate": 4.048685791454418e-06, "loss": 0.2196, "step": 201700 }, { "epoch": 1.98, "grad_norm": 8.395248413085938, "learning_rate": 4.048561669000169e-06, "loss": 0.262, "step": 201725 }, { "epoch": 1.98, "grad_norm": 12.126043319702148, "learning_rate": 4.048437546545921e-06, "loss": 0.1993, "step": 201750 }, { "epoch": 1.98, "grad_norm": 6.708484172821045, "learning_rate": 4.048313424091672e-06, "loss": 0.2112, "step": 201775 }, { "epoch": 1.98, "grad_norm": 14.872969627380371, "learning_rate": 4.048189301637423e-06, "loss": 0.3108, "step": 201800 }, { "epoch": 1.98, "grad_norm": 4.688320636749268, "learning_rate": 4.048065179183175e-06, "loss": 0.2378, "step": 201825 }, { "epoch": 1.98, "grad_norm": 19.530200958251953, "learning_rate": 4.047941056728927e-06, "loss": 0.2919, "step": 201850 }, { "epoch": 1.98, "grad_norm": 3.8746328353881836, "learning_rate": 4.047816934274678e-06, "loss": 0.2138, "step": 201875 }, { "epoch": 1.99, "grad_norm": 11.458592414855957, "learning_rate": 4.04769281182043e-06, "loss": 0.2286, "step": 201900 }, { "epoch": 1.99, "grad_norm": 2.007598400115967, "learning_rate": 4.047568689366181e-06, "loss": 0.1627, "step": 201925 }, { "epoch": 1.99, "grad_norm": 9.07752513885498, "learning_rate": 4.047444566911933e-06, "loss": 0.2403, "step": 201950 }, { "epoch": 1.99, "grad_norm": 7.71641206741333, "learning_rate": 4.047320444457684e-06, "loss": 0.2011, "step": 201975 }, { "epoch": 1.99, "grad_norm": 13.544942855834961, "learning_rate": 4.047196322003436e-06, "loss": 0.2041, "step": 202000 }, { "epoch": 1.99, "grad_norm": 5.674728870391846, "learning_rate": 4.047072199549188e-06, "loss": 0.2067, "step": 202025 }, { "epoch": 1.99, "grad_norm": 16.941999435424805, "learning_rate": 4.04694807709494e-06, "loss": 0.2488, "step": 202050 }, { "epoch": 1.99, "grad_norm": 2.5962653160095215, "learning_rate": 4.046823954640691e-06, "loss": 0.2877, "step": 202075 }, { "epoch": 1.99, "grad_norm": 8.25964069366455, "learning_rate": 4.046699832186442e-06, "loss": 0.2326, "step": 202100 }, { "epoch": 1.99, "grad_norm": 6.959245204925537, "learning_rate": 4.046575709732194e-06, "loss": 0.2312, "step": 202125 }, { "epoch": 1.99, "grad_norm": 21.493803024291992, "learning_rate": 4.046451587277945e-06, "loss": 0.2658, "step": 202150 }, { "epoch": 1.99, "grad_norm": 3.978259563446045, "learning_rate": 4.0463274648236966e-06, "loss": 0.2435, "step": 202175 }, { "epoch": 1.99, "grad_norm": 19.114131927490234, "learning_rate": 4.046203342369449e-06, "loss": 0.2138, "step": 202200 }, { "epoch": 1.99, "grad_norm": 3.729468584060669, "learning_rate": 4.0460792199152e-06, "loss": 0.2117, "step": 202225 }, { "epoch": 1.99, "grad_norm": 10.391034126281738, "learning_rate": 4.045955097460951e-06, "loss": 0.2538, "step": 202250 }, { "epoch": 1.99, "grad_norm": 14.136128425598145, "learning_rate": 4.045830975006703e-06, "loss": 0.2431, "step": 202275 }, { "epoch": 1.99, "grad_norm": 14.939757347106934, "learning_rate": 4.045706852552454e-06, "loss": 0.2152, "step": 202300 }, { "epoch": 1.99, "grad_norm": 6.431245803833008, "learning_rate": 4.0455827300982055e-06, "loss": 0.2739, "step": 202325 }, { "epoch": 1.99, "grad_norm": 11.956032752990723, "learning_rate": 4.045463572542127e-06, "loss": 0.2664, "step": 202350 }, { "epoch": 1.99, "grad_norm": 2.4783949851989746, "learning_rate": 4.045339450087879e-06, "loss": 0.1915, "step": 202375 }, { "epoch": 1.99, "grad_norm": 15.331034660339355, "learning_rate": 4.045215327633631e-06, "loss": 0.2611, "step": 202400 }, { "epoch": 1.99, "grad_norm": 3.9847395420074463, "learning_rate": 4.045091205179382e-06, "loss": 0.2345, "step": 202425 }, { "epoch": 1.99, "grad_norm": 12.008561134338379, "learning_rate": 4.044967082725134e-06, "loss": 0.1802, "step": 202450 }, { "epoch": 1.99, "grad_norm": 9.962615966796875, "learning_rate": 4.044842960270885e-06, "loss": 0.2688, "step": 202475 }, { "epoch": 1.99, "grad_norm": 16.801986694335938, "learning_rate": 4.044718837816637e-06, "loss": 0.1948, "step": 202500 }, { "epoch": 1.99, "grad_norm": 6.920836448669434, "learning_rate": 4.044594715362388e-06, "loss": 0.2409, "step": 202525 }, { "epoch": 1.99, "grad_norm": 13.002252578735352, "learning_rate": 4.04447059290814e-06, "loss": 0.2749, "step": 202550 }, { "epoch": 1.99, "grad_norm": 3.503894805908203, "learning_rate": 4.044346470453892e-06, "loss": 0.1963, "step": 202575 }, { "epoch": 1.99, "grad_norm": 21.859268188476562, "learning_rate": 4.044222347999643e-06, "loss": 0.2954, "step": 202600 }, { "epoch": 1.99, "grad_norm": 8.894759178161621, "learning_rate": 4.044098225545394e-06, "loss": 0.212, "step": 202625 }, { "epoch": 1.99, "grad_norm": 12.555532455444336, "learning_rate": 4.043974103091146e-06, "loss": 0.2812, "step": 202650 }, { "epoch": 1.99, "grad_norm": 7.665413856506348, "learning_rate": 4.043849980636897e-06, "loss": 0.2569, "step": 202675 }, { "epoch": 1.99, "grad_norm": 12.772773742675781, "learning_rate": 4.043725858182649e-06, "loss": 0.224, "step": 202700 }, { "epoch": 1.99, "grad_norm": 7.169820785522461, "learning_rate": 4.043601735728401e-06, "loss": 0.2655, "step": 202725 }, { "epoch": 1.99, "grad_norm": 12.408780097961426, "learning_rate": 4.043477613274152e-06, "loss": 0.2345, "step": 202750 }, { "epoch": 1.99, "grad_norm": 3.7110469341278076, "learning_rate": 4.043353490819903e-06, "loss": 0.185, "step": 202775 }, { "epoch": 1.99, "grad_norm": 9.891523361206055, "learning_rate": 4.043229368365655e-06, "loss": 0.2391, "step": 202800 }, { "epoch": 1.99, "grad_norm": 16.991657257080078, "learning_rate": 4.043105245911406e-06, "loss": 0.2146, "step": 202825 }, { "epoch": 1.99, "grad_norm": 12.668599128723145, "learning_rate": 4.042981123457158e-06, "loss": 0.2425, "step": 202850 }, { "epoch": 1.99, "grad_norm": 5.640872955322266, "learning_rate": 4.04285700100291e-06, "loss": 0.2251, "step": 202875 }, { "epoch": 1.99, "grad_norm": 13.910694122314453, "learning_rate": 4.042732878548662e-06, "loss": 0.2472, "step": 202900 }, { "epoch": 2.0, "grad_norm": 5.929598808288574, "learning_rate": 4.042608756094413e-06, "loss": 0.2215, "step": 202925 }, { "epoch": 2.0, "grad_norm": 7.457127571105957, "learning_rate": 4.042484633640165e-06, "loss": 0.2096, "step": 202950 }, { "epoch": 2.0, "grad_norm": 2.9873437881469727, "learning_rate": 4.042360511185916e-06, "loss": 0.2083, "step": 202975 }, { "epoch": 2.0, "grad_norm": 27.462543487548828, "learning_rate": 4.042236388731667e-06, "loss": 0.2475, "step": 203000 }, { "epoch": 2.0, "grad_norm": 5.502181529998779, "learning_rate": 4.042112266277419e-06, "loss": 0.2164, "step": 203025 }, { "epoch": 2.0, "grad_norm": 18.58012580871582, "learning_rate": 4.041988143823171e-06, "loss": 0.2192, "step": 203050 }, { "epoch": 2.0, "grad_norm": 7.29445743560791, "learning_rate": 4.041864021368922e-06, "loss": 0.2046, "step": 203075 }, { "epoch": 2.0, "grad_norm": 16.10295295715332, "learning_rate": 4.041739898914673e-06, "loss": 0.2547, "step": 203100 }, { "epoch": 2.0, "grad_norm": 10.456608772277832, "learning_rate": 4.041615776460425e-06, "loss": 0.2101, "step": 203125 }, { "epoch": 2.0, "grad_norm": 11.209512710571289, "learning_rate": 4.041491654006176e-06, "loss": 0.2912, "step": 203150 }, { "epoch": 2.0, "grad_norm": 9.658629417419434, "learning_rate": 4.0413675315519275e-06, "loss": 0.2043, "step": 203175 }, { "epoch": 2.0, "grad_norm": 11.331445693969727, "learning_rate": 4.04124340909768e-06, "loss": 0.295, "step": 203200 }, { "epoch": 2.0, "grad_norm": 10.824201583862305, "learning_rate": 4.041119286643431e-06, "loss": 0.2078, "step": 203225 }, { "epoch": 2.0, "grad_norm": 13.146023750305176, "learning_rate": 4.040995164189183e-06, "loss": 0.1957, "step": 203250 }, { "epoch": 2.0, "grad_norm": 6.310302257537842, "learning_rate": 4.040871041734934e-06, "loss": 0.2085, "step": 203275 }, { "epoch": 2.0, "grad_norm": 13.890501022338867, "learning_rate": 4.040746919280686e-06, "loss": 0.2282, "step": 203300 }, { "epoch": 2.0, "grad_norm": 4.714311122894287, "learning_rate": 4.040622796826437e-06, "loss": 0.1981, "step": 203325 }, { "epoch": 2.0, "grad_norm": 19.50672149658203, "learning_rate": 4.040498674372189e-06, "loss": 0.2446, "step": 203350 }, { "epoch": 2.0, "grad_norm": 2.7303624153137207, "learning_rate": 4.040374551917941e-06, "loss": 0.2158, "step": 203375 }, { "epoch": 2.0, "grad_norm": 14.171573638916016, "learning_rate": 4.040250429463692e-06, "loss": 0.2044, "step": 203400 }, { "epoch": 2.0, "grad_norm": 5.291664123535156, "learning_rate": 4.040126307009444e-06, "loss": 0.2006, "step": 203425 }, { "epoch": 2.0, "grad_norm": 9.877062797546387, "learning_rate": 4.040002184555195e-06, "loss": 0.1067, "step": 203450 }, { "epoch": 2.0, "grad_norm": 7.788100719451904, "learning_rate": 4.039878062100946e-06, "loss": 0.2186, "step": 203475 }, { "epoch": 2.0, "grad_norm": 7.251657962799072, "learning_rate": 4.039753939646698e-06, "loss": 0.0997, "step": 203500 }, { "epoch": 2.0, "grad_norm": 1.7101902961730957, "learning_rate": 4.03962981719245e-06, "loss": 0.2049, "step": 203525 }, { "epoch": 2.0, "grad_norm": 10.515421867370605, "learning_rate": 4.039505694738201e-06, "loss": 0.1598, "step": 203550 }, { "epoch": 2.0, "grad_norm": 3.453665256500244, "learning_rate": 4.039381572283953e-06, "loss": 0.1822, "step": 203575 }, { "epoch": 2.0, "grad_norm": 8.845614433288574, "learning_rate": 4.039257449829704e-06, "loss": 0.1205, "step": 203600 }, { "epoch": 2.0, "grad_norm": 4.2690510749816895, "learning_rate": 4.039133327375456e-06, "loss": 0.222, "step": 203625 }, { "epoch": 2.0, "grad_norm": 10.119515419006348, "learning_rate": 4.039009204921207e-06, "loss": 0.1106, "step": 203650 }, { "epoch": 2.0, "grad_norm": 4.22916316986084, "learning_rate": 4.038885082466959e-06, "loss": 0.1971, "step": 203675 }, { "epoch": 2.0, "grad_norm": 9.195722579956055, "learning_rate": 4.038760960012711e-06, "loss": 0.1253, "step": 203700 }, { "epoch": 2.0, "grad_norm": 4.467746257781982, "learning_rate": 4.038636837558463e-06, "loss": 0.1886, "step": 203725 }, { "epoch": 2.0, "grad_norm": 6.869723796844482, "learning_rate": 4.038512715104214e-06, "loss": 0.1326, "step": 203750 }, { "epoch": 2.0, "grad_norm": 4.783178806304932, "learning_rate": 4.038388592649965e-06, "loss": 0.2004, "step": 203775 }, { "epoch": 2.0, "grad_norm": 13.396275520324707, "learning_rate": 4.038264470195717e-06, "loss": 0.1215, "step": 203800 }, { "epoch": 2.0, "grad_norm": 2.7631609439849854, "learning_rate": 4.038140347741468e-06, "loss": 0.1987, "step": 203825 }, { "epoch": 2.0, "grad_norm": 9.546979904174805, "learning_rate": 4.0380162252872196e-06, "loss": 0.0956, "step": 203850 }, { "epoch": 2.0, "grad_norm": 6.8389668464660645, "learning_rate": 4.037892102832971e-06, "loss": 0.1982, "step": 203875 }, { "epoch": 2.0, "grad_norm": 7.460923194885254, "learning_rate": 4.037767980378723e-06, "loss": 0.1144, "step": 203900 }, { "epoch": 2.01, "grad_norm": 6.8332109451293945, "learning_rate": 4.037643857924474e-06, "loss": 0.1971, "step": 203925 }, { "epoch": 2.01, "grad_norm": 13.513983726501465, "learning_rate": 4.037519735470225e-06, "loss": 0.1321, "step": 203950 }, { "epoch": 2.01, "grad_norm": 2.7327518463134766, "learning_rate": 4.037395613015977e-06, "loss": 0.2097, "step": 203975 }, { "epoch": 2.01, "grad_norm": 10.321587562561035, "learning_rate": 4.0372714905617285e-06, "loss": 0.1066, "step": 204000 }, { "epoch": 2.01, "grad_norm": 4.8867292404174805, "learning_rate": 4.037147368107481e-06, "loss": 0.2111, "step": 204025 }, { "epoch": 2.01, "grad_norm": 4.7669677734375, "learning_rate": 4.037023245653232e-06, "loss": 0.1313, "step": 204050 }, { "epoch": 2.01, "grad_norm": 3.1328582763671875, "learning_rate": 4.036899123198984e-06, "loss": 0.2111, "step": 204075 }, { "epoch": 2.01, "grad_norm": 8.32575798034668, "learning_rate": 4.036775000744735e-06, "loss": 0.0921, "step": 204100 }, { "epoch": 2.01, "grad_norm": 5.658828258514404, "learning_rate": 4.036650878290487e-06, "loss": 0.2219, "step": 204125 }, { "epoch": 2.01, "grad_norm": 12.369583129882812, "learning_rate": 4.036526755836238e-06, "loss": 0.1441, "step": 204150 }, { "epoch": 2.01, "grad_norm": 3.0615475177764893, "learning_rate": 4.0364026333819896e-06, "loss": 0.2186, "step": 204175 }, { "epoch": 2.01, "grad_norm": 5.781341552734375, "learning_rate": 4.036278510927742e-06, "loss": 0.1375, "step": 204200 }, { "epoch": 2.01, "grad_norm": 2.715825319290161, "learning_rate": 4.036154388473493e-06, "loss": 0.2088, "step": 204225 }, { "epoch": 2.01, "grad_norm": 27.174528121948242, "learning_rate": 4.036030266019244e-06, "loss": 0.0947, "step": 204250 }, { "epoch": 2.01, "grad_norm": 2.782684087753296, "learning_rate": 4.035906143564996e-06, "loss": 0.1866, "step": 204275 }, { "epoch": 2.01, "grad_norm": 8.861053466796875, "learning_rate": 4.035782021110747e-06, "loss": 0.1049, "step": 204300 }, { "epoch": 2.01, "grad_norm": 4.734757900238037, "learning_rate": 4.0356578986564985e-06, "loss": 0.2443, "step": 204325 }, { "epoch": 2.01, "grad_norm": 8.12902545928955, "learning_rate": 4.0355337762022506e-06, "loss": 0.1101, "step": 204350 }, { "epoch": 2.01, "grad_norm": 5.069469928741455, "learning_rate": 4.035409653748002e-06, "loss": 0.2231, "step": 204375 }, { "epoch": 2.01, "grad_norm": 8.295186996459961, "learning_rate": 4.035285531293753e-06, "loss": 0.1236, "step": 204400 }, { "epoch": 2.01, "grad_norm": 1.717414379119873, "learning_rate": 4.035161408839505e-06, "loss": 0.225, "step": 204425 }, { "epoch": 2.01, "grad_norm": 8.72005558013916, "learning_rate": 4.035037286385256e-06, "loss": 0.0921, "step": 204450 }, { "epoch": 2.01, "grad_norm": 3.5027384757995605, "learning_rate": 4.034913163931008e-06, "loss": 0.193, "step": 204475 }, { "epoch": 2.01, "grad_norm": 8.387130737304688, "learning_rate": 4.0347890414767595e-06, "loss": 0.0911, "step": 204500 }, { "epoch": 2.01, "grad_norm": 4.39431095123291, "learning_rate": 4.034664919022512e-06, "loss": 0.2026, "step": 204525 }, { "epoch": 2.01, "grad_norm": 7.335353851318359, "learning_rate": 4.034540796568263e-06, "loss": 0.1086, "step": 204550 }, { "epoch": 2.01, "grad_norm": 25.309640884399414, "learning_rate": 4.034416674114015e-06, "loss": 0.2146, "step": 204575 }, { "epoch": 2.01, "grad_norm": 14.457329750061035, "learning_rate": 4.034292551659766e-06, "loss": 0.1734, "step": 204600 }, { "epoch": 2.01, "grad_norm": 7.208093166351318, "learning_rate": 4.034173394103687e-06, "loss": 0.2156, "step": 204625 }, { "epoch": 2.01, "grad_norm": 10.742616653442383, "learning_rate": 4.034049271649439e-06, "loss": 0.1217, "step": 204650 }, { "epoch": 2.01, "grad_norm": 1.455129861831665, "learning_rate": 4.03392514919519e-06, "loss": 0.1872, "step": 204675 }, { "epoch": 2.01, "grad_norm": 13.578743934631348, "learning_rate": 4.033801026740942e-06, "loss": 0.1228, "step": 204700 }, { "epoch": 2.01, "grad_norm": 0.5114380121231079, "learning_rate": 4.033676904286694e-06, "loss": 0.1997, "step": 204725 }, { "epoch": 2.01, "grad_norm": 7.434113025665283, "learning_rate": 4.033552781832445e-06, "loss": 0.125, "step": 204750 }, { "epoch": 2.01, "grad_norm": 9.268695831298828, "learning_rate": 4.033428659378196e-06, "loss": 0.2106, "step": 204775 }, { "epoch": 2.01, "grad_norm": 10.75728702545166, "learning_rate": 4.033304536923948e-06, "loss": 0.0988, "step": 204800 }, { "epoch": 2.01, "grad_norm": 9.681879043579102, "learning_rate": 4.033180414469699e-06, "loss": 0.2305, "step": 204825 }, { "epoch": 2.01, "grad_norm": 16.172975540161133, "learning_rate": 4.0330562920154505e-06, "loss": 0.1477, "step": 204850 }, { "epoch": 2.01, "grad_norm": 4.170344829559326, "learning_rate": 4.032932169561203e-06, "loss": 0.2292, "step": 204875 }, { "epoch": 2.01, "grad_norm": 7.78208065032959, "learning_rate": 4.032808047106954e-06, "loss": 0.0899, "step": 204900 }, { "epoch": 2.01, "grad_norm": 2.7589011192321777, "learning_rate": 4.032683924652706e-06, "loss": 0.2034, "step": 204925 }, { "epoch": 2.02, "grad_norm": 9.843603134155273, "learning_rate": 4.032559802198457e-06, "loss": 0.1357, "step": 204950 }, { "epoch": 2.02, "grad_norm": 2.949772834777832, "learning_rate": 4.032435679744209e-06, "loss": 0.2318, "step": 204975 }, { "epoch": 2.02, "grad_norm": 14.991121292114258, "learning_rate": 4.03231155728996e-06, "loss": 0.1241, "step": 205000 }, { "epoch": 2.02, "grad_norm": 4.377214431762695, "learning_rate": 4.032187434835712e-06, "loss": 0.151, "step": 205025 }, { "epoch": 2.02, "grad_norm": 10.387436866760254, "learning_rate": 4.032063312381464e-06, "loss": 0.141, "step": 205050 }, { "epoch": 2.02, "grad_norm": 6.080711841583252, "learning_rate": 4.031939189927215e-06, "loss": 0.2152, "step": 205075 }, { "epoch": 2.02, "grad_norm": 22.356712341308594, "learning_rate": 4.031815067472967e-06, "loss": 0.1711, "step": 205100 }, { "epoch": 2.02, "grad_norm": 5.383313179016113, "learning_rate": 4.031690945018718e-06, "loss": 0.2638, "step": 205125 }, { "epoch": 2.02, "grad_norm": 9.923674583435059, "learning_rate": 4.031566822564469e-06, "loss": 0.1496, "step": 205150 }, { "epoch": 2.02, "grad_norm": 3.8644070625305176, "learning_rate": 4.031442700110221e-06, "loss": 0.2098, "step": 205175 }, { "epoch": 2.02, "grad_norm": 21.414640426635742, "learning_rate": 4.031318577655973e-06, "loss": 0.111, "step": 205200 }, { "epoch": 2.02, "grad_norm": 2.641436815261841, "learning_rate": 4.031194455201724e-06, "loss": 0.2221, "step": 205225 }, { "epoch": 2.02, "grad_norm": 7.311573505401611, "learning_rate": 4.031070332747476e-06, "loss": 0.117, "step": 205250 }, { "epoch": 2.02, "grad_norm": 4.679196357727051, "learning_rate": 4.030946210293227e-06, "loss": 0.2176, "step": 205275 }, { "epoch": 2.02, "grad_norm": 11.626893997192383, "learning_rate": 4.030822087838978e-06, "loss": 0.142, "step": 205300 }, { "epoch": 2.02, "grad_norm": 2.867762804031372, "learning_rate": 4.03069796538473e-06, "loss": 0.2307, "step": 205325 }, { "epoch": 2.02, "grad_norm": 19.57225799560547, "learning_rate": 4.0305738429304816e-06, "loss": 0.1236, "step": 205350 }, { "epoch": 2.02, "grad_norm": 1.0374006032943726, "learning_rate": 4.030449720476234e-06, "loss": 0.1772, "step": 205375 }, { "epoch": 2.02, "grad_norm": 9.965255737304688, "learning_rate": 4.030325598021985e-06, "loss": 0.132, "step": 205400 }, { "epoch": 2.02, "grad_norm": 4.261962413787842, "learning_rate": 4.030201475567737e-06, "loss": 0.232, "step": 205425 }, { "epoch": 2.02, "grad_norm": 11.034917831420898, "learning_rate": 4.030077353113488e-06, "loss": 0.1151, "step": 205450 }, { "epoch": 2.02, "grad_norm": 3.2693865299224854, "learning_rate": 4.029953230659239e-06, "loss": 0.2152, "step": 205475 }, { "epoch": 2.02, "grad_norm": 14.668875694274902, "learning_rate": 4.029829108204991e-06, "loss": 0.1539, "step": 205500 }, { "epoch": 2.02, "grad_norm": 3.975269079208374, "learning_rate": 4.0297049857507426e-06, "loss": 0.2125, "step": 205525 }, { "epoch": 2.02, "grad_norm": 7.089271545410156, "learning_rate": 4.029580863296494e-06, "loss": 0.1152, "step": 205550 }, { "epoch": 2.02, "grad_norm": 2.3213133811950684, "learning_rate": 4.029456740842246e-06, "loss": 0.2268, "step": 205575 }, { "epoch": 2.02, "grad_norm": 13.442841529846191, "learning_rate": 4.029332618387997e-06, "loss": 0.1198, "step": 205600 }, { "epoch": 2.02, "grad_norm": 5.15958833694458, "learning_rate": 4.029208495933748e-06, "loss": 0.1969, "step": 205625 }, { "epoch": 2.02, "grad_norm": 9.123557090759277, "learning_rate": 4.0290843734795e-06, "loss": 0.1326, "step": 205650 }, { "epoch": 2.02, "grad_norm": 2.500239133834839, "learning_rate": 4.0289602510252515e-06, "loss": 0.228, "step": 205675 }, { "epoch": 2.02, "grad_norm": 9.45108699798584, "learning_rate": 4.028836128571003e-06, "loss": 0.1306, "step": 205700 }, { "epoch": 2.02, "grad_norm": 8.60934066772461, "learning_rate": 4.028712006116755e-06, "loss": 0.2098, "step": 205725 }, { "epoch": 2.02, "grad_norm": 11.802779197692871, "learning_rate": 4.028587883662506e-06, "loss": 0.1495, "step": 205750 }, { "epoch": 2.02, "grad_norm": 4.552669525146484, "learning_rate": 4.028463761208258e-06, "loss": 0.1795, "step": 205775 }, { "epoch": 2.02, "grad_norm": 7.670668125152588, "learning_rate": 4.028339638754009e-06, "loss": 0.1434, "step": 205800 }, { "epoch": 2.02, "grad_norm": 3.7275116443634033, "learning_rate": 4.028215516299761e-06, "loss": 0.201, "step": 205825 }, { "epoch": 2.02, "grad_norm": 10.051813125610352, "learning_rate": 4.0280913938455126e-06, "loss": 0.0972, "step": 205850 }, { "epoch": 2.02, "grad_norm": 5.668983459472656, "learning_rate": 4.027967271391265e-06, "loss": 0.2174, "step": 205875 }, { "epoch": 2.02, "grad_norm": 8.088471412658691, "learning_rate": 4.027843148937016e-06, "loss": 0.1151, "step": 205900 }, { "epoch": 2.02, "grad_norm": 1.9309965372085571, "learning_rate": 4.027719026482767e-06, "loss": 0.1796, "step": 205925 }, { "epoch": 2.02, "grad_norm": 11.320578575134277, "learning_rate": 4.027594904028519e-06, "loss": 0.1248, "step": 205950 }, { "epoch": 2.03, "grad_norm": 9.48476505279541, "learning_rate": 4.02747078157427e-06, "loss": 0.236, "step": 205975 }, { "epoch": 2.03, "grad_norm": 12.310286521911621, "learning_rate": 4.0273466591200215e-06, "loss": 0.1283, "step": 206000 }, { "epoch": 2.03, "grad_norm": 5.228811264038086, "learning_rate": 4.027222536665774e-06, "loss": 0.1911, "step": 206025 }, { "epoch": 2.03, "grad_norm": 13.606507301330566, "learning_rate": 4.027098414211525e-06, "loss": 0.1107, "step": 206050 }, { "epoch": 2.03, "grad_norm": 2.945345640182495, "learning_rate": 4.026974291757276e-06, "loss": 0.2316, "step": 206075 }, { "epoch": 2.03, "grad_norm": 13.664615631103516, "learning_rate": 4.026850169303028e-06, "loss": 0.141, "step": 206100 }, { "epoch": 2.03, "grad_norm": 2.3768534660339355, "learning_rate": 4.026726046848779e-06, "loss": 0.1635, "step": 206125 }, { "epoch": 2.03, "grad_norm": 13.518872261047363, "learning_rate": 4.0266019243945305e-06, "loss": 0.1357, "step": 206150 }, { "epoch": 2.03, "grad_norm": 5.69960355758667, "learning_rate": 4.0264778019402825e-06, "loss": 0.2166, "step": 206175 }, { "epoch": 2.03, "grad_norm": 10.2103271484375, "learning_rate": 4.026353679486034e-06, "loss": 0.1364, "step": 206200 }, { "epoch": 2.03, "grad_norm": 3.1203606128692627, "learning_rate": 4.026229557031786e-06, "loss": 0.2404, "step": 206225 }, { "epoch": 2.03, "grad_norm": 11.163704872131348, "learning_rate": 4.026105434577537e-06, "loss": 0.1037, "step": 206250 }, { "epoch": 2.03, "grad_norm": 5.11909818649292, "learning_rate": 4.025981312123289e-06, "loss": 0.2031, "step": 206275 }, { "epoch": 2.03, "grad_norm": 13.018593788146973, "learning_rate": 4.02585718966904e-06, "loss": 0.1293, "step": 206300 }, { "epoch": 2.03, "grad_norm": 2.8301737308502197, "learning_rate": 4.0257330672147915e-06, "loss": 0.218, "step": 206325 }, { "epoch": 2.03, "grad_norm": 7.696350574493408, "learning_rate": 4.0256089447605436e-06, "loss": 0.1245, "step": 206350 }, { "epoch": 2.03, "grad_norm": 1.651573657989502, "learning_rate": 4.025484822306295e-06, "loss": 0.2407, "step": 206375 }, { "epoch": 2.03, "grad_norm": 6.709588527679443, "learning_rate": 4.025360699852046e-06, "loss": 0.1274, "step": 206400 }, { "epoch": 2.03, "grad_norm": 3.1489791870117188, "learning_rate": 4.025236577397798e-06, "loss": 0.2068, "step": 206425 }, { "epoch": 2.03, "grad_norm": 13.635674476623535, "learning_rate": 4.025112454943549e-06, "loss": 0.1639, "step": 206450 }, { "epoch": 2.03, "grad_norm": 6.471497535705566, "learning_rate": 4.0249883324893005e-06, "loss": 0.2031, "step": 206475 }, { "epoch": 2.03, "grad_norm": 5.367116451263428, "learning_rate": 4.0248642100350525e-06, "loss": 0.1027, "step": 206500 }, { "epoch": 2.03, "grad_norm": 6.488775730133057, "learning_rate": 4.024740087580804e-06, "loss": 0.2001, "step": 206525 }, { "epoch": 2.03, "grad_norm": 13.469502449035645, "learning_rate": 4.024615965126556e-06, "loss": 0.1215, "step": 206550 }, { "epoch": 2.03, "grad_norm": 3.362861156463623, "learning_rate": 4.024491842672307e-06, "loss": 0.218, "step": 206575 }, { "epoch": 2.03, "grad_norm": 13.687602043151855, "learning_rate": 4.024367720218059e-06, "loss": 0.104, "step": 206600 }, { "epoch": 2.03, "grad_norm": 3.448316812515259, "learning_rate": 4.02424359776381e-06, "loss": 0.1879, "step": 206625 }, { "epoch": 2.03, "grad_norm": 10.4818696975708, "learning_rate": 4.024119475309562e-06, "loss": 0.1396, "step": 206650 }, { "epoch": 2.03, "grad_norm": 4.554089069366455, "learning_rate": 4.0239953528553135e-06, "loss": 0.1993, "step": 206675 }, { "epoch": 2.03, "grad_norm": 7.514374732971191, "learning_rate": 4.023871230401065e-06, "loss": 0.1009, "step": 206700 }, { "epoch": 2.03, "grad_norm": 7.867256164550781, "learning_rate": 4.023747107946817e-06, "loss": 0.2369, "step": 206725 }, { "epoch": 2.03, "grad_norm": 13.632837295532227, "learning_rate": 4.023622985492568e-06, "loss": 0.134, "step": 206750 }, { "epoch": 2.03, "grad_norm": 5.8418450355529785, "learning_rate": 4.023498863038319e-06, "loss": 0.2071, "step": 206775 }, { "epoch": 2.03, "grad_norm": 12.120223045349121, "learning_rate": 4.023374740584071e-06, "loss": 0.1043, "step": 206800 }, { "epoch": 2.03, "grad_norm": 4.029977798461914, "learning_rate": 4.0232506181298225e-06, "loss": 0.211, "step": 206825 }, { "epoch": 2.03, "grad_norm": 7.715822696685791, "learning_rate": 4.023126495675574e-06, "loss": 0.1148, "step": 206850 }, { "epoch": 2.03, "grad_norm": 0.15712247788906097, "learning_rate": 4.023002373221326e-06, "loss": 0.2237, "step": 206875 }, { "epoch": 2.03, "grad_norm": 9.38854694366455, "learning_rate": 4.022878250767077e-06, "loss": 0.1384, "step": 206900 }, { "epoch": 2.03, "grad_norm": 4.802224159240723, "learning_rate": 4.022754128312828e-06, "loss": 0.1529, "step": 206925 }, { "epoch": 2.03, "grad_norm": 15.133809089660645, "learning_rate": 4.02263000585858e-06, "loss": 0.0778, "step": 206950 }, { "epoch": 2.04, "grad_norm": 4.576481819152832, "learning_rate": 4.0225058834043315e-06, "loss": 0.1705, "step": 206975 }, { "epoch": 2.04, "grad_norm": 9.64865493774414, "learning_rate": 4.0223817609500835e-06, "loss": 0.1065, "step": 207000 }, { "epoch": 2.04, "grad_norm": 4.144393444061279, "learning_rate": 4.0222626033940046e-06, "loss": 0.2226, "step": 207025 }, { "epoch": 2.04, "grad_norm": 8.302870750427246, "learning_rate": 4.022138480939757e-06, "loss": 0.1408, "step": 207050 }, { "epoch": 2.04, "grad_norm": 1.4306602478027344, "learning_rate": 4.022014358485508e-06, "loss": 0.1996, "step": 207075 }, { "epoch": 2.04, "grad_norm": 15.939794540405273, "learning_rate": 4.02189023603126e-06, "loss": 0.1492, "step": 207100 }, { "epoch": 2.04, "grad_norm": 2.7916383743286133, "learning_rate": 4.021766113577011e-06, "loss": 0.2316, "step": 207125 }, { "epoch": 2.04, "grad_norm": 8.332763671875, "learning_rate": 4.021641991122762e-06, "loss": 0.1067, "step": 207150 }, { "epoch": 2.04, "grad_norm": 4.874785423278809, "learning_rate": 4.021517868668514e-06, "loss": 0.1964, "step": 207175 }, { "epoch": 2.04, "grad_norm": 5.522751331329346, "learning_rate": 4.021393746214266e-06, "loss": 0.1134, "step": 207200 }, { "epoch": 2.04, "grad_norm": 4.221974849700928, "learning_rate": 4.021269623760017e-06, "loss": 0.2063, "step": 207225 }, { "epoch": 2.04, "grad_norm": 5.883918762207031, "learning_rate": 4.021145501305769e-06, "loss": 0.12, "step": 207250 }, { "epoch": 2.04, "grad_norm": 8.386689186096191, "learning_rate": 4.02102137885152e-06, "loss": 0.1805, "step": 207275 }, { "epoch": 2.04, "grad_norm": 12.07164192199707, "learning_rate": 4.020897256397271e-06, "loss": 0.1092, "step": 207300 }, { "epoch": 2.04, "grad_norm": 5.318277835845947, "learning_rate": 4.020773133943023e-06, "loss": 0.201, "step": 207325 }, { "epoch": 2.04, "grad_norm": 11.369438171386719, "learning_rate": 4.0206490114887745e-06, "loss": 0.1105, "step": 207350 }, { "epoch": 2.04, "grad_norm": 2.6086013317108154, "learning_rate": 4.020524889034526e-06, "loss": 0.2123, "step": 207375 }, { "epoch": 2.04, "grad_norm": 8.918609619140625, "learning_rate": 4.020400766580278e-06, "loss": 0.1216, "step": 207400 }, { "epoch": 2.04, "grad_norm": 6.400620937347412, "learning_rate": 4.020276644126029e-06, "loss": 0.2029, "step": 207425 }, { "epoch": 2.04, "grad_norm": 11.41785717010498, "learning_rate": 4.020152521671781e-06, "loss": 0.1042, "step": 207450 }, { "epoch": 2.04, "grad_norm": 9.081633567810059, "learning_rate": 4.020028399217532e-06, "loss": 0.1934, "step": 207475 }, { "epoch": 2.04, "grad_norm": 5.826053142547607, "learning_rate": 4.019904276763284e-06, "loss": 0.1215, "step": 207500 }, { "epoch": 2.04, "grad_norm": 4.681146621704102, "learning_rate": 4.0197801543090356e-06, "loss": 0.1936, "step": 207525 }, { "epoch": 2.04, "grad_norm": 7.023774147033691, "learning_rate": 4.019656031854788e-06, "loss": 0.104, "step": 207550 }, { "epoch": 2.04, "grad_norm": 3.2070465087890625, "learning_rate": 4.019531909400539e-06, "loss": 0.1758, "step": 207575 }, { "epoch": 2.04, "grad_norm": 12.410598754882812, "learning_rate": 4.01940778694629e-06, "loss": 0.1489, "step": 207600 }, { "epoch": 2.04, "grad_norm": 6.23022985458374, "learning_rate": 4.019283664492041e-06, "loss": 0.1984, "step": 207625 }, { "epoch": 2.04, "grad_norm": 11.486367225646973, "learning_rate": 4.019159542037793e-06, "loss": 0.092, "step": 207650 }, { "epoch": 2.04, "grad_norm": 4.784128665924072, "learning_rate": 4.0190354195835445e-06, "loss": 0.1741, "step": 207675 }, { "epoch": 2.04, "grad_norm": 12.21338176727295, "learning_rate": 4.018911297129296e-06, "loss": 0.1229, "step": 207700 }, { "epoch": 2.04, "grad_norm": 2.1533029079437256, "learning_rate": 4.018787174675048e-06, "loss": 0.1758, "step": 207725 }, { "epoch": 2.04, "grad_norm": 6.4708571434021, "learning_rate": 4.018663052220799e-06, "loss": 0.1286, "step": 207750 }, { "epoch": 2.04, "grad_norm": 3.6685805320739746, "learning_rate": 4.01853892976655e-06, "loss": 0.197, "step": 207775 }, { "epoch": 2.04, "grad_norm": 11.995023727416992, "learning_rate": 4.018414807312302e-06, "loss": 0.1637, "step": 207800 }, { "epoch": 2.04, "grad_norm": 4.777711868286133, "learning_rate": 4.0182906848580535e-06, "loss": 0.2471, "step": 207825 }, { "epoch": 2.04, "grad_norm": 7.119612216949463, "learning_rate": 4.0181665624038056e-06, "loss": 0.0918, "step": 207850 }, { "epoch": 2.04, "grad_norm": 4.862934589385986, "learning_rate": 4.018042439949557e-06, "loss": 0.2344, "step": 207875 }, { "epoch": 2.04, "grad_norm": 9.768475532531738, "learning_rate": 4.017918317495309e-06, "loss": 0.1113, "step": 207900 }, { "epoch": 2.04, "grad_norm": 10.108806610107422, "learning_rate": 4.01779419504106e-06, "loss": 0.1629, "step": 207925 }, { "epoch": 2.04, "grad_norm": 10.5574312210083, "learning_rate": 4.017670072586812e-06, "loss": 0.125, "step": 207950 }, { "epoch": 2.04, "grad_norm": 6.136951923370361, "learning_rate": 4.017545950132563e-06, "loss": 0.2462, "step": 207975 }, { "epoch": 2.05, "grad_norm": 11.736515045166016, "learning_rate": 4.0174218276783145e-06, "loss": 0.1254, "step": 208000 }, { "epoch": 2.05, "grad_norm": 2.6208691596984863, "learning_rate": 4.0172977052240666e-06, "loss": 0.1835, "step": 208025 }, { "epoch": 2.05, "grad_norm": 8.652613639831543, "learning_rate": 4.017173582769818e-06, "loss": 0.1055, "step": 208050 }, { "epoch": 2.05, "grad_norm": 2.035630464553833, "learning_rate": 4.017049460315569e-06, "loss": 0.1722, "step": 208075 }, { "epoch": 2.05, "grad_norm": 14.098589897155762, "learning_rate": 4.016925337861321e-06, "loss": 0.1093, "step": 208100 }, { "epoch": 2.05, "grad_norm": 2.8440330028533936, "learning_rate": 4.016801215407072e-06, "loss": 0.2312, "step": 208125 }, { "epoch": 2.05, "grad_norm": 9.737565040588379, "learning_rate": 4.0166770929528235e-06, "loss": 0.1068, "step": 208150 }, { "epoch": 2.05, "grad_norm": 6.33320426940918, "learning_rate": 4.0165529704985755e-06, "loss": 0.2098, "step": 208175 }, { "epoch": 2.05, "grad_norm": 12.71847152709961, "learning_rate": 4.016428848044327e-06, "loss": 0.1313, "step": 208200 }, { "epoch": 2.05, "grad_norm": 2.2341227531433105, "learning_rate": 4.016304725590078e-06, "loss": 0.1842, "step": 208225 }, { "epoch": 2.05, "grad_norm": 8.081509590148926, "learning_rate": 4.01618060313583e-06, "loss": 0.1109, "step": 208250 }, { "epoch": 2.05, "grad_norm": 4.037906646728516, "learning_rate": 4.016056480681581e-06, "loss": 0.1896, "step": 208275 }, { "epoch": 2.05, "grad_norm": 22.5207462310791, "learning_rate": 4.015932358227333e-06, "loss": 0.137, "step": 208300 }, { "epoch": 2.05, "grad_norm": 4.642852783203125, "learning_rate": 4.0158082357730845e-06, "loss": 0.2193, "step": 208325 }, { "epoch": 2.05, "grad_norm": 6.855506420135498, "learning_rate": 4.0156841133188366e-06, "loss": 0.1179, "step": 208350 }, { "epoch": 2.05, "grad_norm": 5.240880966186523, "learning_rate": 4.015559990864588e-06, "loss": 0.1846, "step": 208375 }, { "epoch": 2.05, "grad_norm": 14.909170150756836, "learning_rate": 4.01543586841034e-06, "loss": 0.1056, "step": 208400 }, { "epoch": 2.05, "grad_norm": 5.126863956451416, "learning_rate": 4.015311745956091e-06, "loss": 0.2137, "step": 208425 }, { "epoch": 2.05, "grad_norm": 8.129748344421387, "learning_rate": 4.015187623501842e-06, "loss": 0.1065, "step": 208450 }, { "epoch": 2.05, "grad_norm": 5.267970085144043, "learning_rate": 4.0150635010475935e-06, "loss": 0.2199, "step": 208475 }, { "epoch": 2.05, "grad_norm": 10.011208534240723, "learning_rate": 4.0149393785933455e-06, "loss": 0.1114, "step": 208500 }, { "epoch": 2.05, "grad_norm": 2.8102235794067383, "learning_rate": 4.014815256139097e-06, "loss": 0.2154, "step": 208525 }, { "epoch": 2.05, "grad_norm": 11.846223831176758, "learning_rate": 4.014691133684848e-06, "loss": 0.1178, "step": 208550 }, { "epoch": 2.05, "grad_norm": 3.950212240219116, "learning_rate": 4.0145670112306e-06, "loss": 0.1778, "step": 208575 }, { "epoch": 2.05, "grad_norm": 13.155826568603516, "learning_rate": 4.014442888776351e-06, "loss": 0.1116, "step": 208600 }, { "epoch": 2.05, "grad_norm": 4.33406400680542, "learning_rate": 4.014318766322102e-06, "loss": 0.1992, "step": 208625 }, { "epoch": 2.05, "grad_norm": 13.183965682983398, "learning_rate": 4.0141946438678545e-06, "loss": 0.1272, "step": 208650 }, { "epoch": 2.05, "grad_norm": 3.1886162757873535, "learning_rate": 4.014070521413606e-06, "loss": 0.2152, "step": 208675 }, { "epoch": 2.05, "grad_norm": 12.73622989654541, "learning_rate": 4.013946398959358e-06, "loss": 0.093, "step": 208700 }, { "epoch": 2.05, "grad_norm": 6.022646427154541, "learning_rate": 4.013822276505109e-06, "loss": 0.223, "step": 208725 }, { "epoch": 2.05, "grad_norm": 5.762898921966553, "learning_rate": 4.013698154050861e-06, "loss": 0.1055, "step": 208750 }, { "epoch": 2.05, "grad_norm": 1.644355297088623, "learning_rate": 4.013574031596612e-06, "loss": 0.1682, "step": 208775 }, { "epoch": 2.05, "grad_norm": 13.029213905334473, "learning_rate": 4.013449909142364e-06, "loss": 0.1376, "step": 208800 }, { "epoch": 2.05, "grad_norm": 2.860520362854004, "learning_rate": 4.0133257866881155e-06, "loss": 0.2133, "step": 208825 }, { "epoch": 2.05, "grad_norm": 14.873495101928711, "learning_rate": 4.013201664233867e-06, "loss": 0.1161, "step": 208850 }, { "epoch": 2.05, "grad_norm": 6.427549362182617, "learning_rate": 4.013077541779619e-06, "loss": 0.2015, "step": 208875 }, { "epoch": 2.05, "grad_norm": 8.318385124206543, "learning_rate": 4.01295341932537e-06, "loss": 0.1369, "step": 208900 }, { "epoch": 2.05, "grad_norm": 1.4049407243728638, "learning_rate": 4.012829296871121e-06, "loss": 0.1959, "step": 208925 }, { "epoch": 2.05, "grad_norm": 11.17520809173584, "learning_rate": 4.012705174416873e-06, "loss": 0.1269, "step": 208950 }, { "epoch": 2.05, "grad_norm": 4.70074987411499, "learning_rate": 4.0125810519626245e-06, "loss": 0.2246, "step": 208975 }, { "epoch": 2.05, "grad_norm": 16.320415496826172, "learning_rate": 4.012456929508376e-06, "loss": 0.1088, "step": 209000 }, { "epoch": 2.06, "grad_norm": 6.570969104766846, "learning_rate": 4.012332807054128e-06, "loss": 0.2112, "step": 209025 }, { "epoch": 2.06, "grad_norm": 16.48440933227539, "learning_rate": 4.012208684599879e-06, "loss": 0.1108, "step": 209050 }, { "epoch": 2.06, "grad_norm": 6.477974891662598, "learning_rate": 4.01208456214563e-06, "loss": 0.1929, "step": 209075 }, { "epoch": 2.06, "grad_norm": 19.227581024169922, "learning_rate": 4.011960439691382e-06, "loss": 0.1247, "step": 209100 }, { "epoch": 2.06, "grad_norm": 2.687790870666504, "learning_rate": 4.011841282135303e-06, "loss": 0.1865, "step": 209125 }, { "epoch": 2.06, "grad_norm": 6.470534801483154, "learning_rate": 4.011717159681055e-06, "loss": 0.1325, "step": 209150 }, { "epoch": 2.06, "grad_norm": 3.575446605682373, "learning_rate": 4.0115930372268065e-06, "loss": 0.1919, "step": 209175 }, { "epoch": 2.06, "grad_norm": 17.26609230041504, "learning_rate": 4.0114689147725586e-06, "loss": 0.1315, "step": 209200 }, { "epoch": 2.06, "grad_norm": 7.488041877746582, "learning_rate": 4.01134479231831e-06, "loss": 0.2149, "step": 209225 }, { "epoch": 2.06, "grad_norm": 13.035646438598633, "learning_rate": 4.011220669864062e-06, "loss": 0.134, "step": 209250 }, { "epoch": 2.06, "grad_norm": 4.346345901489258, "learning_rate": 4.011096547409813e-06, "loss": 0.235, "step": 209275 }, { "epoch": 2.06, "grad_norm": 10.742890357971191, "learning_rate": 4.010972424955564e-06, "loss": 0.1233, "step": 209300 }, { "epoch": 2.06, "grad_norm": 4.046631336212158, "learning_rate": 4.010848302501316e-06, "loss": 0.2516, "step": 209325 }, { "epoch": 2.06, "grad_norm": 13.377188682556152, "learning_rate": 4.0107241800470675e-06, "loss": 0.1439, "step": 209350 }, { "epoch": 2.06, "grad_norm": 5.539435863494873, "learning_rate": 4.010600057592819e-06, "loss": 0.2173, "step": 209375 }, { "epoch": 2.06, "grad_norm": 7.516229629516602, "learning_rate": 4.010475935138571e-06, "loss": 0.1015, "step": 209400 }, { "epoch": 2.06, "grad_norm": 2.7310264110565186, "learning_rate": 4.010351812684322e-06, "loss": 0.2528, "step": 209425 }, { "epoch": 2.06, "grad_norm": 9.528827667236328, "learning_rate": 4.010227690230073e-06, "loss": 0.099, "step": 209450 }, { "epoch": 2.06, "grad_norm": 9.57759952545166, "learning_rate": 4.010103567775825e-06, "loss": 0.2343, "step": 209475 }, { "epoch": 2.06, "grad_norm": 12.48453140258789, "learning_rate": 4.0099794453215765e-06, "loss": 0.1217, "step": 209500 }, { "epoch": 2.06, "grad_norm": 4.719654560089111, "learning_rate": 4.0098553228673286e-06, "loss": 0.2111, "step": 209525 }, { "epoch": 2.06, "grad_norm": 13.29078197479248, "learning_rate": 4.00973120041308e-06, "loss": 0.146, "step": 209550 }, { "epoch": 2.06, "grad_norm": 1.2265101671218872, "learning_rate": 4.009607077958832e-06, "loss": 0.2469, "step": 209575 }, { "epoch": 2.06, "grad_norm": 9.934883117675781, "learning_rate": 4.009482955504583e-06, "loss": 0.1251, "step": 209600 }, { "epoch": 2.06, "grad_norm": 1.7729666233062744, "learning_rate": 4.009358833050335e-06, "loss": 0.2027, "step": 209625 }, { "epoch": 2.06, "grad_norm": 10.08787727355957, "learning_rate": 4.009234710596086e-06, "loss": 0.1155, "step": 209650 }, { "epoch": 2.06, "grad_norm": 6.871694564819336, "learning_rate": 4.0091105881418375e-06, "loss": 0.1919, "step": 209675 }, { "epoch": 2.06, "grad_norm": 14.230572700500488, "learning_rate": 4.00898646568759e-06, "loss": 0.1743, "step": 209700 }, { "epoch": 2.06, "grad_norm": 1.913960337638855, "learning_rate": 4.008862343233341e-06, "loss": 0.2009, "step": 209725 }, { "epoch": 2.06, "grad_norm": 8.336867332458496, "learning_rate": 4.008738220779092e-06, "loss": 0.112, "step": 209750 }, { "epoch": 2.06, "grad_norm": 0.6376567482948303, "learning_rate": 4.008614098324844e-06, "loss": 0.2559, "step": 209775 }, { "epoch": 2.06, "grad_norm": 8.037951469421387, "learning_rate": 4.008489975870595e-06, "loss": 0.132, "step": 209800 }, { "epoch": 2.06, "grad_norm": 0.9785881638526917, "learning_rate": 4.0083658534163465e-06, "loss": 0.2063, "step": 209825 }, { "epoch": 2.06, "grad_norm": 12.511580467224121, "learning_rate": 4.0082417309620985e-06, "loss": 0.125, "step": 209850 }, { "epoch": 2.06, "grad_norm": 0.3438396751880646, "learning_rate": 4.00811760850785e-06, "loss": 0.2358, "step": 209875 }, { "epoch": 2.06, "grad_norm": 10.527846336364746, "learning_rate": 4.007993486053601e-06, "loss": 0.1194, "step": 209900 }, { "epoch": 2.06, "grad_norm": 3.4217891693115234, "learning_rate": 4.007869363599353e-06, "loss": 0.1605, "step": 209925 }, { "epoch": 2.06, "grad_norm": 8.322744369506836, "learning_rate": 4.007745241145104e-06, "loss": 0.1117, "step": 209950 }, { "epoch": 2.06, "grad_norm": 5.536486625671387, "learning_rate": 4.007621118690856e-06, "loss": 0.203, "step": 209975 }, { "epoch": 2.06, "grad_norm": 13.510151863098145, "learning_rate": 4.0074969962366075e-06, "loss": 0.1103, "step": 210000 }, { "epoch": 2.07, "grad_norm": 5.03897762298584, "learning_rate": 4.0073728737823596e-06, "loss": 0.189, "step": 210025 }, { "epoch": 2.07, "grad_norm": 14.147541999816895, "learning_rate": 4.007248751328111e-06, "loss": 0.1341, "step": 210050 }, { "epoch": 2.07, "grad_norm": 3.3066585063934326, "learning_rate": 4.007124628873862e-06, "loss": 0.1963, "step": 210075 }, { "epoch": 2.07, "grad_norm": 18.262327194213867, "learning_rate": 4.007000506419614e-06, "loss": 0.1252, "step": 210100 }, { "epoch": 2.07, "grad_norm": 6.736450672149658, "learning_rate": 4.006876383965365e-06, "loss": 0.2378, "step": 210125 }, { "epoch": 2.07, "grad_norm": 7.53901481628418, "learning_rate": 4.0067522615111165e-06, "loss": 0.1097, "step": 210150 }, { "epoch": 2.07, "grad_norm": 3.7917370796203613, "learning_rate": 4.0066281390568685e-06, "loss": 0.1806, "step": 210175 }, { "epoch": 2.07, "grad_norm": 11.486066818237305, "learning_rate": 4.00650401660262e-06, "loss": 0.1441, "step": 210200 }, { "epoch": 2.07, "grad_norm": 5.8077874183654785, "learning_rate": 4.006379894148371e-06, "loss": 0.2302, "step": 210225 }, { "epoch": 2.07, "grad_norm": 13.337883949279785, "learning_rate": 4.006255771694123e-06, "loss": 0.1406, "step": 210250 }, { "epoch": 2.07, "grad_norm": 3.602874755859375, "learning_rate": 4.006131649239874e-06, "loss": 0.2263, "step": 210275 }, { "epoch": 2.07, "grad_norm": 12.595222473144531, "learning_rate": 4.0060075267856254e-06, "loss": 0.1395, "step": 210300 }, { "epoch": 2.07, "grad_norm": 7.9685821533203125, "learning_rate": 4.0058834043313775e-06, "loss": 0.2487, "step": 210325 }, { "epoch": 2.07, "grad_norm": 4.438198566436768, "learning_rate": 4.005759281877129e-06, "loss": 0.1208, "step": 210350 }, { "epoch": 2.07, "grad_norm": 1.4490057229995728, "learning_rate": 4.005635159422881e-06, "loss": 0.2079, "step": 210375 }, { "epoch": 2.07, "grad_norm": 12.12069034576416, "learning_rate": 4.005511036968632e-06, "loss": 0.1112, "step": 210400 }, { "epoch": 2.07, "grad_norm": 1.0853214263916016, "learning_rate": 4.005386914514384e-06, "loss": 0.2317, "step": 210425 }, { "epoch": 2.07, "grad_norm": 9.849432945251465, "learning_rate": 4.005262792060135e-06, "loss": 0.1105, "step": 210450 }, { "epoch": 2.07, "grad_norm": 1.9890305995941162, "learning_rate": 4.005138669605887e-06, "loss": 0.2402, "step": 210475 }, { "epoch": 2.07, "grad_norm": 9.456913948059082, "learning_rate": 4.0050145471516385e-06, "loss": 0.1268, "step": 210500 }, { "epoch": 2.07, "grad_norm": 4.802297115325928, "learning_rate": 4.00489042469739e-06, "loss": 0.1854, "step": 210525 }, { "epoch": 2.07, "grad_norm": 18.708892822265625, "learning_rate": 4.004766302243142e-06, "loss": 0.1567, "step": 210550 }, { "epoch": 2.07, "grad_norm": 4.378236293792725, "learning_rate": 4.004642179788893e-06, "loss": 0.2429, "step": 210575 }, { "epoch": 2.07, "grad_norm": 15.268924713134766, "learning_rate": 4.004518057334644e-06, "loss": 0.119, "step": 210600 }, { "epoch": 2.07, "grad_norm": 1.6387250423431396, "learning_rate": 4.004393934880396e-06, "loss": 0.2548, "step": 210625 }, { "epoch": 2.07, "grad_norm": 10.635870933532715, "learning_rate": 4.0042698124261475e-06, "loss": 0.1005, "step": 210650 }, { "epoch": 2.07, "grad_norm": 10.846902847290039, "learning_rate": 4.004145689971899e-06, "loss": 0.1857, "step": 210675 }, { "epoch": 2.07, "grad_norm": 28.031099319458008, "learning_rate": 4.004021567517651e-06, "loss": 0.1279, "step": 210700 }, { "epoch": 2.07, "grad_norm": 5.127678394317627, "learning_rate": 4.003897445063402e-06, "loss": 0.2121, "step": 210725 }, { "epoch": 2.07, "grad_norm": 13.79767894744873, "learning_rate": 4.003773322609153e-06, "loss": 0.1345, "step": 210750 }, { "epoch": 2.07, "grad_norm": 2.894991874694824, "learning_rate": 4.003649200154905e-06, "loss": 0.1597, "step": 210775 }, { "epoch": 2.07, "grad_norm": 13.488115310668945, "learning_rate": 4.0035250777006564e-06, "loss": 0.1351, "step": 210800 }, { "epoch": 2.07, "grad_norm": 5.641576766967773, "learning_rate": 4.0034009552464085e-06, "loss": 0.234, "step": 210825 }, { "epoch": 2.07, "grad_norm": 10.985048294067383, "learning_rate": 4.00327683279216e-06, "loss": 0.105, "step": 210850 }, { "epoch": 2.07, "grad_norm": 4.237049102783203, "learning_rate": 4.003152710337912e-06, "loss": 0.2099, "step": 210875 }, { "epoch": 2.07, "grad_norm": 12.697392463684082, "learning_rate": 4.003028587883663e-06, "loss": 0.1472, "step": 210900 }, { "epoch": 2.07, "grad_norm": 7.4509053230285645, "learning_rate": 4.002904465429414e-06, "loss": 0.1934, "step": 210925 }, { "epoch": 2.07, "grad_norm": 11.340929985046387, "learning_rate": 4.002780342975166e-06, "loss": 0.1529, "step": 210950 }, { "epoch": 2.07, "grad_norm": 3.0974974632263184, "learning_rate": 4.0026562205209175e-06, "loss": 0.2205, "step": 210975 }, { "epoch": 2.07, "grad_norm": 9.455810546875, "learning_rate": 4.002532098066669e-06, "loss": 0.0879, "step": 211000 }, { "epoch": 2.07, "grad_norm": 4.598367691040039, "learning_rate": 4.002407975612421e-06, "loss": 0.1941, "step": 211025 }, { "epoch": 2.08, "grad_norm": 8.019725799560547, "learning_rate": 4.002283853158172e-06, "loss": 0.1081, "step": 211050 }, { "epoch": 2.08, "grad_norm": 4.175759792327881, "learning_rate": 4.002159730703923e-06, "loss": 0.1912, "step": 211075 }, { "epoch": 2.08, "grad_norm": 9.01654052734375, "learning_rate": 4.002035608249675e-06, "loss": 0.131, "step": 211100 }, { "epoch": 2.08, "grad_norm": 3.980498790740967, "learning_rate": 4.001911485795426e-06, "loss": 0.2006, "step": 211125 }, { "epoch": 2.08, "grad_norm": 2.785858154296875, "learning_rate": 4.001787363341178e-06, "loss": 0.0947, "step": 211150 }, { "epoch": 2.08, "grad_norm": 5.144773960113525, "learning_rate": 4.00166324088693e-06, "loss": 0.1775, "step": 211175 }, { "epoch": 2.08, "grad_norm": 14.391077041625977, "learning_rate": 4.001539118432681e-06, "loss": 0.1148, "step": 211200 }, { "epoch": 2.08, "grad_norm": 2.858191967010498, "learning_rate": 4.001414995978433e-06, "loss": 0.2979, "step": 211225 }, { "epoch": 2.08, "grad_norm": 12.944182395935059, "learning_rate": 4.001290873524184e-06, "loss": 0.1295, "step": 211250 }, { "epoch": 2.08, "grad_norm": 5.867774486541748, "learning_rate": 4.001166751069936e-06, "loss": 0.2284, "step": 211275 }, { "epoch": 2.08, "grad_norm": 10.466052055358887, "learning_rate": 4.0010426286156874e-06, "loss": 0.0918, "step": 211300 }, { "epoch": 2.08, "grad_norm": 3.2792255878448486, "learning_rate": 4.0009185061614395e-06, "loss": 0.2305, "step": 211325 }, { "epoch": 2.08, "grad_norm": 7.616727352142334, "learning_rate": 4.000794383707191e-06, "loss": 0.1104, "step": 211350 }, { "epoch": 2.08, "grad_norm": 10.480356216430664, "learning_rate": 4.000670261252942e-06, "loss": 0.1967, "step": 211375 }, { "epoch": 2.08, "grad_norm": 10.251680374145508, "learning_rate": 4.000546138798694e-06, "loss": 0.1123, "step": 211400 }, { "epoch": 2.08, "grad_norm": 8.606858253479004, "learning_rate": 4.000422016344445e-06, "loss": 0.2138, "step": 211425 }, { "epoch": 2.08, "grad_norm": 7.116175174713135, "learning_rate": 4.000297893890196e-06, "loss": 0.108, "step": 211450 }, { "epoch": 2.08, "grad_norm": 5.773708343505859, "learning_rate": 4.0001737714359485e-06, "loss": 0.1965, "step": 211475 }, { "epoch": 2.08, "grad_norm": 11.483830451965332, "learning_rate": 4.0000496489817e-06, "loss": 0.1414, "step": 211500 }, { "epoch": 2.08, "grad_norm": 1.3332728147506714, "learning_rate": 3.999925526527451e-06, "loss": 0.2014, "step": 211525 }, { "epoch": 2.08, "grad_norm": 7.147753715515137, "learning_rate": 3.999801404073203e-06, "loss": 0.1545, "step": 211550 }, { "epoch": 2.08, "grad_norm": 3.4009206295013428, "learning_rate": 3.999677281618954e-06, "loss": 0.197, "step": 211575 }, { "epoch": 2.08, "grad_norm": 13.112139701843262, "learning_rate": 3.999553159164705e-06, "loss": 0.126, "step": 211600 }, { "epoch": 2.08, "grad_norm": 8.21094036102295, "learning_rate": 3.999434001608627e-06, "loss": 0.2156, "step": 211625 }, { "epoch": 2.08, "grad_norm": 10.352110862731934, "learning_rate": 3.9993098791543785e-06, "loss": 0.1093, "step": 211650 }, { "epoch": 2.08, "grad_norm": 4.3419623374938965, "learning_rate": 3.9991857567001305e-06, "loss": 0.2748, "step": 211675 }, { "epoch": 2.08, "grad_norm": 7.411513328552246, "learning_rate": 3.999061634245882e-06, "loss": 0.1028, "step": 211700 }, { "epoch": 2.08, "grad_norm": 7.895609378814697, "learning_rate": 3.998937511791634e-06, "loss": 0.1824, "step": 211725 }, { "epoch": 2.08, "grad_norm": 12.924392700195312, "learning_rate": 3.998813389337385e-06, "loss": 0.1145, "step": 211750 }, { "epoch": 2.08, "grad_norm": 3.2090299129486084, "learning_rate": 3.998689266883137e-06, "loss": 0.1856, "step": 211775 }, { "epoch": 2.08, "grad_norm": 14.481351852416992, "learning_rate": 3.998565144428888e-06, "loss": 0.1087, "step": 211800 }, { "epoch": 2.08, "grad_norm": 4.015239238739014, "learning_rate": 3.9984410219746395e-06, "loss": 0.2336, "step": 211825 }, { "epoch": 2.08, "grad_norm": 11.349912643432617, "learning_rate": 3.9983168995203915e-06, "loss": 0.1081, "step": 211850 }, { "epoch": 2.08, "grad_norm": 8.032912254333496, "learning_rate": 3.998192777066143e-06, "loss": 0.2262, "step": 211875 }, { "epoch": 2.08, "grad_norm": 8.698028564453125, "learning_rate": 3.998068654611894e-06, "loss": 0.1152, "step": 211900 }, { "epoch": 2.08, "grad_norm": 8.869747161865234, "learning_rate": 3.997944532157646e-06, "loss": 0.2102, "step": 211925 }, { "epoch": 2.08, "grad_norm": 7.774245262145996, "learning_rate": 3.997820409703397e-06, "loss": 0.1195, "step": 211950 }, { "epoch": 2.08, "grad_norm": 7.442229270935059, "learning_rate": 3.9976962872491484e-06, "loss": 0.2458, "step": 211975 }, { "epoch": 2.08, "grad_norm": 14.698331832885742, "learning_rate": 3.9975721647949005e-06, "loss": 0.1511, "step": 212000 }, { "epoch": 2.08, "grad_norm": 5.433886528015137, "learning_rate": 3.997448042340652e-06, "loss": 0.2012, "step": 212025 }, { "epoch": 2.08, "grad_norm": 10.69666576385498, "learning_rate": 3.997323919886403e-06, "loss": 0.1436, "step": 212050 }, { "epoch": 2.09, "grad_norm": 4.130741596221924, "learning_rate": 3.997199797432155e-06, "loss": 0.1798, "step": 212075 }, { "epoch": 2.09, "grad_norm": 8.022866249084473, "learning_rate": 3.997075674977906e-06, "loss": 0.0908, "step": 212100 }, { "epoch": 2.09, "grad_norm": 1.8471262454986572, "learning_rate": 3.996951552523658e-06, "loss": 0.202, "step": 212125 }, { "epoch": 2.09, "grad_norm": 14.306748390197754, "learning_rate": 3.9968274300694095e-06, "loss": 0.1562, "step": 212150 }, { "epoch": 2.09, "grad_norm": 3.0472044944763184, "learning_rate": 3.9967033076151615e-06, "loss": 0.2016, "step": 212175 }, { "epoch": 2.09, "grad_norm": 4.965055465698242, "learning_rate": 3.996579185160913e-06, "loss": 0.115, "step": 212200 }, { "epoch": 2.09, "grad_norm": 2.610067129135132, "learning_rate": 3.996455062706664e-06, "loss": 0.1678, "step": 212225 }, { "epoch": 2.09, "grad_norm": 8.395315170288086, "learning_rate": 3.996330940252416e-06, "loss": 0.1223, "step": 212250 }, { "epoch": 2.09, "grad_norm": 5.570521354675293, "learning_rate": 3.996206817798167e-06, "loss": 0.1845, "step": 212275 }, { "epoch": 2.09, "grad_norm": 11.070240020751953, "learning_rate": 3.996082695343918e-06, "loss": 0.1169, "step": 212300 }, { "epoch": 2.09, "grad_norm": 6.844172477722168, "learning_rate": 3.9959585728896705e-06, "loss": 0.1866, "step": 212325 }, { "epoch": 2.09, "grad_norm": 5.895583152770996, "learning_rate": 3.995834450435422e-06, "loss": 0.1255, "step": 212350 }, { "epoch": 2.09, "grad_norm": 3.8086326122283936, "learning_rate": 3.995710327981173e-06, "loss": 0.2065, "step": 212375 }, { "epoch": 2.09, "grad_norm": 4.541388988494873, "learning_rate": 3.995586205526925e-06, "loss": 0.1378, "step": 212400 }, { "epoch": 2.09, "grad_norm": 5.767416954040527, "learning_rate": 3.995462083072676e-06, "loss": 0.2272, "step": 212425 }, { "epoch": 2.09, "grad_norm": 8.486129760742188, "learning_rate": 3.995337960618428e-06, "loss": 0.1196, "step": 212450 }, { "epoch": 2.09, "grad_norm": 6.069565773010254, "learning_rate": 3.9952138381641794e-06, "loss": 0.2281, "step": 212475 }, { "epoch": 2.09, "grad_norm": 17.297212600708008, "learning_rate": 3.9950897157099315e-06, "loss": 0.1148, "step": 212500 }, { "epoch": 2.09, "grad_norm": 7.517704010009766, "learning_rate": 3.994965593255683e-06, "loss": 0.2128, "step": 212525 }, { "epoch": 2.09, "grad_norm": 7.062537670135498, "learning_rate": 3.994841470801435e-06, "loss": 0.0983, "step": 212550 }, { "epoch": 2.09, "grad_norm": 1.3401379585266113, "learning_rate": 3.994717348347186e-06, "loss": 0.1872, "step": 212575 }, { "epoch": 2.09, "grad_norm": 14.01573371887207, "learning_rate": 3.994593225892937e-06, "loss": 0.0889, "step": 212600 }, { "epoch": 2.09, "grad_norm": 4.889484405517578, "learning_rate": 3.994469103438689e-06, "loss": 0.2263, "step": 212625 }, { "epoch": 2.09, "grad_norm": 4.339841842651367, "learning_rate": 3.9943449809844405e-06, "loss": 0.1083, "step": 212650 }, { "epoch": 2.09, "grad_norm": 5.610559940338135, "learning_rate": 3.994220858530192e-06, "loss": 0.2064, "step": 212675 }, { "epoch": 2.09, "grad_norm": 16.24614715576172, "learning_rate": 3.994096736075944e-06, "loss": 0.0991, "step": 212700 }, { "epoch": 2.09, "grad_norm": 3.65720796585083, "learning_rate": 3.993972613621695e-06, "loss": 0.2051, "step": 212725 }, { "epoch": 2.09, "grad_norm": 26.659381866455078, "learning_rate": 3.993848491167446e-06, "loss": 0.1192, "step": 212750 }, { "epoch": 2.09, "grad_norm": 3.351757526397705, "learning_rate": 3.993724368713198e-06, "loss": 0.2159, "step": 212775 }, { "epoch": 2.09, "grad_norm": 8.931954383850098, "learning_rate": 3.9936002462589494e-06, "loss": 0.1437, "step": 212800 }, { "epoch": 2.09, "grad_norm": 0.9119071364402771, "learning_rate": 3.993476123804701e-06, "loss": 0.1902, "step": 212825 }, { "epoch": 2.09, "grad_norm": 10.96212100982666, "learning_rate": 3.993352001350453e-06, "loss": 0.1241, "step": 212850 }, { "epoch": 2.09, "grad_norm": 0.8493523001670837, "learning_rate": 3.993227878896204e-06, "loss": 0.2045, "step": 212875 }, { "epoch": 2.09, "grad_norm": 8.662275314331055, "learning_rate": 3.993103756441956e-06, "loss": 0.1311, "step": 212900 }, { "epoch": 2.09, "grad_norm": 5.044004917144775, "learning_rate": 3.992979633987707e-06, "loss": 0.2026, "step": 212925 }, { "epoch": 2.09, "grad_norm": 12.646841049194336, "learning_rate": 3.992855511533459e-06, "loss": 0.0989, "step": 212950 }, { "epoch": 2.09, "grad_norm": 5.579813003540039, "learning_rate": 3.9927313890792104e-06, "loss": 0.2108, "step": 212975 }, { "epoch": 2.09, "grad_norm": 10.250510215759277, "learning_rate": 3.9926072666249625e-06, "loss": 0.135, "step": 213000 }, { "epoch": 2.09, "grad_norm": 3.772258996963501, "learning_rate": 3.992483144170714e-06, "loss": 0.2341, "step": 213025 }, { "epoch": 2.09, "grad_norm": 10.858160972595215, "learning_rate": 3.992359021716465e-06, "loss": 0.1434, "step": 213050 }, { "epoch": 2.09, "grad_norm": 6.071061134338379, "learning_rate": 3.992234899262216e-06, "loss": 0.2148, "step": 213075 }, { "epoch": 2.1, "grad_norm": 6.792096138000488, "learning_rate": 3.992110776807968e-06, "loss": 0.1176, "step": 213100 }, { "epoch": 2.1, "grad_norm": 6.665597438812256, "learning_rate": 3.991986654353719e-06, "loss": 0.2363, "step": 213125 }, { "epoch": 2.1, "grad_norm": 10.48482608795166, "learning_rate": 3.991862531899471e-06, "loss": 0.1267, "step": 213150 }, { "epoch": 2.1, "grad_norm": 4.91733980178833, "learning_rate": 3.991738409445223e-06, "loss": 0.1755, "step": 213175 }, { "epoch": 2.1, "grad_norm": 21.208515167236328, "learning_rate": 3.991614286990974e-06, "loss": 0.1296, "step": 213200 }, { "epoch": 2.1, "grad_norm": 1.7547940015792847, "learning_rate": 3.991490164536725e-06, "loss": 0.2011, "step": 213225 }, { "epoch": 2.1, "grad_norm": 16.26073455810547, "learning_rate": 3.991366042082477e-06, "loss": 0.1258, "step": 213250 }, { "epoch": 2.1, "grad_norm": 4.651358127593994, "learning_rate": 3.991241919628228e-06, "loss": 0.2257, "step": 213275 }, { "epoch": 2.1, "grad_norm": 1.7517958879470825, "learning_rate": 3.9911177971739804e-06, "loss": 0.1311, "step": 213300 }, { "epoch": 2.1, "grad_norm": 4.0862603187561035, "learning_rate": 3.990993674719732e-06, "loss": 0.247, "step": 213325 }, { "epoch": 2.1, "grad_norm": 8.087075233459473, "learning_rate": 3.990869552265484e-06, "loss": 0.1212, "step": 213350 }, { "epoch": 2.1, "grad_norm": 2.481717586517334, "learning_rate": 3.990745429811235e-06, "loss": 0.2147, "step": 213375 }, { "epoch": 2.1, "grad_norm": 6.698223114013672, "learning_rate": 3.990621307356987e-06, "loss": 0.1398, "step": 213400 }, { "epoch": 2.1, "grad_norm": 12.256879806518555, "learning_rate": 3.990497184902738e-06, "loss": 0.2098, "step": 213425 }, { "epoch": 2.1, "grad_norm": 13.299895286560059, "learning_rate": 3.990373062448489e-06, "loss": 0.1359, "step": 213450 }, { "epoch": 2.1, "grad_norm": 4.005307197570801, "learning_rate": 3.9902489399942415e-06, "loss": 0.2284, "step": 213475 }, { "epoch": 2.1, "grad_norm": 3.3084070682525635, "learning_rate": 3.990124817539993e-06, "loss": 0.0917, "step": 213500 }, { "epoch": 2.1, "grad_norm": 4.280498504638672, "learning_rate": 3.990000695085744e-06, "loss": 0.2066, "step": 213525 }, { "epoch": 2.1, "grad_norm": 14.721097946166992, "learning_rate": 3.989876572631496e-06, "loss": 0.1391, "step": 213550 }, { "epoch": 2.1, "grad_norm": 5.187100887298584, "learning_rate": 3.989752450177247e-06, "loss": 0.2543, "step": 213575 }, { "epoch": 2.1, "grad_norm": 10.857362747192383, "learning_rate": 3.989628327722998e-06, "loss": 0.1182, "step": 213600 }, { "epoch": 2.1, "grad_norm": 1.1008762121200562, "learning_rate": 3.98950420526875e-06, "loss": 0.2231, "step": 213625 }, { "epoch": 2.1, "grad_norm": 8.533863067626953, "learning_rate": 3.989380082814502e-06, "loss": 0.1043, "step": 213650 }, { "epoch": 2.1, "grad_norm": 1.2420778274536133, "learning_rate": 3.989255960360253e-06, "loss": 0.1782, "step": 213675 }, { "epoch": 2.1, "grad_norm": 8.843015670776367, "learning_rate": 3.989131837906005e-06, "loss": 0.0815, "step": 213700 }, { "epoch": 2.1, "grad_norm": 2.574751138687134, "learning_rate": 3.989012680349926e-06, "loss": 0.1823, "step": 213725 }, { "epoch": 2.1, "grad_norm": 15.324172973632812, "learning_rate": 3.988888557895678e-06, "loss": 0.1077, "step": 213750 }, { "epoch": 2.1, "grad_norm": 5.584234714508057, "learning_rate": 3.988764435441429e-06, "loss": 0.2205, "step": 213775 }, { "epoch": 2.1, "grad_norm": 12.145620346069336, "learning_rate": 3.988640312987181e-06, "loss": 0.1303, "step": 213800 }, { "epoch": 2.1, "grad_norm": 8.45803165435791, "learning_rate": 3.9885161905329325e-06, "loss": 0.1903, "step": 213825 }, { "epoch": 2.1, "grad_norm": 12.711183547973633, "learning_rate": 3.9883920680786845e-06, "loss": 0.1205, "step": 213850 }, { "epoch": 2.1, "grad_norm": 2.3264176845550537, "learning_rate": 3.988267945624436e-06, "loss": 0.2452, "step": 213875 }, { "epoch": 2.1, "grad_norm": 12.294133186340332, "learning_rate": 3.988143823170187e-06, "loss": 0.1303, "step": 213900 }, { "epoch": 2.1, "grad_norm": 3.738262891769409, "learning_rate": 3.988019700715939e-06, "loss": 0.1758, "step": 213925 }, { "epoch": 2.1, "grad_norm": 11.550732612609863, "learning_rate": 3.98789557826169e-06, "loss": 0.145, "step": 213950 }, { "epoch": 2.1, "grad_norm": 2.4518468379974365, "learning_rate": 3.9877714558074414e-06, "loss": 0.2154, "step": 213975 }, { "epoch": 2.1, "grad_norm": 10.73868465423584, "learning_rate": 3.9876473333531935e-06, "loss": 0.1268, "step": 214000 }, { "epoch": 2.1, "grad_norm": 4.560984134674072, "learning_rate": 3.987523210898945e-06, "loss": 0.2033, "step": 214025 }, { "epoch": 2.1, "grad_norm": 15.286505699157715, "learning_rate": 3.987399088444696e-06, "loss": 0.1205, "step": 214050 }, { "epoch": 2.1, "grad_norm": 9.44581127166748, "learning_rate": 3.987274965990448e-06, "loss": 0.2024, "step": 214075 }, { "epoch": 2.11, "grad_norm": 10.629648208618164, "learning_rate": 3.987150843536199e-06, "loss": 0.1459, "step": 214100 }, { "epoch": 2.11, "grad_norm": 5.958962917327881, "learning_rate": 3.98702672108195e-06, "loss": 0.2322, "step": 214125 }, { "epoch": 2.11, "grad_norm": 12.686208724975586, "learning_rate": 3.9869025986277024e-06, "loss": 0.1085, "step": 214150 }, { "epoch": 2.11, "grad_norm": 8.034875869750977, "learning_rate": 3.986778476173454e-06, "loss": 0.2581, "step": 214175 }, { "epoch": 2.11, "grad_norm": 13.127994537353516, "learning_rate": 3.986654353719206e-06, "loss": 0.0783, "step": 214200 }, { "epoch": 2.11, "grad_norm": 4.041104316711426, "learning_rate": 3.986530231264957e-06, "loss": 0.2131, "step": 214225 }, { "epoch": 2.11, "grad_norm": 9.987712860107422, "learning_rate": 3.986406108810709e-06, "loss": 0.126, "step": 214250 }, { "epoch": 2.11, "grad_norm": 4.227272987365723, "learning_rate": 3.98628198635646e-06, "loss": 0.178, "step": 214275 }, { "epoch": 2.11, "grad_norm": 8.632951736450195, "learning_rate": 3.986157863902212e-06, "loss": 0.106, "step": 214300 }, { "epoch": 2.11, "grad_norm": 5.465086936950684, "learning_rate": 3.9860337414479635e-06, "loss": 0.2204, "step": 214325 }, { "epoch": 2.11, "grad_norm": 17.485925674438477, "learning_rate": 3.985909618993715e-06, "loss": 0.1444, "step": 214350 }, { "epoch": 2.11, "grad_norm": 7.593334674835205, "learning_rate": 3.985785496539467e-06, "loss": 0.203, "step": 214375 }, { "epoch": 2.11, "grad_norm": 9.347421646118164, "learning_rate": 3.985661374085218e-06, "loss": 0.1367, "step": 214400 }, { "epoch": 2.11, "grad_norm": 5.391568660736084, "learning_rate": 3.985537251630969e-06, "loss": 0.2274, "step": 214425 }, { "epoch": 2.11, "grad_norm": 8.109612464904785, "learning_rate": 3.985413129176721e-06, "loss": 0.1062, "step": 214450 }, { "epoch": 2.11, "grad_norm": 1.1940191984176636, "learning_rate": 3.9852890067224724e-06, "loss": 0.1969, "step": 214475 }, { "epoch": 2.11, "grad_norm": 13.037384986877441, "learning_rate": 3.985164884268224e-06, "loss": 0.1375, "step": 214500 }, { "epoch": 2.11, "grad_norm": 7.458321571350098, "learning_rate": 3.985040761813975e-06, "loss": 0.2311, "step": 214525 }, { "epoch": 2.11, "grad_norm": 10.411575317382812, "learning_rate": 3.984916639359727e-06, "loss": 0.1067, "step": 214550 }, { "epoch": 2.11, "grad_norm": 4.646700859069824, "learning_rate": 3.984792516905478e-06, "loss": 0.2236, "step": 214575 }, { "epoch": 2.11, "grad_norm": 7.424493312835693, "learning_rate": 3.98466839445123e-06, "loss": 0.1174, "step": 214600 }, { "epoch": 2.11, "grad_norm": 4.824001789093018, "learning_rate": 3.984544271996981e-06, "loss": 0.2154, "step": 214625 }, { "epoch": 2.11, "grad_norm": 8.878884315490723, "learning_rate": 3.9844201495427335e-06, "loss": 0.1286, "step": 214650 }, { "epoch": 2.11, "grad_norm": 2.59385085105896, "learning_rate": 3.984296027088485e-06, "loss": 0.1643, "step": 214675 }, { "epoch": 2.11, "grad_norm": 8.346040725708008, "learning_rate": 3.984171904634237e-06, "loss": 0.1313, "step": 214700 }, { "epoch": 2.11, "grad_norm": 5.375958442687988, "learning_rate": 3.984047782179988e-06, "loss": 0.2213, "step": 214725 }, { "epoch": 2.11, "grad_norm": 15.337854385375977, "learning_rate": 3.983923659725739e-06, "loss": 0.1437, "step": 214750 }, { "epoch": 2.11, "grad_norm": 4.465531826019287, "learning_rate": 3.983799537271491e-06, "loss": 0.2333, "step": 214775 }, { "epoch": 2.11, "grad_norm": 14.917827606201172, "learning_rate": 3.983675414817242e-06, "loss": 0.0969, "step": 214800 }, { "epoch": 2.11, "grad_norm": 8.732257843017578, "learning_rate": 3.983551292362994e-06, "loss": 0.2413, "step": 214825 }, { "epoch": 2.11, "grad_norm": 11.61935806274414, "learning_rate": 3.983427169908746e-06, "loss": 0.1419, "step": 214850 }, { "epoch": 2.11, "grad_norm": 6.027915000915527, "learning_rate": 3.983303047454497e-06, "loss": 0.1864, "step": 214875 }, { "epoch": 2.11, "grad_norm": 9.957155227661133, "learning_rate": 3.983178925000248e-06, "loss": 0.1407, "step": 214900 }, { "epoch": 2.11, "grad_norm": 4.9760966300964355, "learning_rate": 3.983054802546e-06, "loss": 0.2423, "step": 214925 }, { "epoch": 2.11, "grad_norm": 9.793842315673828, "learning_rate": 3.982930680091751e-06, "loss": 0.1162, "step": 214950 }, { "epoch": 2.11, "grad_norm": 0.3308962881565094, "learning_rate": 3.982806557637503e-06, "loss": 0.1998, "step": 214975 }, { "epoch": 2.11, "grad_norm": 13.04032039642334, "learning_rate": 3.982682435183255e-06, "loss": 0.1394, "step": 215000 }, { "epoch": 2.11, "grad_norm": 6.630911350250244, "learning_rate": 3.982558312729006e-06, "loss": 0.2021, "step": 215025 }, { "epoch": 2.11, "grad_norm": 7.282685279846191, "learning_rate": 3.982434190274758e-06, "loss": 0.1327, "step": 215050 }, { "epoch": 2.11, "grad_norm": 4.744588851928711, "learning_rate": 3.982310067820509e-06, "loss": 0.1937, "step": 215075 }, { "epoch": 2.11, "grad_norm": 13.508674621582031, "learning_rate": 3.982185945366261e-06, "loss": 0.1737, "step": 215100 }, { "epoch": 2.12, "grad_norm": 3.0566368103027344, "learning_rate": 3.982061822912012e-06, "loss": 0.224, "step": 215125 }, { "epoch": 2.12, "grad_norm": 13.419532775878906, "learning_rate": 3.9819377004577645e-06, "loss": 0.0901, "step": 215150 }, { "epoch": 2.12, "grad_norm": 5.324384689331055, "learning_rate": 3.981813578003516e-06, "loss": 0.2076, "step": 215175 }, { "epoch": 2.12, "grad_norm": 13.503429412841797, "learning_rate": 3.981689455549267e-06, "loss": 0.1218, "step": 215200 }, { "epoch": 2.12, "grad_norm": 4.083441257476807, "learning_rate": 3.981565333095019e-06, "loss": 0.1983, "step": 215225 }, { "epoch": 2.12, "grad_norm": 7.142414093017578, "learning_rate": 3.98144121064077e-06, "loss": 0.1158, "step": 215250 }, { "epoch": 2.12, "grad_norm": 5.570886611938477, "learning_rate": 3.981317088186521e-06, "loss": 0.2125, "step": 215275 }, { "epoch": 2.12, "grad_norm": 9.649677276611328, "learning_rate": 3.981192965732273e-06, "loss": 0.1277, "step": 215300 }, { "epoch": 2.12, "grad_norm": 5.812182903289795, "learning_rate": 3.981068843278025e-06, "loss": 0.182, "step": 215325 }, { "epoch": 2.12, "grad_norm": 9.25173568725586, "learning_rate": 3.980944720823776e-06, "loss": 0.1324, "step": 215350 }, { "epoch": 2.12, "grad_norm": 7.1704182624816895, "learning_rate": 3.980820598369528e-06, "loss": 0.2733, "step": 215375 }, { "epoch": 2.12, "grad_norm": 11.172025680541992, "learning_rate": 3.980696475915279e-06, "loss": 0.1395, "step": 215400 }, { "epoch": 2.12, "grad_norm": 8.953158378601074, "learning_rate": 3.980572353461031e-06, "loss": 0.2257, "step": 215425 }, { "epoch": 2.12, "grad_norm": 14.587264060974121, "learning_rate": 3.980448231006782e-06, "loss": 0.1435, "step": 215450 }, { "epoch": 2.12, "grad_norm": 3.5030384063720703, "learning_rate": 3.9803241085525344e-06, "loss": 0.194, "step": 215475 }, { "epoch": 2.12, "grad_norm": 16.57521629333496, "learning_rate": 3.980199986098286e-06, "loss": 0.1334, "step": 215500 }, { "epoch": 2.12, "grad_norm": 3.574431896209717, "learning_rate": 3.980075863644037e-06, "loss": 0.2272, "step": 215525 }, { "epoch": 2.12, "grad_norm": 11.560467720031738, "learning_rate": 3.979951741189789e-06, "loss": 0.116, "step": 215550 }, { "epoch": 2.12, "grad_norm": 1.8557955026626587, "learning_rate": 3.97982761873554e-06, "loss": 0.1947, "step": 215575 }, { "epoch": 2.12, "grad_norm": 11.89069652557373, "learning_rate": 3.979703496281291e-06, "loss": 0.1258, "step": 215600 }, { "epoch": 2.12, "grad_norm": 5.262467861175537, "learning_rate": 3.979579373827043e-06, "loss": 0.178, "step": 215625 }, { "epoch": 2.12, "grad_norm": 8.427450180053711, "learning_rate": 3.979455251372795e-06, "loss": 0.1376, "step": 215650 }, { "epoch": 2.12, "grad_norm": 4.6440887451171875, "learning_rate": 3.979331128918546e-06, "loss": 0.207, "step": 215675 }, { "epoch": 2.12, "grad_norm": 11.876821517944336, "learning_rate": 3.979207006464298e-06, "loss": 0.1158, "step": 215700 }, { "epoch": 2.12, "grad_norm": 7.881806373596191, "learning_rate": 3.979082884010049e-06, "loss": 0.1891, "step": 215725 }, { "epoch": 2.12, "grad_norm": 5.723994255065918, "learning_rate": 3.9789587615558e-06, "loss": 0.1219, "step": 215750 }, { "epoch": 2.12, "grad_norm": 1.056012511253357, "learning_rate": 3.978839603999722e-06, "loss": 0.2247, "step": 215775 }, { "epoch": 2.12, "grad_norm": 7.112066745758057, "learning_rate": 3.978715481545473e-06, "loss": 0.1272, "step": 215800 }, { "epoch": 2.12, "grad_norm": 7.952840328216553, "learning_rate": 3.9785913590912255e-06, "loss": 0.189, "step": 215825 }, { "epoch": 2.12, "grad_norm": 4.449411869049072, "learning_rate": 3.978467236636977e-06, "loss": 0.0945, "step": 215850 }, { "epoch": 2.12, "grad_norm": 5.126324653625488, "learning_rate": 3.978343114182729e-06, "loss": 0.2263, "step": 215875 }, { "epoch": 2.12, "grad_norm": 11.28897762298584, "learning_rate": 3.97821899172848e-06, "loss": 0.1445, "step": 215900 }, { "epoch": 2.12, "grad_norm": 2.8373472690582275, "learning_rate": 3.978094869274232e-06, "loss": 0.1971, "step": 215925 }, { "epoch": 2.12, "grad_norm": 14.305767059326172, "learning_rate": 3.977970746819983e-06, "loss": 0.1096, "step": 215950 }, { "epoch": 2.12, "grad_norm": 4.138350486755371, "learning_rate": 3.977846624365734e-06, "loss": 0.2316, "step": 215975 }, { "epoch": 2.12, "grad_norm": 9.868815422058105, "learning_rate": 3.9777225019114865e-06, "loss": 0.1182, "step": 216000 }, { "epoch": 2.12, "grad_norm": 2.0251474380493164, "learning_rate": 3.977598379457238e-06, "loss": 0.1733, "step": 216025 }, { "epoch": 2.12, "grad_norm": 10.652077674865723, "learning_rate": 3.977474257002989e-06, "loss": 0.1424, "step": 216050 }, { "epoch": 2.12, "grad_norm": 6.374371528625488, "learning_rate": 3.977350134548741e-06, "loss": 0.1735, "step": 216075 }, { "epoch": 2.12, "grad_norm": 21.198287963867188, "learning_rate": 3.977226012094492e-06, "loss": 0.131, "step": 216100 }, { "epoch": 2.12, "grad_norm": 4.438529968261719, "learning_rate": 3.977101889640243e-06, "loss": 0.2571, "step": 216125 }, { "epoch": 2.13, "grad_norm": 12.597207069396973, "learning_rate": 3.9769777671859954e-06, "loss": 0.1526, "step": 216150 }, { "epoch": 2.13, "grad_norm": 4.2657904624938965, "learning_rate": 3.976853644731747e-06, "loss": 0.1971, "step": 216175 }, { "epoch": 2.13, "grad_norm": 13.255030632019043, "learning_rate": 3.976729522277498e-06, "loss": 0.1075, "step": 216200 }, { "epoch": 2.13, "grad_norm": 4.339363098144531, "learning_rate": 3.97660539982325e-06, "loss": 0.1711, "step": 216225 }, { "epoch": 2.13, "grad_norm": 10.669394493103027, "learning_rate": 3.976481277369001e-06, "loss": 0.1049, "step": 216250 }, { "epoch": 2.13, "grad_norm": 2.3290820121765137, "learning_rate": 3.976357154914753e-06, "loss": 0.1635, "step": 216275 }, { "epoch": 2.13, "grad_norm": 8.498069763183594, "learning_rate": 3.976233032460504e-06, "loss": 0.1279, "step": 216300 }, { "epoch": 2.13, "grad_norm": 4.5797505378723145, "learning_rate": 3.9761089100062565e-06, "loss": 0.2259, "step": 216325 }, { "epoch": 2.13, "grad_norm": 7.9545207023620605, "learning_rate": 3.975984787552008e-06, "loss": 0.1268, "step": 216350 }, { "epoch": 2.13, "grad_norm": 5.396627902984619, "learning_rate": 3.97586066509776e-06, "loss": 0.2202, "step": 216375 }, { "epoch": 2.13, "grad_norm": 7.9398298263549805, "learning_rate": 3.975736542643511e-06, "loss": 0.1545, "step": 216400 }, { "epoch": 2.13, "grad_norm": 5.3979411125183105, "learning_rate": 3.975612420189262e-06, "loss": 0.2093, "step": 216425 }, { "epoch": 2.13, "grad_norm": 16.289939880371094, "learning_rate": 3.975488297735014e-06, "loss": 0.1047, "step": 216450 }, { "epoch": 2.13, "grad_norm": 13.412553787231445, "learning_rate": 3.975364175280765e-06, "loss": 0.2223, "step": 216475 }, { "epoch": 2.13, "grad_norm": 8.315796852111816, "learning_rate": 3.975240052826517e-06, "loss": 0.1153, "step": 216500 }, { "epoch": 2.13, "grad_norm": 9.317569732666016, "learning_rate": 3.975115930372269e-06, "loss": 0.2102, "step": 216525 }, { "epoch": 2.13, "grad_norm": 15.431571006774902, "learning_rate": 3.97499180791802e-06, "loss": 0.1124, "step": 216550 }, { "epoch": 2.13, "grad_norm": 27.240436553955078, "learning_rate": 3.974867685463771e-06, "loss": 0.1573, "step": 216575 }, { "epoch": 2.13, "grad_norm": 8.405509948730469, "learning_rate": 3.974743563009523e-06, "loss": 0.0965, "step": 216600 }, { "epoch": 2.13, "grad_norm": 4.5003886222839355, "learning_rate": 3.974619440555274e-06, "loss": 0.2131, "step": 216625 }, { "epoch": 2.13, "grad_norm": 11.582817077636719, "learning_rate": 3.974495318101026e-06, "loss": 0.1334, "step": 216650 }, { "epoch": 2.13, "grad_norm": 8.40120792388916, "learning_rate": 3.974371195646778e-06, "loss": 0.2278, "step": 216675 }, { "epoch": 2.13, "grad_norm": 6.4036126136779785, "learning_rate": 3.974247073192529e-06, "loss": 0.0958, "step": 216700 }, { "epoch": 2.13, "grad_norm": 2.488459825515747, "learning_rate": 3.974122950738281e-06, "loss": 0.224, "step": 216725 }, { "epoch": 2.13, "grad_norm": 11.359360694885254, "learning_rate": 3.973998828284032e-06, "loss": 0.1428, "step": 216750 }, { "epoch": 2.13, "grad_norm": 4.997556209564209, "learning_rate": 3.973874705829784e-06, "loss": 0.1841, "step": 216775 }, { "epoch": 2.13, "grad_norm": 12.50033187866211, "learning_rate": 3.973750583375535e-06, "loss": 0.1272, "step": 216800 }, { "epoch": 2.13, "grad_norm": 2.6896753311157227, "learning_rate": 3.973626460921287e-06, "loss": 0.2269, "step": 216825 }, { "epoch": 2.13, "grad_norm": 12.521383285522461, "learning_rate": 3.973502338467039e-06, "loss": 0.1018, "step": 216850 }, { "epoch": 2.13, "grad_norm": 1.4233627319335938, "learning_rate": 3.97337821601279e-06, "loss": 0.1895, "step": 216875 }, { "epoch": 2.13, "grad_norm": 8.806678771972656, "learning_rate": 3.973254093558541e-06, "loss": 0.1392, "step": 216900 }, { "epoch": 2.13, "grad_norm": 2.9724643230438232, "learning_rate": 3.973129971104293e-06, "loss": 0.2068, "step": 216925 }, { "epoch": 2.13, "grad_norm": 6.652657508850098, "learning_rate": 3.973005848650044e-06, "loss": 0.1229, "step": 216950 }, { "epoch": 2.13, "grad_norm": 6.064100742340088, "learning_rate": 3.972881726195796e-06, "loss": 0.1984, "step": 216975 }, { "epoch": 2.13, "grad_norm": 13.22167682647705, "learning_rate": 3.972757603741548e-06, "loss": 0.1147, "step": 217000 }, { "epoch": 2.13, "grad_norm": 4.5795979499816895, "learning_rate": 3.972633481287299e-06, "loss": 0.1871, "step": 217025 }, { "epoch": 2.13, "grad_norm": 14.662748336791992, "learning_rate": 3.97250935883305e-06, "loss": 0.1538, "step": 217050 }, { "epoch": 2.13, "grad_norm": 2.279067039489746, "learning_rate": 3.972385236378802e-06, "loss": 0.2328, "step": 217075 }, { "epoch": 2.13, "grad_norm": 17.230451583862305, "learning_rate": 3.972261113924553e-06, "loss": 0.1067, "step": 217100 }, { "epoch": 2.13, "grad_norm": 7.659762859344482, "learning_rate": 3.972136991470305e-06, "loss": 0.2647, "step": 217125 }, { "epoch": 2.14, "grad_norm": 9.71596622467041, "learning_rate": 3.972012869016057e-06, "loss": 0.1438, "step": 217150 }, { "epoch": 2.14, "grad_norm": 5.9744954109191895, "learning_rate": 3.971888746561809e-06, "loss": 0.1759, "step": 217175 }, { "epoch": 2.14, "grad_norm": 11.166077613830566, "learning_rate": 3.97176462410756e-06, "loss": 0.0961, "step": 217200 }, { "epoch": 2.14, "grad_norm": 3.1792802810668945, "learning_rate": 3.971640501653312e-06, "loss": 0.1855, "step": 217225 }, { "epoch": 2.14, "grad_norm": 11.83922004699707, "learning_rate": 3.971516379199063e-06, "loss": 0.1082, "step": 217250 }, { "epoch": 2.14, "grad_norm": 1.073854923248291, "learning_rate": 3.971392256744814e-06, "loss": 0.2053, "step": 217275 }, { "epoch": 2.14, "grad_norm": 6.641141891479492, "learning_rate": 3.971268134290566e-06, "loss": 0.1486, "step": 217300 }, { "epoch": 2.14, "grad_norm": 5.01456356048584, "learning_rate": 3.971144011836318e-06, "loss": 0.1818, "step": 217325 }, { "epoch": 2.14, "grad_norm": 11.652692794799805, "learning_rate": 3.971019889382069e-06, "loss": 0.115, "step": 217350 }, { "epoch": 2.14, "grad_norm": 1.7837942838668823, "learning_rate": 3.970895766927821e-06, "loss": 0.2018, "step": 217375 }, { "epoch": 2.14, "grad_norm": 8.154664993286133, "learning_rate": 3.970771644473572e-06, "loss": 0.1056, "step": 217400 }, { "epoch": 2.14, "grad_norm": 0.08730854839086533, "learning_rate": 3.970647522019323e-06, "loss": 0.193, "step": 217425 }, { "epoch": 2.14, "grad_norm": 22.543289184570312, "learning_rate": 3.970523399565075e-06, "loss": 0.1067, "step": 217450 }, { "epoch": 2.14, "grad_norm": 5.511642932891846, "learning_rate": 3.970399277110827e-06, "loss": 0.1971, "step": 217475 }, { "epoch": 2.14, "grad_norm": 13.051043510437012, "learning_rate": 3.970275154656578e-06, "loss": 0.1347, "step": 217500 }, { "epoch": 2.14, "grad_norm": 6.729798793792725, "learning_rate": 3.97015103220233e-06, "loss": 0.2019, "step": 217525 }, { "epoch": 2.14, "grad_norm": 11.820609092712402, "learning_rate": 3.970026909748081e-06, "loss": 0.1098, "step": 217550 }, { "epoch": 2.14, "grad_norm": 5.607062816619873, "learning_rate": 3.969902787293833e-06, "loss": 0.2144, "step": 217575 }, { "epoch": 2.14, "grad_norm": 8.08608341217041, "learning_rate": 3.969778664839584e-06, "loss": 0.1505, "step": 217600 }, { "epoch": 2.14, "grad_norm": 0.6783499717712402, "learning_rate": 3.969654542385336e-06, "loss": 0.2001, "step": 217625 }, { "epoch": 2.14, "grad_norm": 13.247627258300781, "learning_rate": 3.969530419931088e-06, "loss": 0.141, "step": 217650 }, { "epoch": 2.14, "grad_norm": 4.155354022979736, "learning_rate": 3.969406297476839e-06, "loss": 0.2053, "step": 217675 }, { "epoch": 2.14, "grad_norm": 11.341318130493164, "learning_rate": 3.969282175022591e-06, "loss": 0.141, "step": 217700 }, { "epoch": 2.14, "grad_norm": 3.3340585231781006, "learning_rate": 3.969158052568342e-06, "loss": 0.2189, "step": 217725 }, { "epoch": 2.14, "grad_norm": 9.779045104980469, "learning_rate": 3.969033930114093e-06, "loss": 0.1322, "step": 217750 }, { "epoch": 2.14, "grad_norm": 1.5488557815551758, "learning_rate": 3.968909807659845e-06, "loss": 0.1908, "step": 217775 }, { "epoch": 2.14, "grad_norm": 14.182503700256348, "learning_rate": 3.9687856852055966e-06, "loss": 0.1562, "step": 217800 }, { "epoch": 2.14, "grad_norm": 2.864748001098633, "learning_rate": 3.968661562751348e-06, "loss": 0.2684, "step": 217825 }, { "epoch": 2.14, "grad_norm": 9.098125457763672, "learning_rate": 3.9685374402971e-06, "loss": 0.132, "step": 217850 }, { "epoch": 2.14, "grad_norm": 2.76431941986084, "learning_rate": 3.968413317842851e-06, "loss": 0.232, "step": 217875 }, { "epoch": 2.14, "grad_norm": 11.285857200622559, "learning_rate": 3.968289195388603e-06, "loss": 0.0969, "step": 217900 }, { "epoch": 2.14, "grad_norm": 5.584897041320801, "learning_rate": 3.968165072934354e-06, "loss": 0.1702, "step": 217925 }, { "epoch": 2.14, "grad_norm": 8.574405670166016, "learning_rate": 3.968040950480106e-06, "loss": 0.1456, "step": 217950 }, { "epoch": 2.14, "grad_norm": 2.593899965286255, "learning_rate": 3.967921792924027e-06, "loss": 0.2733, "step": 217975 }, { "epoch": 2.14, "grad_norm": 24.681995391845703, "learning_rate": 3.967797670469779e-06, "loss": 0.1273, "step": 218000 }, { "epoch": 2.14, "grad_norm": 1.5233864784240723, "learning_rate": 3.967673548015531e-06, "loss": 0.1991, "step": 218025 }, { "epoch": 2.14, "grad_norm": 11.209391593933105, "learning_rate": 3.967549425561282e-06, "loss": 0.0888, "step": 218050 }, { "epoch": 2.14, "grad_norm": 8.227201461791992, "learning_rate": 3.967425303107034e-06, "loss": 0.1912, "step": 218075 }, { "epoch": 2.14, "grad_norm": 5.782423973083496, "learning_rate": 3.967301180652785e-06, "loss": 0.1222, "step": 218100 }, { "epoch": 2.14, "grad_norm": 5.545090198516846, "learning_rate": 3.967177058198537e-06, "loss": 0.2048, "step": 218125 }, { "epoch": 2.14, "grad_norm": 8.195245742797852, "learning_rate": 3.9670529357442884e-06, "loss": 0.0965, "step": 218150 }, { "epoch": 2.15, "grad_norm": 2.3525593280792236, "learning_rate": 3.96692881329004e-06, "loss": 0.1751, "step": 218175 }, { "epoch": 2.15, "grad_norm": 11.47995662689209, "learning_rate": 3.966804690835791e-06, "loss": 0.1124, "step": 218200 }, { "epoch": 2.15, "grad_norm": 4.388243198394775, "learning_rate": 3.966680568381543e-06, "loss": 0.2323, "step": 218225 }, { "epoch": 2.15, "grad_norm": 7.35845947265625, "learning_rate": 3.966556445927294e-06, "loss": 0.1228, "step": 218250 }, { "epoch": 2.15, "grad_norm": 1.9449645280838013, "learning_rate": 3.966432323473045e-06, "loss": 0.2208, "step": 218275 }, { "epoch": 2.15, "grad_norm": 12.93636417388916, "learning_rate": 3.966308201018797e-06, "loss": 0.1523, "step": 218300 }, { "epoch": 2.15, "grad_norm": 5.151219367980957, "learning_rate": 3.966184078564549e-06, "loss": 0.2694, "step": 218325 }, { "epoch": 2.15, "grad_norm": 14.81828498840332, "learning_rate": 3.966059956110301e-06, "loss": 0.0989, "step": 218350 }, { "epoch": 2.15, "grad_norm": 2.2940080165863037, "learning_rate": 3.965935833656052e-06, "loss": 0.198, "step": 218375 }, { "epoch": 2.15, "grad_norm": 10.206365585327148, "learning_rate": 3.965811711201804e-06, "loss": 0.1052, "step": 218400 }, { "epoch": 2.15, "grad_norm": 5.686875820159912, "learning_rate": 3.965687588747555e-06, "loss": 0.2312, "step": 218425 }, { "epoch": 2.15, "grad_norm": 10.079679489135742, "learning_rate": 3.965563466293307e-06, "loss": 0.1273, "step": 218450 }, { "epoch": 2.15, "grad_norm": 4.477410793304443, "learning_rate": 3.965439343839058e-06, "loss": 0.2175, "step": 218475 }, { "epoch": 2.15, "grad_norm": 7.821643829345703, "learning_rate": 3.96531522138481e-06, "loss": 0.119, "step": 218500 }, { "epoch": 2.15, "grad_norm": 2.26298451423645, "learning_rate": 3.965191098930562e-06, "loss": 0.1789, "step": 218525 }, { "epoch": 2.15, "grad_norm": 9.433934211730957, "learning_rate": 3.965066976476313e-06, "loss": 0.0894, "step": 218550 }, { "epoch": 2.15, "grad_norm": 4.024695873260498, "learning_rate": 3.964942854022064e-06, "loss": 0.2674, "step": 218575 }, { "epoch": 2.15, "grad_norm": 7.115659713745117, "learning_rate": 3.964818731567816e-06, "loss": 0.1469, "step": 218600 }, { "epoch": 2.15, "grad_norm": 4.19707727432251, "learning_rate": 3.964694609113567e-06, "loss": 0.2226, "step": 218625 }, { "epoch": 2.15, "grad_norm": 9.405281066894531, "learning_rate": 3.964570486659319e-06, "loss": 0.1208, "step": 218650 }, { "epoch": 2.15, "grad_norm": 4.714053153991699, "learning_rate": 3.964446364205071e-06, "loss": 0.2135, "step": 218675 }, { "epoch": 2.15, "grad_norm": 17.456157684326172, "learning_rate": 3.964322241750822e-06, "loss": 0.1742, "step": 218700 }, { "epoch": 2.15, "grad_norm": 3.761873960494995, "learning_rate": 3.964198119296573e-06, "loss": 0.2407, "step": 218725 }, { "epoch": 2.15, "grad_norm": 9.467486381530762, "learning_rate": 3.964073996842325e-06, "loss": 0.1373, "step": 218750 }, { "epoch": 2.15, "grad_norm": 2.1880886554718018, "learning_rate": 3.963949874388076e-06, "loss": 0.2551, "step": 218775 }, { "epoch": 2.15, "grad_norm": 6.120116710662842, "learning_rate": 3.963825751933828e-06, "loss": 0.128, "step": 218800 }, { "epoch": 2.15, "grad_norm": 9.04922103881836, "learning_rate": 3.96370162947958e-06, "loss": 0.2035, "step": 218825 }, { "epoch": 2.15, "grad_norm": 8.615633964538574, "learning_rate": 3.963577507025332e-06, "loss": 0.1315, "step": 218850 }, { "epoch": 2.15, "grad_norm": 7.460079193115234, "learning_rate": 3.963453384571083e-06, "loss": 0.2196, "step": 218875 }, { "epoch": 2.15, "grad_norm": 8.779266357421875, "learning_rate": 3.963329262116835e-06, "loss": 0.1286, "step": 218900 }, { "epoch": 2.15, "grad_norm": 6.384383678436279, "learning_rate": 3.963205139662586e-06, "loss": 0.1887, "step": 218925 }, { "epoch": 2.15, "grad_norm": 11.428749084472656, "learning_rate": 3.963081017208337e-06, "loss": 0.0931, "step": 218950 }, { "epoch": 2.15, "grad_norm": 3.8403544425964355, "learning_rate": 3.962956894754089e-06, "loss": 0.2127, "step": 218975 }, { "epoch": 2.15, "grad_norm": 8.040103912353516, "learning_rate": 3.962832772299841e-06, "loss": 0.127, "step": 219000 }, { "epoch": 2.15, "grad_norm": 4.657713413238525, "learning_rate": 3.962708649845592e-06, "loss": 0.2104, "step": 219025 }, { "epoch": 2.15, "grad_norm": 13.103795051574707, "learning_rate": 3.962584527391344e-06, "loss": 0.1172, "step": 219050 }, { "epoch": 2.15, "grad_norm": 6.276782035827637, "learning_rate": 3.962460404937095e-06, "loss": 0.2278, "step": 219075 }, { "epoch": 2.15, "grad_norm": 17.052642822265625, "learning_rate": 3.962336282482846e-06, "loss": 0.1454, "step": 219100 }, { "epoch": 2.15, "grad_norm": 5.136257171630859, "learning_rate": 3.9622121600285975e-06, "loss": 0.2018, "step": 219125 }, { "epoch": 2.15, "grad_norm": 17.13450813293457, "learning_rate": 3.96208803757435e-06, "loss": 0.1251, "step": 219150 }, { "epoch": 2.15, "grad_norm": 15.225081443786621, "learning_rate": 3.961963915120101e-06, "loss": 0.1976, "step": 219175 }, { "epoch": 2.16, "grad_norm": 4.021183490753174, "learning_rate": 3.961839792665853e-06, "loss": 0.1394, "step": 219200 }, { "epoch": 2.16, "grad_norm": 5.370628833770752, "learning_rate": 3.961715670211604e-06, "loss": 0.1941, "step": 219225 }, { "epoch": 2.16, "grad_norm": 6.408087253570557, "learning_rate": 3.961591547757356e-06, "loss": 0.1198, "step": 219250 }, { "epoch": 2.16, "grad_norm": 3.633277177810669, "learning_rate": 3.961467425303107e-06, "loss": 0.2154, "step": 219275 }, { "epoch": 2.16, "grad_norm": 6.593579292297363, "learning_rate": 3.961343302848859e-06, "loss": 0.105, "step": 219300 }, { "epoch": 2.16, "grad_norm": 2.607701063156128, "learning_rate": 3.961219180394611e-06, "loss": 0.2416, "step": 219325 }, { "epoch": 2.16, "grad_norm": 14.220370292663574, "learning_rate": 3.961095057940362e-06, "loss": 0.1111, "step": 219350 }, { "epoch": 2.16, "grad_norm": 4.160770416259766, "learning_rate": 3.960970935486114e-06, "loss": 0.222, "step": 219375 }, { "epoch": 2.16, "grad_norm": 12.6060152053833, "learning_rate": 3.960846813031865e-06, "loss": 0.1106, "step": 219400 }, { "epoch": 2.16, "grad_norm": 8.716622352600098, "learning_rate": 3.960722690577616e-06, "loss": 0.2336, "step": 219425 }, { "epoch": 2.16, "grad_norm": 14.406607627868652, "learning_rate": 3.960598568123368e-06, "loss": 0.1322, "step": 219450 }, { "epoch": 2.16, "grad_norm": 3.778564929962158, "learning_rate": 3.96047444566912e-06, "loss": 0.256, "step": 219475 }, { "epoch": 2.16, "grad_norm": 14.164894104003906, "learning_rate": 3.960350323214871e-06, "loss": 0.1431, "step": 219500 }, { "epoch": 2.16, "grad_norm": 3.5619935989379883, "learning_rate": 3.960226200760623e-06, "loss": 0.2546, "step": 219525 }, { "epoch": 2.16, "grad_norm": 7.3019490242004395, "learning_rate": 3.960102078306374e-06, "loss": 0.1101, "step": 219550 }, { "epoch": 2.16, "grad_norm": 4.83547306060791, "learning_rate": 3.959977955852125e-06, "loss": 0.2252, "step": 219575 }, { "epoch": 2.16, "grad_norm": 5.704540252685547, "learning_rate": 3.959853833397877e-06, "loss": 0.1195, "step": 219600 }, { "epoch": 2.16, "grad_norm": 0.9188774824142456, "learning_rate": 3.9597297109436285e-06, "loss": 0.1993, "step": 219625 }, { "epoch": 2.16, "grad_norm": 14.139154434204102, "learning_rate": 3.959605588489381e-06, "loss": 0.1393, "step": 219650 }, { "epoch": 2.16, "grad_norm": 3.907034158706665, "learning_rate": 3.959481466035132e-06, "loss": 0.1691, "step": 219675 }, { "epoch": 2.16, "grad_norm": 16.186552047729492, "learning_rate": 3.959357343580884e-06, "loss": 0.1298, "step": 219700 }, { "epoch": 2.16, "grad_norm": 6.410519599914551, "learning_rate": 3.959233221126635e-06, "loss": 0.1763, "step": 219725 }, { "epoch": 2.16, "grad_norm": 13.076272964477539, "learning_rate": 3.959109098672387e-06, "loss": 0.1488, "step": 219750 }, { "epoch": 2.16, "grad_norm": 5.110283374786377, "learning_rate": 3.958984976218138e-06, "loss": 0.2106, "step": 219775 }, { "epoch": 2.16, "grad_norm": 15.709227561950684, "learning_rate": 3.9588608537638896e-06, "loss": 0.1424, "step": 219800 }, { "epoch": 2.16, "grad_norm": 5.243505001068115, "learning_rate": 3.958736731309642e-06, "loss": 0.2103, "step": 219825 }, { "epoch": 2.16, "grad_norm": 10.553110122680664, "learning_rate": 3.958612608855393e-06, "loss": 0.1219, "step": 219850 }, { "epoch": 2.16, "grad_norm": 3.100353479385376, "learning_rate": 3.958488486401144e-06, "loss": 0.2259, "step": 219875 }, { "epoch": 2.16, "grad_norm": 10.178496360778809, "learning_rate": 3.958364363946896e-06, "loss": 0.0993, "step": 219900 }, { "epoch": 2.16, "grad_norm": 3.9287586212158203, "learning_rate": 3.958240241492647e-06, "loss": 0.1907, "step": 219925 }, { "epoch": 2.16, "grad_norm": 10.53648853302002, "learning_rate": 3.9581161190383985e-06, "loss": 0.1168, "step": 219950 }, { "epoch": 2.16, "grad_norm": 3.766157865524292, "learning_rate": 3.95799696148232e-06, "loss": 0.1969, "step": 219975 }, { "epoch": 2.16, "grad_norm": 11.103384971618652, "learning_rate": 3.957872839028072e-06, "loss": 0.1188, "step": 220000 }, { "epoch": 2.16, "eval_loss": 0.6359104514122009, "eval_runtime": 6059.6215, "eval_samples_per_second": 1.562, "eval_steps_per_second": 0.195, "eval_wer": 0.1265048606193266, "step": 220000 }, { "epoch": 2.16, "grad_norm": 6.83207368850708, "learning_rate": 3.957748716573823e-06, "loss": 0.1987, "step": 220025 }, { "epoch": 2.16, "grad_norm": 12.775317192077637, "learning_rate": 3.957624594119575e-06, "loss": 0.1262, "step": 220050 }, { "epoch": 2.16, "grad_norm": 5.040691375732422, "learning_rate": 3.957500471665326e-06, "loss": 0.2222, "step": 220075 }, { "epoch": 2.16, "grad_norm": 8.668159484863281, "learning_rate": 3.957376349211078e-06, "loss": 0.1548, "step": 220100 }, { "epoch": 2.16, "grad_norm": 5.755124092102051, "learning_rate": 3.957252226756829e-06, "loss": 0.1963, "step": 220125 }, { "epoch": 2.16, "grad_norm": 6.764210224151611, "learning_rate": 3.957128104302581e-06, "loss": 0.0977, "step": 220150 }, { "epoch": 2.16, "grad_norm": 3.16795015335083, "learning_rate": 3.957003981848333e-06, "loss": 0.1822, "step": 220175 }, { "epoch": 2.17, "grad_norm": 12.041747093200684, "learning_rate": 3.956879859394085e-06, "loss": 0.1172, "step": 220200 }, { "epoch": 2.17, "grad_norm": 8.610763549804688, "learning_rate": 3.956755736939836e-06, "loss": 0.1901, "step": 220225 }, { "epoch": 2.17, "grad_norm": 9.651317596435547, "learning_rate": 3.956631614485587e-06, "loss": 0.1316, "step": 220250 }, { "epoch": 2.17, "grad_norm": 8.362360000610352, "learning_rate": 3.956507492031339e-06, "loss": 0.2033, "step": 220275 }, { "epoch": 2.17, "grad_norm": 14.850129127502441, "learning_rate": 3.95638336957709e-06, "loss": 0.1218, "step": 220300 }, { "epoch": 2.17, "grad_norm": 3.6008570194244385, "learning_rate": 3.956259247122842e-06, "loss": 0.2203, "step": 220325 }, { "epoch": 2.17, "grad_norm": 7.622532367706299, "learning_rate": 3.956135124668594e-06, "loss": 0.1432, "step": 220350 }, { "epoch": 2.17, "grad_norm": 5.139064311981201, "learning_rate": 3.956011002214345e-06, "loss": 0.2099, "step": 220375 }, { "epoch": 2.17, "grad_norm": 9.547128677368164, "learning_rate": 3.955886879760096e-06, "loss": 0.1248, "step": 220400 }, { "epoch": 2.17, "grad_norm": 10.56515884399414, "learning_rate": 3.955762757305848e-06, "loss": 0.2239, "step": 220425 }, { "epoch": 2.17, "grad_norm": 12.912755012512207, "learning_rate": 3.955638634851599e-06, "loss": 0.1379, "step": 220450 }, { "epoch": 2.17, "grad_norm": 5.506654262542725, "learning_rate": 3.9555145123973506e-06, "loss": 0.2278, "step": 220475 }, { "epoch": 2.17, "grad_norm": 10.748215675354004, "learning_rate": 3.955390389943103e-06, "loss": 0.1142, "step": 220500 }, { "epoch": 2.17, "grad_norm": 7.6887125968933105, "learning_rate": 3.955266267488854e-06, "loss": 0.2319, "step": 220525 }, { "epoch": 2.17, "grad_norm": 7.4528608322143555, "learning_rate": 3.955142145034606e-06, "loss": 0.147, "step": 220550 }, { "epoch": 2.17, "grad_norm": 5.212543964385986, "learning_rate": 3.955018022580357e-06, "loss": 0.1878, "step": 220575 }, { "epoch": 2.17, "grad_norm": 9.55757999420166, "learning_rate": 3.954893900126109e-06, "loss": 0.116, "step": 220600 }, { "epoch": 2.17, "grad_norm": 3.150688409805298, "learning_rate": 3.95476977767186e-06, "loss": 0.2313, "step": 220625 }, { "epoch": 2.17, "grad_norm": 10.302506446838379, "learning_rate": 3.954645655217612e-06, "loss": 0.1243, "step": 220650 }, { "epoch": 2.17, "grad_norm": 6.5859270095825195, "learning_rate": 3.954521532763364e-06, "loss": 0.2252, "step": 220675 }, { "epoch": 2.17, "grad_norm": 4.929361343383789, "learning_rate": 3.954397410309115e-06, "loss": 0.1177, "step": 220700 }, { "epoch": 2.17, "grad_norm": 7.39597225189209, "learning_rate": 3.954273287854866e-06, "loss": 0.2312, "step": 220725 }, { "epoch": 2.17, "grad_norm": 10.5133695602417, "learning_rate": 3.954149165400618e-06, "loss": 0.1228, "step": 220750 }, { "epoch": 2.17, "grad_norm": 7.4379143714904785, "learning_rate": 3.954025042946369e-06, "loss": 0.1897, "step": 220775 }, { "epoch": 2.17, "grad_norm": 21.59010124206543, "learning_rate": 3.9539009204921205e-06, "loss": 0.1375, "step": 220800 }, { "epoch": 2.17, "grad_norm": 8.193257331848145, "learning_rate": 3.953776798037873e-06, "loss": 0.2281, "step": 220825 }, { "epoch": 2.17, "grad_norm": 11.468182563781738, "learning_rate": 3.953652675583624e-06, "loss": 0.106, "step": 220850 }, { "epoch": 2.17, "grad_norm": 5.391139507293701, "learning_rate": 3.953528553129375e-06, "loss": 0.2005, "step": 220875 }, { "epoch": 2.17, "grad_norm": 13.90733528137207, "learning_rate": 3.953404430675127e-06, "loss": 0.0858, "step": 220900 }, { "epoch": 2.17, "grad_norm": 2.753443956375122, "learning_rate": 3.953280308220878e-06, "loss": 0.1588, "step": 220925 }, { "epoch": 2.17, "grad_norm": 9.851441383361816, "learning_rate": 3.95315618576663e-06, "loss": 0.1158, "step": 220950 }, { "epoch": 2.17, "grad_norm": 0.037860896438360214, "learning_rate": 3.9530320633123816e-06, "loss": 0.2287, "step": 220975 }, { "epoch": 2.17, "grad_norm": 9.537018775939941, "learning_rate": 3.952907940858134e-06, "loss": 0.1583, "step": 221000 }, { "epoch": 2.17, "grad_norm": 8.422871589660645, "learning_rate": 3.952783818403885e-06, "loss": 0.2506, "step": 221025 }, { "epoch": 2.17, "grad_norm": 4.8520827293396, "learning_rate": 3.952659695949637e-06, "loss": 0.1333, "step": 221050 }, { "epoch": 2.17, "grad_norm": 5.3271894454956055, "learning_rate": 3.952535573495388e-06, "loss": 0.2216, "step": 221075 }, { "epoch": 2.17, "grad_norm": 12.885720252990723, "learning_rate": 3.952411451041139e-06, "loss": 0.1479, "step": 221100 }, { "epoch": 2.17, "grad_norm": 6.393199443817139, "learning_rate": 3.952287328586891e-06, "loss": 0.2179, "step": 221125 }, { "epoch": 2.17, "grad_norm": 15.609485626220703, "learning_rate": 3.952163206132643e-06, "loss": 0.1407, "step": 221150 }, { "epoch": 2.17, "grad_norm": 4.9306840896606445, "learning_rate": 3.952039083678394e-06, "loss": 0.2645, "step": 221175 }, { "epoch": 2.17, "grad_norm": 12.425246238708496, "learning_rate": 3.951914961224146e-06, "loss": 0.1089, "step": 221200 }, { "epoch": 2.18, "grad_norm": 4.197347164154053, "learning_rate": 3.951790838769897e-06, "loss": 0.1965, "step": 221225 }, { "epoch": 2.18, "grad_norm": 13.01657485961914, "learning_rate": 3.951666716315648e-06, "loss": 0.1463, "step": 221250 }, { "epoch": 2.18, "grad_norm": 2.8311312198638916, "learning_rate": 3.9515425938614e-06, "loss": 0.1858, "step": 221275 }, { "epoch": 2.18, "grad_norm": 10.033416748046875, "learning_rate": 3.9514184714071515e-06, "loss": 0.106, "step": 221300 }, { "epoch": 2.18, "grad_norm": 5.958981037139893, "learning_rate": 3.951294348952904e-06, "loss": 0.2611, "step": 221325 }, { "epoch": 2.18, "grad_norm": 31.6369686126709, "learning_rate": 3.951170226498655e-06, "loss": 0.1432, "step": 221350 }, { "epoch": 2.18, "grad_norm": 2.966782808303833, "learning_rate": 3.951046104044407e-06, "loss": 0.1706, "step": 221375 }, { "epoch": 2.18, "grad_norm": 9.960476875305176, "learning_rate": 3.950921981590158e-06, "loss": 0.1038, "step": 221400 }, { "epoch": 2.18, "grad_norm": 5.49379301071167, "learning_rate": 3.950797859135909e-06, "loss": 0.2356, "step": 221425 }, { "epoch": 2.18, "grad_norm": 10.433555603027344, "learning_rate": 3.950673736681661e-06, "loss": 0.1148, "step": 221450 }, { "epoch": 2.18, "grad_norm": 9.164934158325195, "learning_rate": 3.9505496142274126e-06, "loss": 0.2101, "step": 221475 }, { "epoch": 2.18, "grad_norm": 13.156194686889648, "learning_rate": 3.950425491773164e-06, "loss": 0.1194, "step": 221500 }, { "epoch": 2.18, "grad_norm": 1.9066753387451172, "learning_rate": 3.950301369318916e-06, "loss": 0.2288, "step": 221525 }, { "epoch": 2.18, "grad_norm": 6.875834941864014, "learning_rate": 3.950177246864667e-06, "loss": 0.1138, "step": 221550 }, { "epoch": 2.18, "grad_norm": 2.2395026683807373, "learning_rate": 3.950053124410418e-06, "loss": 0.2149, "step": 221575 }, { "epoch": 2.18, "grad_norm": 10.93552017211914, "learning_rate": 3.94992900195617e-06, "loss": 0.1034, "step": 221600 }, { "epoch": 2.18, "grad_norm": 4.47578763961792, "learning_rate": 3.9498048795019215e-06, "loss": 0.1957, "step": 221625 }, { "epoch": 2.18, "grad_norm": 6.7511773109436035, "learning_rate": 3.949680757047673e-06, "loss": 0.1249, "step": 221650 }, { "epoch": 2.18, "grad_norm": 4.800228118896484, "learning_rate": 3.949556634593425e-06, "loss": 0.2259, "step": 221675 }, { "epoch": 2.18, "grad_norm": 13.05075740814209, "learning_rate": 3.949432512139176e-06, "loss": 0.1468, "step": 221700 }, { "epoch": 2.18, "grad_norm": 4.872068881988525, "learning_rate": 3.949308389684928e-06, "loss": 0.2518, "step": 221725 }, { "epoch": 2.18, "grad_norm": 10.228995323181152, "learning_rate": 3.949184267230679e-06, "loss": 0.1301, "step": 221750 }, { "epoch": 2.18, "grad_norm": 2.315180540084839, "learning_rate": 3.949060144776431e-06, "loss": 0.2665, "step": 221775 }, { "epoch": 2.18, "grad_norm": 10.604947090148926, "learning_rate": 3.9489360223221826e-06, "loss": 0.1028, "step": 221800 }, { "epoch": 2.18, "grad_norm": 4.005943775177002, "learning_rate": 3.948811899867935e-06, "loss": 0.1993, "step": 221825 }, { "epoch": 2.18, "grad_norm": 12.820584297180176, "learning_rate": 3.948687777413686e-06, "loss": 0.1252, "step": 221850 }, { "epoch": 2.18, "grad_norm": 3.802351713180542, "learning_rate": 3.948563654959437e-06, "loss": 0.2054, "step": 221875 }, { "epoch": 2.18, "grad_norm": 17.40374183654785, "learning_rate": 3.948439532505189e-06, "loss": 0.1337, "step": 221900 }, { "epoch": 2.18, "grad_norm": 2.8997647762298584, "learning_rate": 3.94831541005094e-06, "loss": 0.1866, "step": 221925 }, { "epoch": 2.18, "grad_norm": 8.484502792358398, "learning_rate": 3.9481912875966915e-06, "loss": 0.1408, "step": 221950 }, { "epoch": 2.18, "grad_norm": 7.031664848327637, "learning_rate": 3.9480671651424436e-06, "loss": 0.2274, "step": 221975 }, { "epoch": 2.18, "grad_norm": 81.42937469482422, "learning_rate": 3.947943042688195e-06, "loss": 0.1846, "step": 222000 }, { "epoch": 2.18, "grad_norm": 5.280689716339111, "learning_rate": 3.947823885132116e-06, "loss": 0.1674, "step": 222025 }, { "epoch": 2.18, "grad_norm": 9.960246086120605, "learning_rate": 3.947699762677868e-06, "loss": 0.1099, "step": 222050 }, { "epoch": 2.18, "grad_norm": 5.153122901916504, "learning_rate": 3.947575640223619e-06, "loss": 0.2046, "step": 222075 }, { "epoch": 2.18, "grad_norm": 15.577549934387207, "learning_rate": 3.94745151776937e-06, "loss": 0.1678, "step": 222100 }, { "epoch": 2.18, "grad_norm": 25.76543617248535, "learning_rate": 3.947327395315122e-06, "loss": 0.2377, "step": 222125 }, { "epoch": 2.18, "grad_norm": 12.99142837524414, "learning_rate": 3.9472032728608736e-06, "loss": 0.1552, "step": 222150 }, { "epoch": 2.18, "grad_norm": 1.8383738994598389, "learning_rate": 3.947079150406626e-06, "loss": 0.2017, "step": 222175 }, { "epoch": 2.18, "grad_norm": 10.565174102783203, "learning_rate": 3.946955027952377e-06, "loss": 0.1551, "step": 222200 }, { "epoch": 2.18, "grad_norm": 6.039120197296143, "learning_rate": 3.946830905498129e-06, "loss": 0.2407, "step": 222225 }, { "epoch": 2.19, "grad_norm": 9.090132713317871, "learning_rate": 3.94670678304388e-06, "loss": 0.1109, "step": 222250 }, { "epoch": 2.19, "grad_norm": 3.6605353355407715, "learning_rate": 3.946582660589632e-06, "loss": 0.2366, "step": 222275 }, { "epoch": 2.19, "grad_norm": 10.178709983825684, "learning_rate": 3.946458538135383e-06, "loss": 0.1344, "step": 222300 }, { "epoch": 2.19, "grad_norm": 6.671667575836182, "learning_rate": 3.946334415681135e-06, "loss": 0.1768, "step": 222325 }, { "epoch": 2.19, "grad_norm": 8.13813304901123, "learning_rate": 3.946210293226887e-06, "loss": 0.1286, "step": 222350 }, { "epoch": 2.19, "grad_norm": 5.2792744636535645, "learning_rate": 3.946086170772638e-06, "loss": 0.2201, "step": 222375 }, { "epoch": 2.19, "grad_norm": 11.024855613708496, "learning_rate": 3.945962048318389e-06, "loss": 0.148, "step": 222400 }, { "epoch": 2.19, "grad_norm": 2.809504747390747, "learning_rate": 3.945837925864141e-06, "loss": 0.1865, "step": 222425 }, { "epoch": 2.19, "grad_norm": 8.793310165405273, "learning_rate": 3.945713803409892e-06, "loss": 0.1382, "step": 222450 }, { "epoch": 2.19, "grad_norm": 4.931551456451416, "learning_rate": 3.9455896809556435e-06, "loss": 0.2064, "step": 222475 }, { "epoch": 2.19, "grad_norm": 19.60686492919922, "learning_rate": 3.945465558501396e-06, "loss": 0.1211, "step": 222500 }, { "epoch": 2.19, "grad_norm": 4.236926078796387, "learning_rate": 3.945341436047147e-06, "loss": 0.1915, "step": 222525 }, { "epoch": 2.19, "grad_norm": 9.247185707092285, "learning_rate": 3.945217313592898e-06, "loss": 0.1225, "step": 222550 }, { "epoch": 2.19, "grad_norm": 3.3583462238311768, "learning_rate": 3.94509319113865e-06, "loss": 0.212, "step": 222575 }, { "epoch": 2.19, "grad_norm": 10.35400390625, "learning_rate": 3.944969068684401e-06, "loss": 0.1103, "step": 222600 }, { "epoch": 2.19, "grad_norm": 4.523715019226074, "learning_rate": 3.944844946230153e-06, "loss": 0.2364, "step": 222625 }, { "epoch": 2.19, "grad_norm": 21.691654205322266, "learning_rate": 3.9447208237759046e-06, "loss": 0.123, "step": 222650 }, { "epoch": 2.19, "grad_norm": 4.304087162017822, "learning_rate": 3.944596701321657e-06, "loss": 0.1946, "step": 222675 }, { "epoch": 2.19, "grad_norm": 10.265363693237305, "learning_rate": 3.944472578867408e-06, "loss": 0.1439, "step": 222700 }, { "epoch": 2.19, "grad_norm": 3.9290566444396973, "learning_rate": 3.94434845641316e-06, "loss": 0.2392, "step": 222725 }, { "epoch": 2.19, "grad_norm": 13.777080535888672, "learning_rate": 3.944224333958911e-06, "loss": 0.1232, "step": 222750 }, { "epoch": 2.19, "grad_norm": 3.056964874267578, "learning_rate": 3.944100211504662e-06, "loss": 0.2148, "step": 222775 }, { "epoch": 2.19, "grad_norm": 11.932677268981934, "learning_rate": 3.9439760890504135e-06, "loss": 0.1064, "step": 222800 }, { "epoch": 2.19, "grad_norm": 4.823943138122559, "learning_rate": 3.943851966596166e-06, "loss": 0.21, "step": 222825 }, { "epoch": 2.19, "grad_norm": 13.679560661315918, "learning_rate": 3.943727844141917e-06, "loss": 0.199, "step": 222850 }, { "epoch": 2.19, "grad_norm": 5.772101879119873, "learning_rate": 3.943603721687668e-06, "loss": 0.2013, "step": 222875 }, { "epoch": 2.19, "grad_norm": 9.166272163391113, "learning_rate": 3.94347959923342e-06, "loss": 0.0915, "step": 222900 }, { "epoch": 2.19, "grad_norm": 6.918309688568115, "learning_rate": 3.943355476779171e-06, "loss": 0.2143, "step": 222925 }, { "epoch": 2.19, "grad_norm": 9.251596450805664, "learning_rate": 3.9432313543249225e-06, "loss": 0.1229, "step": 222950 }, { "epoch": 2.19, "grad_norm": 6.109222412109375, "learning_rate": 3.9431072318706746e-06, "loss": 0.2555, "step": 222975 }, { "epoch": 2.19, "grad_norm": 10.846963882446289, "learning_rate": 3.942983109416426e-06, "loss": 0.1233, "step": 223000 }, { "epoch": 2.19, "grad_norm": 2.6415467262268066, "learning_rate": 3.942858986962178e-06, "loss": 0.191, "step": 223025 }, { "epoch": 2.19, "grad_norm": 9.675271034240723, "learning_rate": 3.942734864507929e-06, "loss": 0.1098, "step": 223050 }, { "epoch": 2.19, "grad_norm": 7.325107574462891, "learning_rate": 3.942610742053681e-06, "loss": 0.2241, "step": 223075 }, { "epoch": 2.19, "grad_norm": 10.348066329956055, "learning_rate": 3.942486619599432e-06, "loss": 0.1147, "step": 223100 }, { "epoch": 2.19, "grad_norm": 6.697282791137695, "learning_rate": 3.942362497145184e-06, "loss": 0.2104, "step": 223125 }, { "epoch": 2.19, "grad_norm": 9.002949714660645, "learning_rate": 3.9422383746909356e-06, "loss": 0.1179, "step": 223150 }, { "epoch": 2.19, "grad_norm": 1.7888431549072266, "learning_rate": 3.942114252236687e-06, "loss": 0.1841, "step": 223175 }, { "epoch": 2.19, "grad_norm": 6.066830635070801, "learning_rate": 3.941990129782439e-06, "loss": 0.1113, "step": 223200 }, { "epoch": 2.19, "grad_norm": 8.366311073303223, "learning_rate": 3.94186600732819e-06, "loss": 0.2112, "step": 223225 }, { "epoch": 2.2, "grad_norm": 12.451704978942871, "learning_rate": 3.941741884873941e-06, "loss": 0.1222, "step": 223250 }, { "epoch": 2.2, "grad_norm": 4.847395896911621, "learning_rate": 3.941617762419693e-06, "loss": 0.1954, "step": 223275 }, { "epoch": 2.2, "grad_norm": 11.472246170043945, "learning_rate": 3.9414936399654445e-06, "loss": 0.1314, "step": 223300 }, { "epoch": 2.2, "grad_norm": 4.76237678527832, "learning_rate": 3.941369517511196e-06, "loss": 0.1937, "step": 223325 }, { "epoch": 2.2, "grad_norm": 12.244049072265625, "learning_rate": 3.941245395056948e-06, "loss": 0.1372, "step": 223350 }, { "epoch": 2.2, "grad_norm": 7.393145561218262, "learning_rate": 3.941121272602699e-06, "loss": 0.2056, "step": 223375 }, { "epoch": 2.2, "grad_norm": 13.702780723571777, "learning_rate": 3.94099715014845e-06, "loss": 0.1343, "step": 223400 }, { "epoch": 2.2, "grad_norm": 4.9416399002075195, "learning_rate": 3.940873027694202e-06, "loss": 0.2134, "step": 223425 }, { "epoch": 2.2, "grad_norm": 13.455036163330078, "learning_rate": 3.9407489052399535e-06, "loss": 0.1239, "step": 223450 }, { "epoch": 2.2, "grad_norm": 5.362695693969727, "learning_rate": 3.9406247827857056e-06, "loss": 0.205, "step": 223475 }, { "epoch": 2.2, "grad_norm": 9.801094055175781, "learning_rate": 3.940500660331457e-06, "loss": 0.1353, "step": 223500 }, { "epoch": 2.2, "grad_norm": 3.7176995277404785, "learning_rate": 3.940376537877209e-06, "loss": 0.1847, "step": 223525 }, { "epoch": 2.2, "grad_norm": 7.907588005065918, "learning_rate": 3.94025241542296e-06, "loss": 0.11, "step": 223550 }, { "epoch": 2.2, "grad_norm": 5.657451629638672, "learning_rate": 3.940128292968712e-06, "loss": 0.218, "step": 223575 }, { "epoch": 2.2, "grad_norm": 10.173154830932617, "learning_rate": 3.940004170514463e-06, "loss": 0.1109, "step": 223600 }, { "epoch": 2.2, "grad_norm": 3.578511953353882, "learning_rate": 3.9398800480602145e-06, "loss": 0.1997, "step": 223625 }, { "epoch": 2.2, "grad_norm": 11.564624786376953, "learning_rate": 3.939755925605966e-06, "loss": 0.1379, "step": 223650 }, { "epoch": 2.2, "grad_norm": 3.4395480155944824, "learning_rate": 3.939631803151718e-06, "loss": 0.2029, "step": 223675 }, { "epoch": 2.2, "grad_norm": 20.08502769470215, "learning_rate": 3.939507680697469e-06, "loss": 0.1524, "step": 223700 }, { "epoch": 2.2, "grad_norm": 5.376059055328369, "learning_rate": 3.93938355824322e-06, "loss": 0.2274, "step": 223725 }, { "epoch": 2.2, "grad_norm": 13.061824798583984, "learning_rate": 3.939259435788972e-06, "loss": 0.1431, "step": 223750 }, { "epoch": 2.2, "grad_norm": 3.4271161556243896, "learning_rate": 3.9391353133347235e-06, "loss": 0.241, "step": 223775 }, { "epoch": 2.2, "grad_norm": 8.051992416381836, "learning_rate": 3.9390111908804755e-06, "loss": 0.0923, "step": 223800 }, { "epoch": 2.2, "grad_norm": 8.826704978942871, "learning_rate": 3.938887068426227e-06, "loss": 0.2519, "step": 223825 }, { "epoch": 2.2, "grad_norm": 15.10973072052002, "learning_rate": 3.938762945971979e-06, "loss": 0.1187, "step": 223850 }, { "epoch": 2.2, "grad_norm": 3.6222686767578125, "learning_rate": 3.93863882351773e-06, "loss": 0.1885, "step": 223875 }, { "epoch": 2.2, "grad_norm": 10.129161834716797, "learning_rate": 3.938514701063482e-06, "loss": 0.1317, "step": 223900 }, { "epoch": 2.2, "grad_norm": 6.826416015625, "learning_rate": 3.938390578609233e-06, "loss": 0.235, "step": 223925 }, { "epoch": 2.2, "grad_norm": 12.871764183044434, "learning_rate": 3.9382664561549845e-06, "loss": 0.1472, "step": 223950 }, { "epoch": 2.2, "grad_norm": 3.7484488487243652, "learning_rate": 3.9381423337007366e-06, "loss": 0.2038, "step": 223975 }, { "epoch": 2.2, "grad_norm": 10.200700759887695, "learning_rate": 3.938018211246488e-06, "loss": 0.1125, "step": 224000 }, { "epoch": 2.2, "grad_norm": 7.191545486450195, "learning_rate": 3.937894088792239e-06, "loss": 0.1937, "step": 224025 }, { "epoch": 2.2, "grad_norm": 12.065061569213867, "learning_rate": 3.937769966337991e-06, "loss": 0.1144, "step": 224050 }, { "epoch": 2.2, "grad_norm": 4.051753520965576, "learning_rate": 3.937645843883742e-06, "loss": 0.1938, "step": 224075 }, { "epoch": 2.2, "grad_norm": 6.780886173248291, "learning_rate": 3.9375217214294935e-06, "loss": 0.1137, "step": 224100 }, { "epoch": 2.2, "grad_norm": 7.186286926269531, "learning_rate": 3.9373975989752455e-06, "loss": 0.2646, "step": 224125 }, { "epoch": 2.2, "grad_norm": 8.458232879638672, "learning_rate": 3.937273476520997e-06, "loss": 0.1401, "step": 224150 }, { "epoch": 2.2, "grad_norm": 7.420370101928711, "learning_rate": 3.937149354066748e-06, "loss": 0.2458, "step": 224175 }, { "epoch": 2.2, "grad_norm": 9.127973556518555, "learning_rate": 3.9370252316125e-06, "loss": 0.1267, "step": 224200 }, { "epoch": 2.2, "grad_norm": 2.6378934383392334, "learning_rate": 3.936901109158251e-06, "loss": 0.1711, "step": 224225 }, { "epoch": 2.2, "grad_norm": 9.902485847473145, "learning_rate": 3.936776986704003e-06, "loss": 0.1129, "step": 224250 }, { "epoch": 2.21, "grad_norm": 3.9050052165985107, "learning_rate": 3.9366528642497545e-06, "loss": 0.2182, "step": 224275 }, { "epoch": 2.21, "grad_norm": 13.692856788635254, "learning_rate": 3.9365287417955065e-06, "loss": 0.0836, "step": 224300 }, { "epoch": 2.21, "grad_norm": 5.807589054107666, "learning_rate": 3.936404619341258e-06, "loss": 0.2338, "step": 224325 }, { "epoch": 2.21, "grad_norm": 12.0955228805542, "learning_rate": 3.93628049688701e-06, "loss": 0.1212, "step": 224350 }, { "epoch": 2.21, "grad_norm": 3.9485697746276855, "learning_rate": 3.936156374432761e-06, "loss": 0.2489, "step": 224375 }, { "epoch": 2.21, "grad_norm": 10.181525230407715, "learning_rate": 3.936032251978512e-06, "loss": 0.1083, "step": 224400 }, { "epoch": 2.21, "grad_norm": 5.66279411315918, "learning_rate": 3.935908129524264e-06, "loss": 0.2068, "step": 224425 }, { "epoch": 2.21, "grad_norm": 6.965583324432373, "learning_rate": 3.9357840070700155e-06, "loss": 0.1337, "step": 224450 }, { "epoch": 2.21, "grad_norm": 7.645298957824707, "learning_rate": 3.935659884615767e-06, "loss": 0.185, "step": 224475 }, { "epoch": 2.21, "grad_norm": 7.319028854370117, "learning_rate": 3.935535762161518e-06, "loss": 0.1063, "step": 224500 }, { "epoch": 2.21, "grad_norm": 3.9693260192871094, "learning_rate": 3.93541163970727e-06, "loss": 0.2449, "step": 224525 }, { "epoch": 2.21, "grad_norm": 15.64366626739502, "learning_rate": 3.935287517253021e-06, "loss": 0.1377, "step": 224550 }, { "epoch": 2.21, "grad_norm": 2.6974759101867676, "learning_rate": 3.935163394798772e-06, "loss": 0.198, "step": 224575 }, { "epoch": 2.21, "grad_norm": 11.701334953308105, "learning_rate": 3.9350392723445245e-06, "loss": 0.1162, "step": 224600 }, { "epoch": 2.21, "grad_norm": 2.6792666912078857, "learning_rate": 3.934915149890276e-06, "loss": 0.1909, "step": 224625 }, { "epoch": 2.21, "grad_norm": 9.504242897033691, "learning_rate": 3.934791027436028e-06, "loss": 0.0946, "step": 224650 }, { "epoch": 2.21, "grad_norm": 3.3971242904663086, "learning_rate": 3.934671869879949e-06, "loss": 0.2215, "step": 224675 }, { "epoch": 2.21, "grad_norm": 4.481937408447266, "learning_rate": 3.934547747425701e-06, "loss": 0.1201, "step": 224700 }, { "epoch": 2.21, "grad_norm": 5.6088666915893555, "learning_rate": 3.934423624971452e-06, "loss": 0.219, "step": 224725 }, { "epoch": 2.21, "grad_norm": 10.173486709594727, "learning_rate": 3.934299502517204e-06, "loss": 0.1269, "step": 224750 }, { "epoch": 2.21, "grad_norm": 2.3245766162872314, "learning_rate": 3.934175380062955e-06, "loss": 0.2347, "step": 224775 }, { "epoch": 2.21, "grad_norm": 10.09469223022461, "learning_rate": 3.934051257608707e-06, "loss": 0.1252, "step": 224800 }, { "epoch": 2.21, "grad_norm": 2.8067524433135986, "learning_rate": 3.933927135154459e-06, "loss": 0.1817, "step": 224825 }, { "epoch": 2.21, "grad_norm": 7.440267086029053, "learning_rate": 3.93380301270021e-06, "loss": 0.1222, "step": 224850 }, { "epoch": 2.21, "grad_norm": 2.973541498184204, "learning_rate": 3.933678890245962e-06, "loss": 0.1912, "step": 224875 }, { "epoch": 2.21, "grad_norm": 14.021037101745605, "learning_rate": 3.933554767791713e-06, "loss": 0.0995, "step": 224900 }, { "epoch": 2.21, "grad_norm": 3.4579381942749023, "learning_rate": 3.933430645337464e-06, "loss": 0.2534, "step": 224925 }, { "epoch": 2.21, "grad_norm": 10.47110366821289, "learning_rate": 3.933306522883216e-06, "loss": 0.1346, "step": 224950 }, { "epoch": 2.21, "grad_norm": 11.11630630493164, "learning_rate": 3.9331824004289675e-06, "loss": 0.2082, "step": 224975 }, { "epoch": 2.21, "grad_norm": 6.952457427978516, "learning_rate": 3.933058277974719e-06, "loss": 0.1188, "step": 225000 }, { "epoch": 2.21, "grad_norm": 6.458380222320557, "learning_rate": 3.932934155520471e-06, "loss": 0.2138, "step": 225025 }, { "epoch": 2.21, "grad_norm": 10.972137451171875, "learning_rate": 3.932810033066222e-06, "loss": 0.1256, "step": 225050 }, { "epoch": 2.21, "grad_norm": 3.198453664779663, "learning_rate": 3.932685910611973e-06, "loss": 0.2232, "step": 225075 }, { "epoch": 2.21, "grad_norm": 9.795385360717773, "learning_rate": 3.932561788157725e-06, "loss": 0.1305, "step": 225100 }, { "epoch": 2.21, "grad_norm": 4.709789276123047, "learning_rate": 3.9324376657034765e-06, "loss": 0.2275, "step": 225125 }, { "epoch": 2.21, "grad_norm": 9.954161643981934, "learning_rate": 3.9323135432492286e-06, "loss": 0.1388, "step": 225150 }, { "epoch": 2.21, "grad_norm": 5.975637912750244, "learning_rate": 3.93218942079498e-06, "loss": 0.2191, "step": 225175 }, { "epoch": 2.21, "grad_norm": 6.715907096862793, "learning_rate": 3.932065298340732e-06, "loss": 0.1488, "step": 225200 }, { "epoch": 2.21, "grad_norm": 6.963340759277344, "learning_rate": 3.931941175886483e-06, "loss": 0.1962, "step": 225225 }, { "epoch": 2.21, "grad_norm": 13.033575057983398, "learning_rate": 3.931817053432234e-06, "loss": 0.1061, "step": 225250 }, { "epoch": 2.21, "grad_norm": 0.6432746648788452, "learning_rate": 3.931692930977986e-06, "loss": 0.2188, "step": 225275 }, { "epoch": 2.22, "grad_norm": 13.904712677001953, "learning_rate": 3.9315688085237375e-06, "loss": 0.1392, "step": 225300 }, { "epoch": 2.22, "grad_norm": 5.852365493774414, "learning_rate": 3.931444686069489e-06, "loss": 0.233, "step": 225325 }, { "epoch": 2.22, "grad_norm": 12.439226150512695, "learning_rate": 3.931320563615241e-06, "loss": 0.1559, "step": 225350 }, { "epoch": 2.22, "grad_norm": 3.747498035430908, "learning_rate": 3.931196441160992e-06, "loss": 0.2136, "step": 225375 }, { "epoch": 2.22, "grad_norm": 11.77261734008789, "learning_rate": 3.931072318706743e-06, "loss": 0.1223, "step": 225400 }, { "epoch": 2.22, "grad_norm": 2.1376097202301025, "learning_rate": 3.930948196252495e-06, "loss": 0.1907, "step": 225425 }, { "epoch": 2.22, "grad_norm": 10.894187927246094, "learning_rate": 3.9308240737982465e-06, "loss": 0.1, "step": 225450 }, { "epoch": 2.22, "grad_norm": 3.0309042930603027, "learning_rate": 3.930699951343998e-06, "loss": 0.2267, "step": 225475 }, { "epoch": 2.22, "grad_norm": 15.0469970703125, "learning_rate": 3.93057582888975e-06, "loss": 0.1427, "step": 225500 }, { "epoch": 2.22, "grad_norm": 4.424531936645508, "learning_rate": 3.930451706435501e-06, "loss": 0.1944, "step": 225525 }, { "epoch": 2.22, "grad_norm": 9.815081596374512, "learning_rate": 3.930327583981253e-06, "loss": 0.1416, "step": 225550 }, { "epoch": 2.22, "grad_norm": 3.468381643295288, "learning_rate": 3.930203461527004e-06, "loss": 0.2199, "step": 225575 }, { "epoch": 2.22, "grad_norm": 14.943178176879883, "learning_rate": 3.930079339072756e-06, "loss": 0.1318, "step": 225600 }, { "epoch": 2.22, "grad_norm": 8.826290130615234, "learning_rate": 3.9299552166185075e-06, "loss": 0.225, "step": 225625 }, { "epoch": 2.22, "grad_norm": 11.014009475708008, "learning_rate": 3.9298310941642596e-06, "loss": 0.1499, "step": 225650 }, { "epoch": 2.22, "grad_norm": 5.676927089691162, "learning_rate": 3.929706971710011e-06, "loss": 0.2137, "step": 225675 }, { "epoch": 2.22, "grad_norm": 10.169700622558594, "learning_rate": 3.929582849255762e-06, "loss": 0.1349, "step": 225700 }, { "epoch": 2.22, "grad_norm": 3.1324269771575928, "learning_rate": 3.929458726801514e-06, "loss": 0.2131, "step": 225725 }, { "epoch": 2.22, "grad_norm": 10.86477279663086, "learning_rate": 3.929334604347265e-06, "loss": 0.1208, "step": 225750 }, { "epoch": 2.22, "grad_norm": 6.078059196472168, "learning_rate": 3.9292104818930165e-06, "loss": 0.2114, "step": 225775 }, { "epoch": 2.22, "grad_norm": 9.407172203063965, "learning_rate": 3.9290863594387685e-06, "loss": 0.0732, "step": 225800 }, { "epoch": 2.22, "grad_norm": 12.468256950378418, "learning_rate": 3.92896223698452e-06, "loss": 0.2038, "step": 225825 }, { "epoch": 2.22, "grad_norm": 15.03553581237793, "learning_rate": 3.928838114530271e-06, "loss": 0.1367, "step": 225850 }, { "epoch": 2.22, "grad_norm": 9.12606430053711, "learning_rate": 3.928713992076023e-06, "loss": 0.1831, "step": 225875 }, { "epoch": 2.22, "grad_norm": 11.782623291015625, "learning_rate": 3.928589869621774e-06, "loss": 0.1388, "step": 225900 }, { "epoch": 2.22, "grad_norm": 7.686950206756592, "learning_rate": 3.9284657471675254e-06, "loss": 0.2437, "step": 225925 }, { "epoch": 2.22, "grad_norm": 8.1784029006958, "learning_rate": 3.9283416247132775e-06, "loss": 0.1232, "step": 225950 }, { "epoch": 2.22, "grad_norm": 2.2562122344970703, "learning_rate": 3.928217502259029e-06, "loss": 0.2197, "step": 225975 }, { "epoch": 2.22, "grad_norm": 7.181694984436035, "learning_rate": 3.928093379804781e-06, "loss": 0.127, "step": 226000 }, { "epoch": 2.22, "grad_norm": 2.948702335357666, "learning_rate": 3.927969257350532e-06, "loss": 0.2178, "step": 226025 }, { "epoch": 2.22, "grad_norm": 14.536184310913086, "learning_rate": 3.927845134896284e-06, "loss": 0.1128, "step": 226050 }, { "epoch": 2.22, "grad_norm": 2.5678088665008545, "learning_rate": 3.927721012442035e-06, "loss": 0.2076, "step": 226075 }, { "epoch": 2.22, "grad_norm": 18.783491134643555, "learning_rate": 3.9275968899877865e-06, "loss": 0.099, "step": 226100 }, { "epoch": 2.22, "grad_norm": 1.232515811920166, "learning_rate": 3.9274727675335385e-06, "loss": 0.1907, "step": 226125 }, { "epoch": 2.22, "grad_norm": 16.01313018798828, "learning_rate": 3.92734864507929e-06, "loss": 0.1469, "step": 226150 }, { "epoch": 2.22, "grad_norm": 5.857262134552002, "learning_rate": 3.927224522625041e-06, "loss": 0.2017, "step": 226175 }, { "epoch": 2.22, "grad_norm": 12.586236000061035, "learning_rate": 3.927100400170793e-06, "loss": 0.1306, "step": 226200 }, { "epoch": 2.22, "grad_norm": 4.171325206756592, "learning_rate": 3.926976277716544e-06, "loss": 0.2204, "step": 226225 }, { "epoch": 2.22, "grad_norm": 8.868042945861816, "learning_rate": 3.926852155262295e-06, "loss": 0.102, "step": 226250 }, { "epoch": 2.22, "grad_norm": 6.305531024932861, "learning_rate": 3.9267280328080475e-06, "loss": 0.2186, "step": 226275 }, { "epoch": 2.23, "grad_norm": 6.018977165222168, "learning_rate": 3.926603910353799e-06, "loss": 0.1098, "step": 226300 }, { "epoch": 2.23, "grad_norm": 4.91454553604126, "learning_rate": 3.92647978789955e-06, "loss": 0.2114, "step": 226325 }, { "epoch": 2.23, "grad_norm": 9.955720901489258, "learning_rate": 3.926355665445302e-06, "loss": 0.1636, "step": 226350 }, { "epoch": 2.23, "grad_norm": 6.301669120788574, "learning_rate": 3.926231542991053e-06, "loss": 0.2391, "step": 226375 }, { "epoch": 2.23, "grad_norm": 14.661272048950195, "learning_rate": 3.926107420536805e-06, "loss": 0.124, "step": 226400 }, { "epoch": 2.23, "grad_norm": 4.6421284675598145, "learning_rate": 3.9259832980825564e-06, "loss": 0.223, "step": 226425 }, { "epoch": 2.23, "grad_norm": 7.6356706619262695, "learning_rate": 3.9258591756283085e-06, "loss": 0.0932, "step": 226450 }, { "epoch": 2.23, "grad_norm": 4.369649887084961, "learning_rate": 3.92573505317406e-06, "loss": 0.1655, "step": 226475 }, { "epoch": 2.23, "grad_norm": 5.789883136749268, "learning_rate": 3.925610930719812e-06, "loss": 0.1525, "step": 226500 }, { "epoch": 2.23, "grad_norm": 2.1072261333465576, "learning_rate": 3.925486808265563e-06, "loss": 0.1829, "step": 226525 }, { "epoch": 2.23, "grad_norm": 10.987232208251953, "learning_rate": 3.925362685811314e-06, "loss": 0.1327, "step": 226550 }, { "epoch": 2.23, "grad_norm": 2.25087571144104, "learning_rate": 3.925238563357066e-06, "loss": 0.1997, "step": 226575 }, { "epoch": 2.23, "grad_norm": 47.466732025146484, "learning_rate": 3.9251144409028175e-06, "loss": 0.1286, "step": 226600 }, { "epoch": 2.23, "grad_norm": 7.046113014221191, "learning_rate": 3.924990318448569e-06, "loss": 0.1724, "step": 226625 }, { "epoch": 2.23, "grad_norm": 11.483675003051758, "learning_rate": 3.924866195994321e-06, "loss": 0.1429, "step": 226650 }, { "epoch": 2.23, "grad_norm": 5.373457431793213, "learning_rate": 3.924742073540072e-06, "loss": 0.2208, "step": 226675 }, { "epoch": 2.23, "grad_norm": 15.523017883300781, "learning_rate": 3.924617951085823e-06, "loss": 0.1527, "step": 226700 }, { "epoch": 2.23, "grad_norm": 2.677602529525757, "learning_rate": 3.924493828631575e-06, "loss": 0.2133, "step": 226725 }, { "epoch": 2.23, "grad_norm": 11.379146575927734, "learning_rate": 3.9243697061773264e-06, "loss": 0.1246, "step": 226750 }, { "epoch": 2.23, "grad_norm": 5.374842643737793, "learning_rate": 3.9242455837230785e-06, "loss": 0.1931, "step": 226775 }, { "epoch": 2.23, "grad_norm": 7.908024787902832, "learning_rate": 3.92412146126883e-06, "loss": 0.1194, "step": 226800 }, { "epoch": 2.23, "grad_norm": 10.052156448364258, "learning_rate": 3.923997338814582e-06, "loss": 0.1743, "step": 226825 }, { "epoch": 2.23, "grad_norm": 12.458045959472656, "learning_rate": 3.923873216360333e-06, "loss": 0.1293, "step": 226850 }, { "epoch": 2.23, "grad_norm": 2.5241200923919678, "learning_rate": 3.923749093906084e-06, "loss": 0.2423, "step": 226875 }, { "epoch": 2.23, "grad_norm": 8.439532279968262, "learning_rate": 3.923624971451836e-06, "loss": 0.1047, "step": 226900 }, { "epoch": 2.23, "grad_norm": 6.211277961730957, "learning_rate": 3.9235008489975874e-06, "loss": 0.1999, "step": 226925 }, { "epoch": 2.23, "grad_norm": 16.95564079284668, "learning_rate": 3.923376726543339e-06, "loss": 0.1355, "step": 226950 }, { "epoch": 2.23, "grad_norm": 2.422846555709839, "learning_rate": 3.923252604089091e-06, "loss": 0.2124, "step": 226975 }, { "epoch": 2.23, "grad_norm": 10.041821479797363, "learning_rate": 3.923128481634842e-06, "loss": 0.1243, "step": 227000 }, { "epoch": 2.23, "grad_norm": 5.405528545379639, "learning_rate": 3.923004359180593e-06, "loss": 0.199, "step": 227025 }, { "epoch": 2.23, "grad_norm": 12.81997299194336, "learning_rate": 3.922880236726345e-06, "loss": 0.1146, "step": 227050 }, { "epoch": 2.23, "grad_norm": 3.4375410079956055, "learning_rate": 3.922756114272096e-06, "loss": 0.1834, "step": 227075 }, { "epoch": 2.23, "grad_norm": 4.817504405975342, "learning_rate": 3.922631991817848e-06, "loss": 0.1286, "step": 227100 }, { "epoch": 2.23, "grad_norm": 4.503298759460449, "learning_rate": 3.9225078693636e-06, "loss": 0.2278, "step": 227125 }, { "epoch": 2.23, "grad_norm": 5.729820251464844, "learning_rate": 3.922383746909351e-06, "loss": 0.1437, "step": 227150 }, { "epoch": 2.23, "grad_norm": 4.436964511871338, "learning_rate": 3.922259624455103e-06, "loss": 0.2358, "step": 227175 }, { "epoch": 2.23, "grad_norm": 16.294445037841797, "learning_rate": 3.922135502000854e-06, "loss": 0.1496, "step": 227200 }, { "epoch": 2.23, "grad_norm": 2.991828441619873, "learning_rate": 3.922011379546606e-06, "loss": 0.2622, "step": 227225 }, { "epoch": 2.23, "grad_norm": 11.337162017822266, "learning_rate": 3.9218872570923574e-06, "loss": 0.1301, "step": 227250 }, { "epoch": 2.23, "grad_norm": 6.464844226837158, "learning_rate": 3.921768099536279e-06, "loss": 0.2094, "step": 227275 }, { "epoch": 2.23, "grad_norm": 10.127256393432617, "learning_rate": 3.9216439770820305e-06, "loss": 0.1362, "step": 227300 }, { "epoch": 2.24, "grad_norm": 8.168105125427246, "learning_rate": 3.921519854627783e-06, "loss": 0.1934, "step": 227325 }, { "epoch": 2.24, "grad_norm": 13.736604690551758, "learning_rate": 3.921395732173534e-06, "loss": 0.165, "step": 227350 }, { "epoch": 2.24, "grad_norm": 5.133369445800781, "learning_rate": 3.921271609719285e-06, "loss": 0.2266, "step": 227375 }, { "epoch": 2.24, "grad_norm": 12.823077201843262, "learning_rate": 3.921147487265036e-06, "loss": 0.1209, "step": 227400 }, { "epoch": 2.24, "grad_norm": 6.828989028930664, "learning_rate": 3.921023364810788e-06, "loss": 0.199, "step": 227425 }, { "epoch": 2.24, "grad_norm": 11.1569185256958, "learning_rate": 3.9208992423565395e-06, "loss": 0.1189, "step": 227450 }, { "epoch": 2.24, "grad_norm": 3.7079079151153564, "learning_rate": 3.920775119902291e-06, "loss": 0.2352, "step": 227475 }, { "epoch": 2.24, "grad_norm": 9.96765422821045, "learning_rate": 3.920650997448043e-06, "loss": 0.1681, "step": 227500 }, { "epoch": 2.24, "grad_norm": 4.996969223022461, "learning_rate": 3.920526874993794e-06, "loss": 0.2001, "step": 227525 }, { "epoch": 2.24, "grad_norm": 12.942865371704102, "learning_rate": 3.920402752539545e-06, "loss": 0.1237, "step": 227550 }, { "epoch": 2.24, "grad_norm": 4.289699554443359, "learning_rate": 3.920278630085297e-06, "loss": 0.1986, "step": 227575 }, { "epoch": 2.24, "grad_norm": 15.724740982055664, "learning_rate": 3.9201545076310484e-06, "loss": 0.0986, "step": 227600 }, { "epoch": 2.24, "grad_norm": 4.999619960784912, "learning_rate": 3.9200303851768005e-06, "loss": 0.1827, "step": 227625 }, { "epoch": 2.24, "grad_norm": 12.622475624084473, "learning_rate": 3.919906262722552e-06, "loss": 0.137, "step": 227650 }, { "epoch": 2.24, "grad_norm": 0.7690479159355164, "learning_rate": 3.919782140268304e-06, "loss": 0.1962, "step": 227675 }, { "epoch": 2.24, "grad_norm": 13.135464668273926, "learning_rate": 3.919658017814055e-06, "loss": 0.1483, "step": 227700 }, { "epoch": 2.24, "grad_norm": 6.534430503845215, "learning_rate": 3.919533895359807e-06, "loss": 0.2121, "step": 227725 }, { "epoch": 2.24, "grad_norm": 11.406489372253418, "learning_rate": 3.919409772905558e-06, "loss": 0.116, "step": 227750 }, { "epoch": 2.24, "grad_norm": 2.807713270187378, "learning_rate": 3.9192856504513095e-06, "loss": 0.2121, "step": 227775 }, { "epoch": 2.24, "grad_norm": 15.687112808227539, "learning_rate": 3.9191615279970615e-06, "loss": 0.1478, "step": 227800 }, { "epoch": 2.24, "grad_norm": 3.7921667098999023, "learning_rate": 3.919037405542813e-06, "loss": 0.1798, "step": 227825 }, { "epoch": 2.24, "grad_norm": 13.734972953796387, "learning_rate": 3.918913283088564e-06, "loss": 0.1307, "step": 227850 }, { "epoch": 2.24, "grad_norm": 5.070532321929932, "learning_rate": 3.918789160634316e-06, "loss": 0.2329, "step": 227875 }, { "epoch": 2.24, "grad_norm": 9.16445541381836, "learning_rate": 3.918665038180067e-06, "loss": 0.1311, "step": 227900 }, { "epoch": 2.24, "grad_norm": 5.600220203399658, "learning_rate": 3.9185409157258184e-06, "loss": 0.2, "step": 227925 }, { "epoch": 2.24, "grad_norm": 8.900697708129883, "learning_rate": 3.9184167932715705e-06, "loss": 0.1213, "step": 227950 }, { "epoch": 2.24, "grad_norm": 27.969295501708984, "learning_rate": 3.918292670817322e-06, "loss": 0.2487, "step": 227975 }, { "epoch": 2.24, "grad_norm": 17.771196365356445, "learning_rate": 3.918168548363073e-06, "loss": 0.1111, "step": 228000 }, { "epoch": 2.24, "grad_norm": 5.621769428253174, "learning_rate": 3.918044425908825e-06, "loss": 0.2201, "step": 228025 }, { "epoch": 2.24, "grad_norm": 12.57668399810791, "learning_rate": 3.917920303454576e-06, "loss": 0.1121, "step": 228050 }, { "epoch": 2.24, "grad_norm": 5.641644477844238, "learning_rate": 3.917796181000328e-06, "loss": 0.2536, "step": 228075 }, { "epoch": 2.24, "grad_norm": 41.72618865966797, "learning_rate": 3.9176720585460794e-06, "loss": 0.1011, "step": 228100 }, { "epoch": 2.24, "grad_norm": 3.207465648651123, "learning_rate": 3.9175479360918315e-06, "loss": 0.1974, "step": 228125 }, { "epoch": 2.24, "grad_norm": 4.371415615081787, "learning_rate": 3.917423813637583e-06, "loss": 0.1313, "step": 228150 }, { "epoch": 2.24, "grad_norm": 3.0418646335601807, "learning_rate": 3.917299691183335e-06, "loss": 0.2329, "step": 228175 }, { "epoch": 2.24, "grad_norm": 11.222370147705078, "learning_rate": 3.917175568729086e-06, "loss": 0.1333, "step": 228200 }, { "epoch": 2.24, "grad_norm": 2.7354161739349365, "learning_rate": 3.917051446274837e-06, "loss": 0.2225, "step": 228225 }, { "epoch": 2.24, "grad_norm": 5.512189865112305, "learning_rate": 3.916927323820588e-06, "loss": 0.1347, "step": 228250 }, { "epoch": 2.24, "grad_norm": 2.0127246379852295, "learning_rate": 3.9168032013663405e-06, "loss": 0.1873, "step": 228275 }, { "epoch": 2.24, "grad_norm": 6.8861260414123535, "learning_rate": 3.916679078912092e-06, "loss": 0.1407, "step": 228300 }, { "epoch": 2.24, "grad_norm": 4.4072794914245605, "learning_rate": 3.916554956457843e-06, "loss": 0.2246, "step": 228325 }, { "epoch": 2.25, "grad_norm": 9.356306076049805, "learning_rate": 3.916430834003595e-06, "loss": 0.127, "step": 228350 }, { "epoch": 2.25, "grad_norm": 3.665186643600464, "learning_rate": 3.916306711549346e-06, "loss": 0.2244, "step": 228375 }, { "epoch": 2.25, "grad_norm": 12.074732780456543, "learning_rate": 3.916182589095097e-06, "loss": 0.1108, "step": 228400 }, { "epoch": 2.25, "grad_norm": 8.333030700683594, "learning_rate": 3.9160584666408494e-06, "loss": 0.2264, "step": 228425 }, { "epoch": 2.25, "grad_norm": 9.808589935302734, "learning_rate": 3.915934344186601e-06, "loss": 0.1112, "step": 228450 }, { "epoch": 2.25, "grad_norm": 5.2429633140563965, "learning_rate": 3.915810221732353e-06, "loss": 0.2229, "step": 228475 }, { "epoch": 2.25, "grad_norm": 10.585201263427734, "learning_rate": 3.915686099278104e-06, "loss": 0.1272, "step": 228500 }, { "epoch": 2.25, "grad_norm": 4.941930294036865, "learning_rate": 3.915561976823856e-06, "loss": 0.2124, "step": 228525 }, { "epoch": 2.25, "grad_norm": 7.524820804595947, "learning_rate": 3.915437854369607e-06, "loss": 0.1243, "step": 228550 }, { "epoch": 2.25, "grad_norm": 3.3764312267303467, "learning_rate": 3.915313731915359e-06, "loss": 0.183, "step": 228575 }, { "epoch": 2.25, "grad_norm": 11.083585739135742, "learning_rate": 3.9151896094611105e-06, "loss": 0.1112, "step": 228600 }, { "epoch": 2.25, "grad_norm": 9.25593090057373, "learning_rate": 3.915065487006862e-06, "loss": 0.2091, "step": 228625 }, { "epoch": 2.25, "grad_norm": 12.425710678100586, "learning_rate": 3.914941364552614e-06, "loss": 0.132, "step": 228650 }, { "epoch": 2.25, "grad_norm": 4.376555919647217, "learning_rate": 3.914817242098365e-06, "loss": 0.2119, "step": 228675 }, { "epoch": 2.25, "grad_norm": 9.492616653442383, "learning_rate": 3.914693119644116e-06, "loss": 0.1139, "step": 228700 }, { "epoch": 2.25, "grad_norm": 5.90005350112915, "learning_rate": 3.914568997189868e-06, "loss": 0.2413, "step": 228725 }, { "epoch": 2.25, "grad_norm": 10.877622604370117, "learning_rate": 3.914444874735619e-06, "loss": 0.1416, "step": 228750 }, { "epoch": 2.25, "grad_norm": 3.459318161010742, "learning_rate": 3.914320752281371e-06, "loss": 0.2074, "step": 228775 }, { "epoch": 2.25, "grad_norm": 24.818614959716797, "learning_rate": 3.914196629827123e-06, "loss": 0.1141, "step": 228800 }, { "epoch": 2.25, "grad_norm": 2.2243309020996094, "learning_rate": 3.914072507372874e-06, "loss": 0.2176, "step": 228825 }, { "epoch": 2.25, "grad_norm": 13.356955528259277, "learning_rate": 3.913948384918625e-06, "loss": 0.1469, "step": 228850 }, { "epoch": 2.25, "grad_norm": 5.773128509521484, "learning_rate": 3.913824262464377e-06, "loss": 0.2074, "step": 228875 }, { "epoch": 2.25, "grad_norm": 11.375556945800781, "learning_rate": 3.913700140010128e-06, "loss": 0.1102, "step": 228900 }, { "epoch": 2.25, "grad_norm": 5.018189907073975, "learning_rate": 3.9135760175558804e-06, "loss": 0.2769, "step": 228925 }, { "epoch": 2.25, "grad_norm": 11.155926704406738, "learning_rate": 3.913451895101632e-06, "loss": 0.1266, "step": 228950 }, { "epoch": 2.25, "grad_norm": 3.818108320236206, "learning_rate": 3.913327772647384e-06, "loss": 0.2446, "step": 228975 }, { "epoch": 2.25, "grad_norm": 14.680523872375488, "learning_rate": 3.913203650193135e-06, "loss": 0.1213, "step": 229000 }, { "epoch": 2.25, "grad_norm": 3.5552279949188232, "learning_rate": 3.913079527738887e-06, "loss": 0.2148, "step": 229025 }, { "epoch": 2.25, "grad_norm": 11.47806453704834, "learning_rate": 3.912955405284638e-06, "loss": 0.1444, "step": 229050 }, { "epoch": 2.25, "grad_norm": 1.9548274278640747, "learning_rate": 3.912831282830389e-06, "loss": 0.2012, "step": 229075 }, { "epoch": 2.25, "grad_norm": 6.527364730834961, "learning_rate": 3.912707160376141e-06, "loss": 0.1328, "step": 229100 }, { "epoch": 2.25, "grad_norm": 63.93705368041992, "learning_rate": 3.912583037921893e-06, "loss": 0.2438, "step": 229125 }, { "epoch": 2.25, "grad_norm": 12.032938003540039, "learning_rate": 3.912458915467644e-06, "loss": 0.1046, "step": 229150 }, { "epoch": 2.25, "grad_norm": 5.3593926429748535, "learning_rate": 3.912334793013395e-06, "loss": 0.1771, "step": 229175 }, { "epoch": 2.25, "grad_norm": 12.289250373840332, "learning_rate": 3.912210670559147e-06, "loss": 0.112, "step": 229200 }, { "epoch": 2.25, "grad_norm": 3.155733346939087, "learning_rate": 3.912086548104898e-06, "loss": 0.1905, "step": 229225 }, { "epoch": 2.25, "grad_norm": 12.00564956665039, "learning_rate": 3.9119624256506496e-06, "loss": 0.1386, "step": 229250 }, { "epoch": 2.25, "grad_norm": 13.057609558105469, "learning_rate": 3.911838303196402e-06, "loss": 0.2028, "step": 229275 }, { "epoch": 2.25, "grad_norm": 16.03413200378418, "learning_rate": 3.911714180742153e-06, "loss": 0.1454, "step": 229300 }, { "epoch": 2.25, "grad_norm": 6.15739107131958, "learning_rate": 3.911590058287905e-06, "loss": 0.2635, "step": 229325 }, { "epoch": 2.26, "grad_norm": 6.347921371459961, "learning_rate": 3.911465935833656e-06, "loss": 0.1427, "step": 229350 }, { "epoch": 2.26, "grad_norm": 2.6066293716430664, "learning_rate": 3.911346778277578e-06, "loss": 0.2695, "step": 229375 }, { "epoch": 2.26, "grad_norm": 7.843820571899414, "learning_rate": 3.911222655823329e-06, "loss": 0.1406, "step": 229400 }, { "epoch": 2.26, "grad_norm": 5.4730305671691895, "learning_rate": 3.911098533369081e-06, "loss": 0.1833, "step": 229425 }, { "epoch": 2.26, "grad_norm": 10.911948204040527, "learning_rate": 3.9109744109148325e-06, "loss": 0.1737, "step": 229450 }, { "epoch": 2.26, "grad_norm": 3.080697536468506, "learning_rate": 3.9108502884605845e-06, "loss": 0.2285, "step": 229475 }, { "epoch": 2.26, "grad_norm": 14.623040199279785, "learning_rate": 3.910726166006336e-06, "loss": 0.1416, "step": 229500 }, { "epoch": 2.26, "grad_norm": 1.2466176748275757, "learning_rate": 3.910602043552087e-06, "loss": 0.2511, "step": 229525 }, { "epoch": 2.26, "grad_norm": 28.170228958129883, "learning_rate": 3.910477921097839e-06, "loss": 0.1248, "step": 229550 }, { "epoch": 2.26, "grad_norm": 7.318478107452393, "learning_rate": 3.91035379864359e-06, "loss": 0.2118, "step": 229575 }, { "epoch": 2.26, "grad_norm": 13.099349021911621, "learning_rate": 3.9102296761893414e-06, "loss": 0.1452, "step": 229600 }, { "epoch": 2.26, "grad_norm": 2.8620755672454834, "learning_rate": 3.9101055537350935e-06, "loss": 0.165, "step": 229625 }, { "epoch": 2.26, "grad_norm": 15.230571746826172, "learning_rate": 3.909981431280845e-06, "loss": 0.1215, "step": 229650 }, { "epoch": 2.26, "grad_norm": 3.8383049964904785, "learning_rate": 3.909857308826596e-06, "loss": 0.2382, "step": 229675 }, { "epoch": 2.26, "grad_norm": 8.47443962097168, "learning_rate": 3.909733186372348e-06, "loss": 0.1767, "step": 229700 }, { "epoch": 2.26, "grad_norm": 2.5409133434295654, "learning_rate": 3.909609063918099e-06, "loss": 0.2067, "step": 229725 }, { "epoch": 2.26, "grad_norm": 8.932419776916504, "learning_rate": 3.909484941463851e-06, "loss": 0.1109, "step": 229750 }, { "epoch": 2.26, "grad_norm": 4.263859748840332, "learning_rate": 3.9093608190096025e-06, "loss": 0.1987, "step": 229775 }, { "epoch": 2.26, "grad_norm": 14.868148803710938, "learning_rate": 3.9092366965553545e-06, "loss": 0.1524, "step": 229800 }, { "epoch": 2.26, "grad_norm": 5.837740421295166, "learning_rate": 3.909112574101106e-06, "loss": 0.1939, "step": 229825 }, { "epoch": 2.26, "grad_norm": 7.661693096160889, "learning_rate": 3.908988451646857e-06, "loss": 0.0845, "step": 229850 }, { "epoch": 2.26, "grad_norm": 8.795536041259766, "learning_rate": 3.908864329192609e-06, "loss": 0.2266, "step": 229875 }, { "epoch": 2.26, "grad_norm": 8.782609939575195, "learning_rate": 3.90874020673836e-06, "loss": 0.1202, "step": 229900 }, { "epoch": 2.26, "grad_norm": 7.814498424530029, "learning_rate": 3.908616084284111e-06, "loss": 0.2088, "step": 229925 }, { "epoch": 2.26, "grad_norm": 15.40975570678711, "learning_rate": 3.9084919618298635e-06, "loss": 0.1614, "step": 229950 }, { "epoch": 2.26, "grad_norm": 5.522243976593018, "learning_rate": 3.908367839375615e-06, "loss": 0.2012, "step": 229975 }, { "epoch": 2.26, "grad_norm": 9.19359302520752, "learning_rate": 3.908243716921366e-06, "loss": 0.1324, "step": 230000 }, { "epoch": 2.26, "grad_norm": 0.5995404124259949, "learning_rate": 3.908119594467118e-06, "loss": 0.2155, "step": 230025 }, { "epoch": 2.26, "grad_norm": 18.73751449584961, "learning_rate": 3.907995472012869e-06, "loss": 0.1359, "step": 230050 }, { "epoch": 2.26, "grad_norm": 5.109967231750488, "learning_rate": 3.90787134955862e-06, "loss": 0.2184, "step": 230075 }, { "epoch": 2.26, "grad_norm": 6.215446949005127, "learning_rate": 3.9077472271043724e-06, "loss": 0.1166, "step": 230100 }, { "epoch": 2.26, "grad_norm": 6.220907688140869, "learning_rate": 3.907623104650124e-06, "loss": 0.2152, "step": 230125 }, { "epoch": 2.26, "grad_norm": 15.001561164855957, "learning_rate": 3.907498982195876e-06, "loss": 0.1133, "step": 230150 }, { "epoch": 2.26, "grad_norm": 2.457453966140747, "learning_rate": 3.907374859741627e-06, "loss": 0.2348, "step": 230175 }, { "epoch": 2.26, "grad_norm": 11.234522819519043, "learning_rate": 3.907250737287379e-06, "loss": 0.1288, "step": 230200 }, { "epoch": 2.26, "grad_norm": 4.641266822814941, "learning_rate": 3.90712661483313e-06, "loss": 0.2038, "step": 230225 }, { "epoch": 2.26, "grad_norm": 13.137649536132812, "learning_rate": 3.907002492378882e-06, "loss": 0.1419, "step": 230250 }, { "epoch": 2.26, "grad_norm": 5.788134574890137, "learning_rate": 3.9068783699246335e-06, "loss": 0.1937, "step": 230275 }, { "epoch": 2.26, "grad_norm": 8.326794624328613, "learning_rate": 3.906754247470385e-06, "loss": 0.1386, "step": 230300 }, { "epoch": 2.26, "grad_norm": 5.570496559143066, "learning_rate": 3.906630125016137e-06, "loss": 0.2061, "step": 230325 }, { "epoch": 2.26, "grad_norm": 11.892278671264648, "learning_rate": 3.906506002561888e-06, "loss": 0.1283, "step": 230350 }, { "epoch": 2.27, "grad_norm": 5.668341636657715, "learning_rate": 3.906381880107639e-06, "loss": 0.2791, "step": 230375 }, { "epoch": 2.27, "grad_norm": 11.976231575012207, "learning_rate": 3.906257757653391e-06, "loss": 0.1335, "step": 230400 }, { "epoch": 2.27, "grad_norm": 4.336565017700195, "learning_rate": 3.9061336351991424e-06, "loss": 0.2087, "step": 230425 }, { "epoch": 2.27, "grad_norm": 9.383696556091309, "learning_rate": 3.906009512744894e-06, "loss": 0.1346, "step": 230450 }, { "epoch": 2.27, "grad_norm": 3.78373122215271, "learning_rate": 3.905885390290646e-06, "loss": 0.2398, "step": 230475 }, { "epoch": 2.27, "grad_norm": 6.5824151039123535, "learning_rate": 3.905761267836397e-06, "loss": 0.1264, "step": 230500 }, { "epoch": 2.27, "grad_norm": 6.44843053817749, "learning_rate": 3.905637145382148e-06, "loss": 0.2247, "step": 230525 }, { "epoch": 2.27, "grad_norm": 9.58100414276123, "learning_rate": 3.9055130229279e-06, "loss": 0.1441, "step": 230550 }, { "epoch": 2.27, "grad_norm": 4.233091354370117, "learning_rate": 3.905388900473651e-06, "loss": 0.2435, "step": 230575 }, { "epoch": 2.27, "grad_norm": 9.847801208496094, "learning_rate": 3.9052647780194034e-06, "loss": 0.0988, "step": 230600 }, { "epoch": 2.27, "grad_norm": 3.811086416244507, "learning_rate": 3.905140655565155e-06, "loss": 0.1753, "step": 230625 }, { "epoch": 2.27, "grad_norm": 12.786188125610352, "learning_rate": 3.905016533110907e-06, "loss": 0.1011, "step": 230650 }, { "epoch": 2.27, "grad_norm": 5.696517467498779, "learning_rate": 3.904892410656658e-06, "loss": 0.2309, "step": 230675 }, { "epoch": 2.27, "grad_norm": 6.864850997924805, "learning_rate": 3.904768288202409e-06, "loss": 0.1302, "step": 230700 }, { "epoch": 2.27, "grad_norm": 3.2391357421875, "learning_rate": 3.904644165748161e-06, "loss": 0.1766, "step": 230725 }, { "epoch": 2.27, "grad_norm": 8.77242660522461, "learning_rate": 3.904520043293912e-06, "loss": 0.1175, "step": 230750 }, { "epoch": 2.27, "grad_norm": 6.330356597900391, "learning_rate": 3.904395920839664e-06, "loss": 0.2066, "step": 230775 }, { "epoch": 2.27, "grad_norm": 7.783705711364746, "learning_rate": 3.904271798385416e-06, "loss": 0.1185, "step": 230800 }, { "epoch": 2.27, "grad_norm": 7.114710330963135, "learning_rate": 3.904147675931167e-06, "loss": 0.2755, "step": 230825 }, { "epoch": 2.27, "grad_norm": 13.106735229492188, "learning_rate": 3.904023553476918e-06, "loss": 0.1191, "step": 230850 }, { "epoch": 2.27, "grad_norm": 4.063955307006836, "learning_rate": 3.90389943102267e-06, "loss": 0.2053, "step": 230875 }, { "epoch": 2.27, "grad_norm": 16.300960540771484, "learning_rate": 3.903775308568421e-06, "loss": 0.121, "step": 230900 }, { "epoch": 2.27, "grad_norm": 5.3976006507873535, "learning_rate": 3.903651186114173e-06, "loss": 0.2128, "step": 230925 }, { "epoch": 2.27, "grad_norm": 8.711119651794434, "learning_rate": 3.903527063659925e-06, "loss": 0.1589, "step": 230950 }, { "epoch": 2.27, "grad_norm": 4.764254093170166, "learning_rate": 3.903402941205676e-06, "loss": 0.209, "step": 230975 }, { "epoch": 2.27, "grad_norm": 2.7509372234344482, "learning_rate": 3.903278818751428e-06, "loss": 0.1117, "step": 231000 }, { "epoch": 2.27, "grad_norm": 4.94850492477417, "learning_rate": 3.903154696297179e-06, "loss": 0.2483, "step": 231025 }, { "epoch": 2.27, "grad_norm": 13.95238971710205, "learning_rate": 3.903030573842931e-06, "loss": 0.1193, "step": 231050 }, { "epoch": 2.27, "grad_norm": 7.801041126251221, "learning_rate": 3.902906451388682e-06, "loss": 0.2102, "step": 231075 }, { "epoch": 2.27, "grad_norm": 8.595012664794922, "learning_rate": 3.9027823289344345e-06, "loss": 0.1481, "step": 231100 }, { "epoch": 2.27, "grad_norm": 7.001913547515869, "learning_rate": 3.902658206480186e-06, "loss": 0.1881, "step": 231125 }, { "epoch": 2.27, "grad_norm": 9.723206520080566, "learning_rate": 3.902534084025937e-06, "loss": 0.1287, "step": 231150 }, { "epoch": 2.27, "grad_norm": 45.893802642822266, "learning_rate": 3.902409961571689e-06, "loss": 0.1714, "step": 231175 }, { "epoch": 2.27, "grad_norm": 3.7516889572143555, "learning_rate": 3.90228583911744e-06, "loss": 0.1046, "step": 231200 }, { "epoch": 2.27, "grad_norm": 2.1956710815429688, "learning_rate": 3.902161716663191e-06, "loss": 0.1918, "step": 231225 }, { "epoch": 2.27, "grad_norm": 7.697597980499268, "learning_rate": 3.902037594208943e-06, "loss": 0.1003, "step": 231250 }, { "epoch": 2.27, "grad_norm": 4.723092079162598, "learning_rate": 3.901913471754695e-06, "loss": 0.1763, "step": 231275 }, { "epoch": 2.27, "grad_norm": 13.1770658493042, "learning_rate": 3.901789349300446e-06, "loss": 0.1164, "step": 231300 }, { "epoch": 2.27, "grad_norm": 1.3783543109893799, "learning_rate": 3.901665226846198e-06, "loss": 0.1608, "step": 231325 }, { "epoch": 2.27, "grad_norm": 13.235325813293457, "learning_rate": 3.901541104391949e-06, "loss": 0.1512, "step": 231350 }, { "epoch": 2.27, "grad_norm": 3.8548614978790283, "learning_rate": 3.9014169819377e-06, "loss": 0.2433, "step": 231375 }, { "epoch": 2.28, "grad_norm": 11.178011894226074, "learning_rate": 3.901292859483452e-06, "loss": 0.1641, "step": 231400 }, { "epoch": 2.28, "grad_norm": 6.308544635772705, "learning_rate": 3.901173701927373e-06, "loss": 0.2305, "step": 231425 }, { "epoch": 2.28, "grad_norm": 8.386777877807617, "learning_rate": 3.9010495794731255e-06, "loss": 0.1256, "step": 231450 }, { "epoch": 2.28, "grad_norm": 3.8215296268463135, "learning_rate": 3.900925457018877e-06, "loss": 0.2022, "step": 231475 }, { "epoch": 2.28, "grad_norm": 7.103325366973877, "learning_rate": 3.900801334564629e-06, "loss": 0.0993, "step": 231500 }, { "epoch": 2.28, "grad_norm": 2.8907179832458496, "learning_rate": 3.90067721211038e-06, "loss": 0.2161, "step": 231525 }, { "epoch": 2.28, "grad_norm": 10.157585144042969, "learning_rate": 3.900553089656132e-06, "loss": 0.1243, "step": 231550 }, { "epoch": 2.28, "grad_norm": 3.559591054916382, "learning_rate": 3.900428967201883e-06, "loss": 0.2021, "step": 231575 }, { "epoch": 2.28, "grad_norm": 9.864784240722656, "learning_rate": 3.9003048447476344e-06, "loss": 0.1313, "step": 231600 }, { "epoch": 2.28, "grad_norm": 2.9208428859710693, "learning_rate": 3.9001807222933865e-06, "loss": 0.2329, "step": 231625 }, { "epoch": 2.28, "grad_norm": 9.205641746520996, "learning_rate": 3.900056599839138e-06, "loss": 0.1078, "step": 231650 }, { "epoch": 2.28, "grad_norm": 6.737003326416016, "learning_rate": 3.899932477384889e-06, "loss": 0.2359, "step": 231675 }, { "epoch": 2.28, "grad_norm": 14.27245807647705, "learning_rate": 3.899808354930641e-06, "loss": 0.1563, "step": 231700 }, { "epoch": 2.28, "grad_norm": 12.292119979858398, "learning_rate": 3.899684232476392e-06, "loss": 0.234, "step": 231725 }, { "epoch": 2.28, "grad_norm": 5.904030799865723, "learning_rate": 3.899560110022143e-06, "loss": 0.1427, "step": 231750 }, { "epoch": 2.28, "grad_norm": 6.114804267883301, "learning_rate": 3.8994359875678954e-06, "loss": 0.184, "step": 231775 }, { "epoch": 2.28, "grad_norm": 10.439909934997559, "learning_rate": 3.899311865113647e-06, "loss": 0.0935, "step": 231800 }, { "epoch": 2.28, "grad_norm": 0.42234960198402405, "learning_rate": 3.899187742659398e-06, "loss": 0.2162, "step": 231825 }, { "epoch": 2.28, "grad_norm": 10.892326354980469, "learning_rate": 3.89906362020515e-06, "loss": 0.135, "step": 231850 }, { "epoch": 2.28, "grad_norm": 5.185796737670898, "learning_rate": 3.898939497750901e-06, "loss": 0.2332, "step": 231875 }, { "epoch": 2.28, "grad_norm": 18.012222290039062, "learning_rate": 3.898815375296653e-06, "loss": 0.1215, "step": 231900 }, { "epoch": 2.28, "grad_norm": 5.711288928985596, "learning_rate": 3.898691252842404e-06, "loss": 0.2039, "step": 231925 }, { "epoch": 2.28, "grad_norm": 8.575356483459473, "learning_rate": 3.8985671303881565e-06, "loss": 0.1641, "step": 231950 }, { "epoch": 2.28, "grad_norm": 3.07651424407959, "learning_rate": 3.898443007933908e-06, "loss": 0.1527, "step": 231975 }, { "epoch": 2.28, "grad_norm": 8.418601036071777, "learning_rate": 3.898318885479659e-06, "loss": 0.0987, "step": 232000 }, { "epoch": 2.28, "grad_norm": 0.08164570480585098, "learning_rate": 3.898194763025411e-06, "loss": 0.1857, "step": 232025 }, { "epoch": 2.28, "grad_norm": 6.294027805328369, "learning_rate": 3.898070640571162e-06, "loss": 0.1137, "step": 232050 }, { "epoch": 2.28, "grad_norm": 3.9998574256896973, "learning_rate": 3.897946518116913e-06, "loss": 0.201, "step": 232075 }, { "epoch": 2.28, "grad_norm": 10.861872673034668, "learning_rate": 3.8978223956626654e-06, "loss": 0.1624, "step": 232100 }, { "epoch": 2.28, "grad_norm": 1.7909502983093262, "learning_rate": 3.897698273208417e-06, "loss": 0.2745, "step": 232125 }, { "epoch": 2.28, "grad_norm": 10.568493843078613, "learning_rate": 3.897574150754168e-06, "loss": 0.13, "step": 232150 }, { "epoch": 2.28, "grad_norm": 5.190335273742676, "learning_rate": 3.89745002829992e-06, "loss": 0.1758, "step": 232175 }, { "epoch": 2.28, "grad_norm": 6.8662109375, "learning_rate": 3.897325905845671e-06, "loss": 0.093, "step": 232200 }, { "epoch": 2.28, "grad_norm": 2.9813663959503174, "learning_rate": 3.897201783391422e-06, "loss": 0.2496, "step": 232225 }, { "epoch": 2.28, "grad_norm": 7.2533183097839355, "learning_rate": 3.897077660937174e-06, "loss": 0.1165, "step": 232250 }, { "epoch": 2.28, "grad_norm": 2.071341037750244, "learning_rate": 3.896953538482926e-06, "loss": 0.221, "step": 232275 }, { "epoch": 2.28, "grad_norm": 13.293318748474121, "learning_rate": 3.896829416028678e-06, "loss": 0.1469, "step": 232300 }, { "epoch": 2.28, "grad_norm": 4.973652362823486, "learning_rate": 3.896705293574429e-06, "loss": 0.2195, "step": 232325 }, { "epoch": 2.28, "grad_norm": 8.838778495788574, "learning_rate": 3.896581171120181e-06, "loss": 0.1323, "step": 232350 }, { "epoch": 2.28, "grad_norm": 7.819187641143799, "learning_rate": 3.896457048665932e-06, "loss": 0.1982, "step": 232375 }, { "epoch": 2.28, "grad_norm": 12.982776641845703, "learning_rate": 3.896332926211684e-06, "loss": 0.1372, "step": 232400 }, { "epoch": 2.29, "grad_norm": 2.769650459289551, "learning_rate": 3.896208803757435e-06, "loss": 0.2524, "step": 232425 }, { "epoch": 2.29, "grad_norm": 14.441356658935547, "learning_rate": 3.896084681303187e-06, "loss": 0.1132, "step": 232450 }, { "epoch": 2.29, "grad_norm": 4.808950424194336, "learning_rate": 3.895960558848939e-06, "loss": 0.2157, "step": 232475 }, { "epoch": 2.29, "grad_norm": 8.272233963012695, "learning_rate": 3.89583643639469e-06, "loss": 0.1395, "step": 232500 }, { "epoch": 2.29, "grad_norm": 3.8655240535736084, "learning_rate": 3.895712313940441e-06, "loss": 0.2112, "step": 232525 }, { "epoch": 2.29, "grad_norm": 17.39362907409668, "learning_rate": 3.895588191486193e-06, "loss": 0.1401, "step": 232550 }, { "epoch": 2.29, "grad_norm": 3.831317663192749, "learning_rate": 3.895464069031944e-06, "loss": 0.207, "step": 232575 }, { "epoch": 2.29, "grad_norm": 17.212995529174805, "learning_rate": 3.895339946577696e-06, "loss": 0.1238, "step": 232600 }, { "epoch": 2.29, "grad_norm": 4.667490005493164, "learning_rate": 3.895215824123448e-06, "loss": 0.204, "step": 232625 }, { "epoch": 2.29, "grad_norm": 14.432122230529785, "learning_rate": 3.895091701669199e-06, "loss": 0.1753, "step": 232650 }, { "epoch": 2.29, "grad_norm": 0.9844999313354492, "learning_rate": 3.894967579214951e-06, "loss": 0.187, "step": 232675 }, { "epoch": 2.29, "grad_norm": 14.74754524230957, "learning_rate": 3.894843456760702e-06, "loss": 0.1218, "step": 232700 }, { "epoch": 2.29, "grad_norm": 4.115658283233643, "learning_rate": 3.894719334306454e-06, "loss": 0.2256, "step": 232725 }, { "epoch": 2.29, "grad_norm": 7.328189849853516, "learning_rate": 3.894595211852205e-06, "loss": 0.1467, "step": 232750 }, { "epoch": 2.29, "grad_norm": 5.519545555114746, "learning_rate": 3.8944710893979575e-06, "loss": 0.2326, "step": 232775 }, { "epoch": 2.29, "grad_norm": 9.043622016906738, "learning_rate": 3.894346966943709e-06, "loss": 0.1103, "step": 232800 }, { "epoch": 2.29, "grad_norm": 5.851887226104736, "learning_rate": 3.89422284448946e-06, "loss": 0.1856, "step": 232825 }, { "epoch": 2.29, "grad_norm": 18.92449951171875, "learning_rate": 3.894098722035211e-06, "loss": 0.081, "step": 232850 }, { "epoch": 2.29, "grad_norm": 5.838744163513184, "learning_rate": 3.893974599580963e-06, "loss": 0.1861, "step": 232875 }, { "epoch": 2.29, "grad_norm": 9.252334594726562, "learning_rate": 3.893850477126714e-06, "loss": 0.1204, "step": 232900 }, { "epoch": 2.29, "grad_norm": 2.532322406768799, "learning_rate": 3.8937263546724656e-06, "loss": 0.2545, "step": 232925 }, { "epoch": 2.29, "grad_norm": 12.281084060668945, "learning_rate": 3.893602232218218e-06, "loss": 0.118, "step": 232950 }, { "epoch": 2.29, "grad_norm": 5.731017112731934, "learning_rate": 3.893478109763969e-06, "loss": 0.2013, "step": 232975 }, { "epoch": 2.29, "grad_norm": 4.584683895111084, "learning_rate": 3.89335398730972e-06, "loss": 0.1391, "step": 233000 }, { "epoch": 2.29, "grad_norm": 2.8582189083099365, "learning_rate": 3.893229864855472e-06, "loss": 0.17, "step": 233025 }, { "epoch": 2.29, "grad_norm": 9.943374633789062, "learning_rate": 3.893105742401223e-06, "loss": 0.105, "step": 233050 }, { "epoch": 2.29, "grad_norm": 9.146281242370605, "learning_rate": 3.892981619946975e-06, "loss": 0.1936, "step": 233075 }, { "epoch": 2.29, "grad_norm": 10.925787925720215, "learning_rate": 3.892857497492727e-06, "loss": 0.1355, "step": 233100 }, { "epoch": 2.29, "grad_norm": 5.386608123779297, "learning_rate": 3.892733375038479e-06, "loss": 0.2099, "step": 233125 }, { "epoch": 2.29, "grad_norm": 16.097007751464844, "learning_rate": 3.89260925258423e-06, "loss": 0.1383, "step": 233150 }, { "epoch": 2.29, "grad_norm": 1.333473563194275, "learning_rate": 3.892485130129982e-06, "loss": 0.1944, "step": 233175 }, { "epoch": 2.29, "grad_norm": 10.797720909118652, "learning_rate": 3.892361007675733e-06, "loss": 0.187, "step": 233200 }, { "epoch": 2.29, "grad_norm": 4.0225934982299805, "learning_rate": 3.892236885221484e-06, "loss": 0.2122, "step": 233225 }, { "epoch": 2.29, "grad_norm": 12.010346412658691, "learning_rate": 3.892112762767236e-06, "loss": 0.1073, "step": 233250 }, { "epoch": 2.29, "grad_norm": 9.15235710144043, "learning_rate": 3.891988640312988e-06, "loss": 0.2511, "step": 233275 }, { "epoch": 2.29, "grad_norm": 11.925019264221191, "learning_rate": 3.891864517858739e-06, "loss": 0.109, "step": 233300 }, { "epoch": 2.29, "grad_norm": 6.850942611694336, "learning_rate": 3.891740395404491e-06, "loss": 0.1921, "step": 233325 }, { "epoch": 2.29, "grad_norm": 11.625567436218262, "learning_rate": 3.891616272950242e-06, "loss": 0.1419, "step": 233350 }, { "epoch": 2.29, "grad_norm": 3.971219778060913, "learning_rate": 3.891492150495993e-06, "loss": 0.1763, "step": 233375 }, { "epoch": 2.29, "grad_norm": 8.591645240783691, "learning_rate": 3.891368028041745e-06, "loss": 0.1274, "step": 233400 }, { "epoch": 2.3, "grad_norm": 5.223263740539551, "learning_rate": 3.891243905587497e-06, "loss": 0.2582, "step": 233425 }, { "epoch": 2.3, "grad_norm": 13.696516990661621, "learning_rate": 3.891119783133248e-06, "loss": 0.1148, "step": 233450 }, { "epoch": 2.3, "grad_norm": 4.358654975891113, "learning_rate": 3.890995660679e-06, "loss": 0.2059, "step": 233475 }, { "epoch": 2.3, "grad_norm": 7.052368640899658, "learning_rate": 3.890871538224751e-06, "loss": 0.0966, "step": 233500 }, { "epoch": 2.3, "grad_norm": 3.9977333545684814, "learning_rate": 3.890747415770503e-06, "loss": 0.2456, "step": 233525 }, { "epoch": 2.3, "grad_norm": 14.70379638671875, "learning_rate": 3.890623293316254e-06, "loss": 0.1478, "step": 233550 }, { "epoch": 2.3, "grad_norm": 4.492133140563965, "learning_rate": 3.890499170862006e-06, "loss": 0.2044, "step": 233575 }, { "epoch": 2.3, "grad_norm": 9.63764476776123, "learning_rate": 3.890375048407758e-06, "loss": 0.1029, "step": 233600 }, { "epoch": 2.3, "grad_norm": 1.5324636697769165, "learning_rate": 3.89025092595351e-06, "loss": 0.2355, "step": 233625 }, { "epoch": 2.3, "grad_norm": 8.94007682800293, "learning_rate": 3.890126803499261e-06, "loss": 0.1366, "step": 233650 }, { "epoch": 2.3, "grad_norm": 6.537426471710205, "learning_rate": 3.890002681045012e-06, "loss": 0.2163, "step": 233675 }, { "epoch": 2.3, "grad_norm": 6.773644924163818, "learning_rate": 3.889878558590763e-06, "loss": 0.0906, "step": 233700 }, { "epoch": 2.3, "grad_norm": 14.011444091796875, "learning_rate": 3.889754436136515e-06, "loss": 0.1924, "step": 233725 }, { "epoch": 2.3, "grad_norm": 16.75660514831543, "learning_rate": 3.8896303136822666e-06, "loss": 0.1291, "step": 233750 }, { "epoch": 2.3, "grad_norm": 5.070399284362793, "learning_rate": 3.889506191228018e-06, "loss": 0.192, "step": 233775 }, { "epoch": 2.3, "grad_norm": 17.593006134033203, "learning_rate": 3.88938206877377e-06, "loss": 0.1554, "step": 233800 }, { "epoch": 2.3, "grad_norm": 5.718338966369629, "learning_rate": 3.889257946319521e-06, "loss": 0.2206, "step": 233825 }, { "epoch": 2.3, "grad_norm": 7.797253131866455, "learning_rate": 3.889133823865272e-06, "loss": 0.1313, "step": 233850 }, { "epoch": 2.3, "grad_norm": 1.2787336111068726, "learning_rate": 3.889009701411024e-06, "loss": 0.2405, "step": 233875 }, { "epoch": 2.3, "grad_norm": 10.22521686553955, "learning_rate": 3.8888855789567755e-06, "loss": 0.1098, "step": 233900 }, { "epoch": 2.3, "grad_norm": 5.105373382568359, "learning_rate": 3.888761456502528e-06, "loss": 0.2447, "step": 233925 }, { "epoch": 2.3, "grad_norm": 11.795064926147461, "learning_rate": 3.888637334048279e-06, "loss": 0.1162, "step": 233950 }, { "epoch": 2.3, "grad_norm": 2.225687265396118, "learning_rate": 3.888518176492201e-06, "loss": 0.2103, "step": 233975 }, { "epoch": 2.3, "grad_norm": 10.899229049682617, "learning_rate": 3.888394054037952e-06, "loss": 0.1344, "step": 234000 }, { "epoch": 2.3, "grad_norm": 2.2573814392089844, "learning_rate": 3.888269931583704e-06, "loss": 0.2521, "step": 234025 }, { "epoch": 2.3, "grad_norm": 8.096460342407227, "learning_rate": 3.888145809129455e-06, "loss": 0.1339, "step": 234050 }, { "epoch": 2.3, "grad_norm": 2.631380558013916, "learning_rate": 3.888021686675207e-06, "loss": 0.2202, "step": 234075 }, { "epoch": 2.3, "grad_norm": 18.746206283569336, "learning_rate": 3.887897564220958e-06, "loss": 0.1392, "step": 234100 }, { "epoch": 2.3, "grad_norm": 5.401305675506592, "learning_rate": 3.88777344176671e-06, "loss": 0.2075, "step": 234125 }, { "epoch": 2.3, "grad_norm": 4.919332504272461, "learning_rate": 3.887649319312462e-06, "loss": 0.1143, "step": 234150 }, { "epoch": 2.3, "grad_norm": 2.135051727294922, "learning_rate": 3.887525196858213e-06, "loss": 0.2494, "step": 234175 }, { "epoch": 2.3, "grad_norm": 11.442659378051758, "learning_rate": 3.887401074403964e-06, "loss": 0.1104, "step": 234200 }, { "epoch": 2.3, "grad_norm": 3.285695791244507, "learning_rate": 3.887276951949716e-06, "loss": 0.2348, "step": 234225 }, { "epoch": 2.3, "grad_norm": 10.450225830078125, "learning_rate": 3.887152829495467e-06, "loss": 0.1026, "step": 234250 }, { "epoch": 2.3, "grad_norm": 8.551238059997559, "learning_rate": 3.887028707041219e-06, "loss": 0.22, "step": 234275 }, { "epoch": 2.3, "grad_norm": 9.035158157348633, "learning_rate": 3.88690458458697e-06, "loss": 0.0868, "step": 234300 }, { "epoch": 2.3, "grad_norm": 3.6744744777679443, "learning_rate": 3.886780462132722e-06, "loss": 0.2145, "step": 234325 }, { "epoch": 2.3, "grad_norm": 2.8019614219665527, "learning_rate": 3.886656339678473e-06, "loss": 0.1145, "step": 234350 }, { "epoch": 2.3, "grad_norm": 6.05309534072876, "learning_rate": 3.886532217224225e-06, "loss": 0.1929, "step": 234375 }, { "epoch": 2.3, "grad_norm": 8.265913963317871, "learning_rate": 3.886408094769976e-06, "loss": 0.1196, "step": 234400 }, { "epoch": 2.3, "grad_norm": 6.951480388641357, "learning_rate": 3.886283972315728e-06, "loss": 0.2382, "step": 234425 }, { "epoch": 2.31, "grad_norm": 7.681475639343262, "learning_rate": 3.88615984986148e-06, "loss": 0.1323, "step": 234450 }, { "epoch": 2.31, "grad_norm": 8.040596961975098, "learning_rate": 3.886035727407232e-06, "loss": 0.2439, "step": 234475 }, { "epoch": 2.31, "grad_norm": 14.05887508392334, "learning_rate": 3.885911604952983e-06, "loss": 0.1619, "step": 234500 }, { "epoch": 2.31, "grad_norm": 6.735711097717285, "learning_rate": 3.885787482498734e-06, "loss": 0.1882, "step": 234525 }, { "epoch": 2.31, "grad_norm": 7.843395233154297, "learning_rate": 3.885663360044486e-06, "loss": 0.1221, "step": 234550 }, { "epoch": 2.31, "grad_norm": 6.307867527008057, "learning_rate": 3.885539237590237e-06, "loss": 0.2324, "step": 234575 }, { "epoch": 2.31, "grad_norm": 8.647250175476074, "learning_rate": 3.885415115135989e-06, "loss": 0.1547, "step": 234600 }, { "epoch": 2.31, "grad_norm": 3.0862491130828857, "learning_rate": 3.885290992681741e-06, "loss": 0.241, "step": 234625 }, { "epoch": 2.31, "grad_norm": 13.871076583862305, "learning_rate": 3.885166870227492e-06, "loss": 0.1242, "step": 234650 }, { "epoch": 2.31, "grad_norm": 2.7150282859802246, "learning_rate": 3.885042747773243e-06, "loss": 0.204, "step": 234675 }, { "epoch": 2.31, "grad_norm": 8.563865661621094, "learning_rate": 3.884918625318995e-06, "loss": 0.1039, "step": 234700 }, { "epoch": 2.31, "grad_norm": 0.2263273000717163, "learning_rate": 3.884794502864746e-06, "loss": 0.1892, "step": 234725 }, { "epoch": 2.31, "grad_norm": 12.018915176391602, "learning_rate": 3.8846703804104975e-06, "loss": 0.0977, "step": 234750 }, { "epoch": 2.31, "grad_norm": 5.628733158111572, "learning_rate": 3.88454625795625e-06, "loss": 0.2077, "step": 234775 }, { "epoch": 2.31, "grad_norm": 4.9807939529418945, "learning_rate": 3.884422135502001e-06, "loss": 0.16, "step": 234800 }, { "epoch": 2.31, "grad_norm": 5.8341474533081055, "learning_rate": 3.884298013047753e-06, "loss": 0.2215, "step": 234825 }, { "epoch": 2.31, "grad_norm": 11.925104141235352, "learning_rate": 3.884173890593504e-06, "loss": 0.128, "step": 234850 }, { "epoch": 2.31, "grad_norm": 10.334938049316406, "learning_rate": 3.884049768139256e-06, "loss": 0.2584, "step": 234875 }, { "epoch": 2.31, "grad_norm": 11.013731956481934, "learning_rate": 3.883925645685007e-06, "loss": 0.1454, "step": 234900 }, { "epoch": 2.31, "grad_norm": 4.278271675109863, "learning_rate": 3.883801523230759e-06, "loss": 0.1917, "step": 234925 }, { "epoch": 2.31, "grad_norm": 12.016586303710938, "learning_rate": 3.883677400776511e-06, "loss": 0.1141, "step": 234950 }, { "epoch": 2.31, "grad_norm": 6.878223896026611, "learning_rate": 3.883553278322262e-06, "loss": 0.2666, "step": 234975 }, { "epoch": 2.31, "grad_norm": 9.995141983032227, "learning_rate": 3.883429155868014e-06, "loss": 0.1105, "step": 235000 }, { "epoch": 2.31, "grad_norm": 6.265763282775879, "learning_rate": 3.883305033413765e-06, "loss": 0.2082, "step": 235025 }, { "epoch": 2.31, "grad_norm": 12.018630981445312, "learning_rate": 3.883180910959516e-06, "loss": 0.1215, "step": 235050 }, { "epoch": 2.31, "grad_norm": 8.041241645812988, "learning_rate": 3.883056788505268e-06, "loss": 0.1945, "step": 235075 }, { "epoch": 2.31, "grad_norm": 14.017518043518066, "learning_rate": 3.88293266605102e-06, "loss": 0.1053, "step": 235100 }, { "epoch": 2.31, "grad_norm": 3.651250123977661, "learning_rate": 3.882808543596771e-06, "loss": 0.1778, "step": 235125 }, { "epoch": 2.31, "grad_norm": 10.505250930786133, "learning_rate": 3.882684421142522e-06, "loss": 0.1211, "step": 235150 }, { "epoch": 2.31, "grad_norm": 6.187166690826416, "learning_rate": 3.882560298688274e-06, "loss": 0.1778, "step": 235175 }, { "epoch": 2.31, "grad_norm": 9.463591575622559, "learning_rate": 3.882436176234025e-06, "loss": 0.1153, "step": 235200 }, { "epoch": 2.31, "grad_norm": 6.614597320556641, "learning_rate": 3.882312053779777e-06, "loss": 0.237, "step": 235225 }, { "epoch": 2.31, "grad_norm": 10.055460929870605, "learning_rate": 3.8821879313255285e-06, "loss": 0.1302, "step": 235250 }, { "epoch": 2.31, "grad_norm": 8.309613227844238, "learning_rate": 3.882063808871281e-06, "loss": 0.2191, "step": 235275 }, { "epoch": 2.31, "grad_norm": 13.362101554870605, "learning_rate": 3.881939686417032e-06, "loss": 0.1537, "step": 235300 }, { "epoch": 2.31, "grad_norm": 4.289886951446533, "learning_rate": 3.881815563962784e-06, "loss": 0.2087, "step": 235325 }, { "epoch": 2.31, "grad_norm": 10.180673599243164, "learning_rate": 3.881691441508535e-06, "loss": 0.0962, "step": 235350 }, { "epoch": 2.31, "grad_norm": 6.493546485900879, "learning_rate": 3.881567319054286e-06, "loss": 0.2065, "step": 235375 }, { "epoch": 2.31, "grad_norm": 12.484746932983398, "learning_rate": 3.881443196600038e-06, "loss": 0.119, "step": 235400 }, { "epoch": 2.31, "grad_norm": 3.867511510848999, "learning_rate": 3.8813190741457896e-06, "loss": 0.2193, "step": 235425 }, { "epoch": 2.31, "grad_norm": 5.323732376098633, "learning_rate": 3.881194951691541e-06, "loss": 0.1295, "step": 235450 }, { "epoch": 2.32, "grad_norm": 4.976983070373535, "learning_rate": 3.881070829237293e-06, "loss": 0.2236, "step": 235475 }, { "epoch": 2.32, "grad_norm": 8.479580879211426, "learning_rate": 3.880946706783044e-06, "loss": 0.1185, "step": 235500 }, { "epoch": 2.32, "grad_norm": 2.5333898067474365, "learning_rate": 3.880822584328795e-06, "loss": 0.2188, "step": 235525 }, { "epoch": 2.32, "grad_norm": 8.957124710083008, "learning_rate": 3.880698461874547e-06, "loss": 0.1182, "step": 235550 }, { "epoch": 2.32, "grad_norm": 6.165356159210205, "learning_rate": 3.8805743394202985e-06, "loss": 0.2492, "step": 235575 }, { "epoch": 2.32, "grad_norm": 7.694282054901123, "learning_rate": 3.880450216966051e-06, "loss": 0.1192, "step": 235600 }, { "epoch": 2.32, "grad_norm": 4.79869270324707, "learning_rate": 3.880326094511802e-06, "loss": 0.2055, "step": 235625 }, { "epoch": 2.32, "grad_norm": 12.116414070129395, "learning_rate": 3.880201972057554e-06, "loss": 0.1158, "step": 235650 }, { "epoch": 2.32, "grad_norm": 7.8491129875183105, "learning_rate": 3.880077849603305e-06, "loss": 0.3233, "step": 235675 }, { "epoch": 2.32, "grad_norm": 6.050544261932373, "learning_rate": 3.879953727149057e-06, "loss": 0.137, "step": 235700 }, { "epoch": 2.32, "grad_norm": 4.265997886657715, "learning_rate": 3.879829604694808e-06, "loss": 0.2166, "step": 235725 }, { "epoch": 2.32, "grad_norm": 15.594897270202637, "learning_rate": 3.8797054822405596e-06, "loss": 0.1334, "step": 235750 }, { "epoch": 2.32, "grad_norm": 5.842802047729492, "learning_rate": 3.879581359786312e-06, "loss": 0.1861, "step": 235775 }, { "epoch": 2.32, "grad_norm": 7.480189323425293, "learning_rate": 3.879457237332063e-06, "loss": 0.1242, "step": 235800 }, { "epoch": 2.32, "grad_norm": 4.772827625274658, "learning_rate": 3.879333114877814e-06, "loss": 0.1963, "step": 235825 }, { "epoch": 2.32, "grad_norm": 11.398026466369629, "learning_rate": 3.879208992423566e-06, "loss": 0.1336, "step": 235850 }, { "epoch": 2.32, "grad_norm": 3.659165620803833, "learning_rate": 3.879084869969317e-06, "loss": 0.1649, "step": 235875 }, { "epoch": 2.32, "grad_norm": 10.323481559753418, "learning_rate": 3.8789607475150685e-06, "loss": 0.1506, "step": 235900 }, { "epoch": 2.32, "grad_norm": 5.969252109527588, "learning_rate": 3.8788366250608206e-06, "loss": 0.2629, "step": 235925 }, { "epoch": 2.32, "grad_norm": 9.125596046447754, "learning_rate": 3.878712502606572e-06, "loss": 0.1509, "step": 235950 }, { "epoch": 2.32, "grad_norm": 2.653928518295288, "learning_rate": 3.878588380152323e-06, "loss": 0.2142, "step": 235975 }, { "epoch": 2.32, "grad_norm": 7.313685417175293, "learning_rate": 3.878464257698075e-06, "loss": 0.1283, "step": 236000 }, { "epoch": 2.32, "grad_norm": 4.924842834472656, "learning_rate": 3.878340135243826e-06, "loss": 0.1872, "step": 236025 }, { "epoch": 2.32, "grad_norm": 8.143396377563477, "learning_rate": 3.878216012789578e-06, "loss": 0.1266, "step": 236050 }, { "epoch": 2.32, "grad_norm": 6.868351936340332, "learning_rate": 3.8780918903353295e-06, "loss": 0.2127, "step": 236075 }, { "epoch": 2.32, "grad_norm": 10.201228141784668, "learning_rate": 3.877967767881082e-06, "loss": 0.108, "step": 236100 }, { "epoch": 2.32, "grad_norm": 3.715578556060791, "learning_rate": 3.877843645426833e-06, "loss": 0.2381, "step": 236125 }, { "epoch": 2.32, "grad_norm": 6.537152290344238, "learning_rate": 3.877719522972584e-06, "loss": 0.1424, "step": 236150 }, { "epoch": 2.32, "grad_norm": 6.2984724044799805, "learning_rate": 3.877595400518336e-06, "loss": 0.2065, "step": 236175 }, { "epoch": 2.32, "grad_norm": 15.554980278015137, "learning_rate": 3.877471278064087e-06, "loss": 0.1249, "step": 236200 }, { "epoch": 2.32, "grad_norm": 6.575314521789551, "learning_rate": 3.8773471556098385e-06, "loss": 0.2111, "step": 236225 }, { "epoch": 2.32, "grad_norm": 14.595526695251465, "learning_rate": 3.8772230331555906e-06, "loss": 0.1431, "step": 236250 }, { "epoch": 2.32, "grad_norm": 5.583065986633301, "learning_rate": 3.877098910701342e-06, "loss": 0.2154, "step": 236275 }, { "epoch": 2.32, "grad_norm": 16.979022979736328, "learning_rate": 3.876974788247093e-06, "loss": 0.1299, "step": 236300 }, { "epoch": 2.32, "grad_norm": 1.4528660774230957, "learning_rate": 3.876855630691015e-06, "loss": 0.2216, "step": 236325 }, { "epoch": 2.32, "grad_norm": 15.500714302062988, "learning_rate": 3.876731508236766e-06, "loss": 0.1363, "step": 236350 }, { "epoch": 2.32, "grad_norm": 3.6167452335357666, "learning_rate": 3.876607385782518e-06, "loss": 0.24, "step": 236375 }, { "epoch": 2.32, "grad_norm": 8.013128280639648, "learning_rate": 3.876483263328269e-06, "loss": 0.0966, "step": 236400 }, { "epoch": 2.32, "grad_norm": 4.039517879486084, "learning_rate": 3.8763591408740205e-06, "loss": 0.1951, "step": 236425 }, { "epoch": 2.32, "grad_norm": 13.908252716064453, "learning_rate": 3.876235018419773e-06, "loss": 0.1454, "step": 236450 }, { "epoch": 2.33, "grad_norm": 3.288036823272705, "learning_rate": 3.876110895965524e-06, "loss": 0.2203, "step": 236475 }, { "epoch": 2.33, "grad_norm": 6.3096795082092285, "learning_rate": 3.875986773511276e-06, "loss": 0.1103, "step": 236500 }, { "epoch": 2.33, "grad_norm": 5.269631385803223, "learning_rate": 3.875862651057027e-06, "loss": 0.1759, "step": 236525 }, { "epoch": 2.33, "grad_norm": 14.764918327331543, "learning_rate": 3.875738528602779e-06, "loss": 0.1571, "step": 236550 }, { "epoch": 2.33, "grad_norm": 4.370074272155762, "learning_rate": 3.87561440614853e-06, "loss": 0.255, "step": 236575 }, { "epoch": 2.33, "grad_norm": 11.084874153137207, "learning_rate": 3.8754902836942816e-06, "loss": 0.1489, "step": 236600 }, { "epoch": 2.33, "grad_norm": 8.581478118896484, "learning_rate": 3.875366161240034e-06, "loss": 0.1935, "step": 236625 }, { "epoch": 2.33, "grad_norm": 8.775039672851562, "learning_rate": 3.875242038785785e-06, "loss": 0.1189, "step": 236650 }, { "epoch": 2.33, "grad_norm": 6.962904930114746, "learning_rate": 3.875117916331536e-06, "loss": 0.1873, "step": 236675 }, { "epoch": 2.33, "grad_norm": 13.36958122253418, "learning_rate": 3.874993793877288e-06, "loss": 0.1522, "step": 236700 }, { "epoch": 2.33, "grad_norm": 4.611644268035889, "learning_rate": 3.874869671423039e-06, "loss": 0.1631, "step": 236725 }, { "epoch": 2.33, "grad_norm": 7.510944366455078, "learning_rate": 3.8747455489687905e-06, "loss": 0.1271, "step": 236750 }, { "epoch": 2.33, "grad_norm": 8.129607200622559, "learning_rate": 3.874621426514543e-06, "loss": 0.1833, "step": 236775 }, { "epoch": 2.33, "grad_norm": 12.1907377243042, "learning_rate": 3.874497304060294e-06, "loss": 0.1349, "step": 236800 }, { "epoch": 2.33, "grad_norm": 9.915130615234375, "learning_rate": 3.874373181606045e-06, "loss": 0.2337, "step": 236825 }, { "epoch": 2.33, "grad_norm": 7.891322612762451, "learning_rate": 3.874249059151797e-06, "loss": 0.1788, "step": 236850 }, { "epoch": 2.33, "grad_norm": 5.7762250900268555, "learning_rate": 3.874124936697548e-06, "loss": 0.2222, "step": 236875 }, { "epoch": 2.33, "grad_norm": 12.353472709655762, "learning_rate": 3.8740008142433e-06, "loss": 0.1237, "step": 236900 }, { "epoch": 2.33, "grad_norm": 5.135797023773193, "learning_rate": 3.8738766917890516e-06, "loss": 0.218, "step": 236925 }, { "epoch": 2.33, "grad_norm": 7.434754371643066, "learning_rate": 3.873752569334804e-06, "loss": 0.1423, "step": 236950 }, { "epoch": 2.33, "grad_norm": 5.572039604187012, "learning_rate": 3.873628446880555e-06, "loss": 0.1851, "step": 236975 }, { "epoch": 2.33, "grad_norm": 15.284958839416504, "learning_rate": 3.873504324426307e-06, "loss": 0.1104, "step": 237000 }, { "epoch": 2.33, "grad_norm": 4.799895763397217, "learning_rate": 3.873380201972058e-06, "loss": 0.2187, "step": 237025 }, { "epoch": 2.33, "grad_norm": 12.992193222045898, "learning_rate": 3.873256079517809e-06, "loss": 0.128, "step": 237050 }, { "epoch": 2.33, "grad_norm": 9.223220825195312, "learning_rate": 3.873131957063561e-06, "loss": 0.1947, "step": 237075 }, { "epoch": 2.33, "grad_norm": 7.983179092407227, "learning_rate": 3.8730078346093126e-06, "loss": 0.1107, "step": 237100 }, { "epoch": 2.33, "grad_norm": 2.7543771266937256, "learning_rate": 3.872883712155064e-06, "loss": 0.2327, "step": 237125 }, { "epoch": 2.33, "grad_norm": 11.011427879333496, "learning_rate": 3.872759589700816e-06, "loss": 0.0783, "step": 237150 }, { "epoch": 2.33, "grad_norm": 6.3399739265441895, "learning_rate": 3.872635467246567e-06, "loss": 0.2097, "step": 237175 }, { "epoch": 2.33, "grad_norm": 12.224087715148926, "learning_rate": 3.872511344792318e-06, "loss": 0.0995, "step": 237200 }, { "epoch": 2.33, "grad_norm": 2.3900866508483887, "learning_rate": 3.87238722233807e-06, "loss": 0.242, "step": 237225 }, { "epoch": 2.33, "grad_norm": 10.783129692077637, "learning_rate": 3.8722630998838215e-06, "loss": 0.1206, "step": 237250 }, { "epoch": 2.33, "grad_norm": 6.082641124725342, "learning_rate": 3.872138977429573e-06, "loss": 0.2196, "step": 237275 }, { "epoch": 2.33, "grad_norm": 12.819164276123047, "learning_rate": 3.872014854975325e-06, "loss": 0.1384, "step": 237300 }, { "epoch": 2.33, "grad_norm": 5.254701137542725, "learning_rate": 3.871890732521076e-06, "loss": 0.2019, "step": 237325 }, { "epoch": 2.33, "grad_norm": 3.786142349243164, "learning_rate": 3.871766610066828e-06, "loss": 0.1457, "step": 237350 }, { "epoch": 2.33, "grad_norm": 2.1315720081329346, "learning_rate": 3.871642487612579e-06, "loss": 0.2318, "step": 237375 }, { "epoch": 2.33, "grad_norm": 11.64387035369873, "learning_rate": 3.871518365158331e-06, "loss": 0.1574, "step": 237400 }, { "epoch": 2.33, "grad_norm": 5.866992950439453, "learning_rate": 3.8713942427040826e-06, "loss": 0.1983, "step": 237425 }, { "epoch": 2.33, "grad_norm": 10.182073593139648, "learning_rate": 3.871270120249834e-06, "loss": 0.1396, "step": 237450 }, { "epoch": 2.33, "grad_norm": 5.670501708984375, "learning_rate": 3.871145997795586e-06, "loss": 0.2616, "step": 237475 }, { "epoch": 2.34, "grad_norm": 13.797478675842285, "learning_rate": 3.871021875341337e-06, "loss": 0.1532, "step": 237500 }, { "epoch": 2.34, "grad_norm": 0.9825469255447388, "learning_rate": 3.870897752887088e-06, "loss": 0.1721, "step": 237525 }, { "epoch": 2.34, "grad_norm": 5.9672112464904785, "learning_rate": 3.87077363043284e-06, "loss": 0.1496, "step": 237550 }, { "epoch": 2.34, "grad_norm": 6.42542839050293, "learning_rate": 3.8706495079785915e-06, "loss": 0.2379, "step": 237575 }, { "epoch": 2.34, "grad_norm": 8.99234676361084, "learning_rate": 3.870525385524343e-06, "loss": 0.1103, "step": 237600 }, { "epoch": 2.34, "grad_norm": 8.697084426879883, "learning_rate": 3.870401263070095e-06, "loss": 0.2318, "step": 237625 }, { "epoch": 2.34, "grad_norm": 10.257092475891113, "learning_rate": 3.870277140615846e-06, "loss": 0.1275, "step": 237650 }, { "epoch": 2.34, "grad_norm": 5.153802871704102, "learning_rate": 3.870153018161597e-06, "loss": 0.2216, "step": 237675 }, { "epoch": 2.34, "grad_norm": 9.752470016479492, "learning_rate": 3.870028895707349e-06, "loss": 0.0962, "step": 237700 }, { "epoch": 2.34, "grad_norm": 17.059246063232422, "learning_rate": 3.8699047732531005e-06, "loss": 0.2011, "step": 237725 }, { "epoch": 2.34, "grad_norm": 13.992178916931152, "learning_rate": 3.8697806507988525e-06, "loss": 0.1284, "step": 237750 }, { "epoch": 2.34, "grad_norm": 8.048600196838379, "learning_rate": 3.869656528344604e-06, "loss": 0.1899, "step": 237775 }, { "epoch": 2.34, "grad_norm": 11.867029190063477, "learning_rate": 3.869532405890356e-06, "loss": 0.1276, "step": 237800 }, { "epoch": 2.34, "grad_norm": 1.8547765016555786, "learning_rate": 3.869408283436107e-06, "loss": 0.2007, "step": 237825 }, { "epoch": 2.34, "grad_norm": 12.080202102661133, "learning_rate": 3.869284160981859e-06, "loss": 0.0996, "step": 237850 }, { "epoch": 2.34, "grad_norm": 6.15997838973999, "learning_rate": 3.86916003852761e-06, "loss": 0.2221, "step": 237875 }, { "epoch": 2.34, "grad_norm": 14.121718406677246, "learning_rate": 3.8690359160733615e-06, "loss": 0.1499, "step": 237900 }, { "epoch": 2.34, "grad_norm": 5.6521735191345215, "learning_rate": 3.8689117936191136e-06, "loss": 0.2084, "step": 237925 }, { "epoch": 2.34, "grad_norm": 14.034884452819824, "learning_rate": 3.868787671164865e-06, "loss": 0.1449, "step": 237950 }, { "epoch": 2.34, "grad_norm": 0.2231973260641098, "learning_rate": 3.868663548710616e-06, "loss": 0.222, "step": 237975 }, { "epoch": 2.34, "grad_norm": 10.597139358520508, "learning_rate": 3.868539426256368e-06, "loss": 0.1632, "step": 238000 }, { "epoch": 2.34, "grad_norm": 3.2659568786621094, "learning_rate": 3.868415303802119e-06, "loss": 0.2157, "step": 238025 }, { "epoch": 2.34, "grad_norm": 8.76544189453125, "learning_rate": 3.8682911813478705e-06, "loss": 0.1275, "step": 238050 }, { "epoch": 2.34, "grad_norm": 4.045168876647949, "learning_rate": 3.8681670588936225e-06, "loss": 0.223, "step": 238075 }, { "epoch": 2.34, "grad_norm": 19.7960147857666, "learning_rate": 3.868042936439374e-06, "loss": 0.1457, "step": 238100 }, { "epoch": 2.34, "grad_norm": 4.254698753356934, "learning_rate": 3.867918813985125e-06, "loss": 0.2342, "step": 238125 }, { "epoch": 2.34, "grad_norm": 10.72469711303711, "learning_rate": 3.867794691530877e-06, "loss": 0.1705, "step": 238150 }, { "epoch": 2.34, "grad_norm": 5.059720993041992, "learning_rate": 3.867670569076628e-06, "loss": 0.1996, "step": 238175 }, { "epoch": 2.34, "grad_norm": 10.531530380249023, "learning_rate": 3.86754644662238e-06, "loss": 0.2169, "step": 238200 }, { "epoch": 2.34, "grad_norm": 6.671280860900879, "learning_rate": 3.8674223241681315e-06, "loss": 0.1974, "step": 238225 }, { "epoch": 2.34, "grad_norm": 8.888492584228516, "learning_rate": 3.8672982017138835e-06, "loss": 0.1331, "step": 238250 }, { "epoch": 2.34, "grad_norm": 3.3397488594055176, "learning_rate": 3.867174079259635e-06, "loss": 0.1842, "step": 238275 }, { "epoch": 2.34, "grad_norm": 14.807985305786133, "learning_rate": 3.867049956805386e-06, "loss": 0.1219, "step": 238300 }, { "epoch": 2.34, "grad_norm": 7.3093109130859375, "learning_rate": 3.866925834351138e-06, "loss": 0.2109, "step": 238325 }, { "epoch": 2.34, "grad_norm": 17.606792449951172, "learning_rate": 3.866801711896889e-06, "loss": 0.1296, "step": 238350 }, { "epoch": 2.34, "grad_norm": 6.588294506072998, "learning_rate": 3.8666775894426405e-06, "loss": 0.2049, "step": 238375 }, { "epoch": 2.34, "grad_norm": 10.045605659484863, "learning_rate": 3.8665534669883925e-06, "loss": 0.1312, "step": 238400 }, { "epoch": 2.34, "grad_norm": 6.015183925628662, "learning_rate": 3.866429344534144e-06, "loss": 0.1874, "step": 238425 }, { "epoch": 2.34, "grad_norm": 9.508070945739746, "learning_rate": 3.866305222079895e-06, "loss": 0.1464, "step": 238450 }, { "epoch": 2.34, "grad_norm": 3.592571496963501, "learning_rate": 3.866181099625647e-06, "loss": 0.1971, "step": 238475 }, { "epoch": 2.34, "grad_norm": 7.732911109924316, "learning_rate": 3.866056977171398e-06, "loss": 0.1032, "step": 238500 }, { "epoch": 2.35, "grad_norm": 0.4996296167373657, "learning_rate": 3.86593285471715e-06, "loss": 0.2615, "step": 238525 }, { "epoch": 2.35, "grad_norm": 14.509527206420898, "learning_rate": 3.8658087322629015e-06, "loss": 0.1394, "step": 238550 }, { "epoch": 2.35, "grad_norm": 2.626293182373047, "learning_rate": 3.8656846098086535e-06, "loss": 0.2413, "step": 238575 }, { "epoch": 2.35, "grad_norm": 8.826775550842285, "learning_rate": 3.865560487354405e-06, "loss": 0.1162, "step": 238600 }, { "epoch": 2.35, "grad_norm": 3.228343963623047, "learning_rate": 3.865436364900157e-06, "loss": 0.1933, "step": 238625 }, { "epoch": 2.35, "grad_norm": 10.632287979125977, "learning_rate": 3.865312242445908e-06, "loss": 0.1046, "step": 238650 }, { "epoch": 2.35, "grad_norm": 4.489346504211426, "learning_rate": 3.865188119991659e-06, "loss": 0.2314, "step": 238675 }, { "epoch": 2.35, "grad_norm": 8.018503189086914, "learning_rate": 3.865063997537411e-06, "loss": 0.103, "step": 238700 }, { "epoch": 2.35, "grad_norm": 4.376277446746826, "learning_rate": 3.8649398750831625e-06, "loss": 0.2483, "step": 238725 }, { "epoch": 2.35, "grad_norm": 10.429977416992188, "learning_rate": 3.864815752628914e-06, "loss": 0.1285, "step": 238750 }, { "epoch": 2.35, "grad_norm": 2.073448657989502, "learning_rate": 3.864691630174666e-06, "loss": 0.2036, "step": 238775 }, { "epoch": 2.35, "grad_norm": 8.392827987670898, "learning_rate": 3.864567507720417e-06, "loss": 0.1262, "step": 238800 }, { "epoch": 2.35, "grad_norm": 5.983142375946045, "learning_rate": 3.864443385266168e-06, "loss": 0.1977, "step": 238825 }, { "epoch": 2.35, "grad_norm": 6.160305023193359, "learning_rate": 3.86431926281192e-06, "loss": 0.1428, "step": 238850 }, { "epoch": 2.35, "grad_norm": 2.834303617477417, "learning_rate": 3.8641951403576715e-06, "loss": 0.2153, "step": 238875 }, { "epoch": 2.35, "grad_norm": 8.99362850189209, "learning_rate": 3.864071017903423e-06, "loss": 0.1636, "step": 238900 }, { "epoch": 2.35, "grad_norm": 0.2899460792541504, "learning_rate": 3.863946895449175e-06, "loss": 0.2137, "step": 238925 }, { "epoch": 2.35, "grad_norm": 13.500408172607422, "learning_rate": 3.863822772994926e-06, "loss": 0.1312, "step": 238950 }, { "epoch": 2.35, "grad_norm": 3.691952705383301, "learning_rate": 3.863698650540678e-06, "loss": 0.2007, "step": 238975 }, { "epoch": 2.35, "grad_norm": 10.229987144470215, "learning_rate": 3.863574528086429e-06, "loss": 0.1406, "step": 239000 }, { "epoch": 2.35, "grad_norm": 5.668015003204346, "learning_rate": 3.863455370530351e-06, "loss": 0.2673, "step": 239025 }, { "epoch": 2.35, "grad_norm": 12.610414505004883, "learning_rate": 3.863331248076102e-06, "loss": 0.1414, "step": 239050 }, { "epoch": 2.35, "grad_norm": 5.436812877655029, "learning_rate": 3.863207125621854e-06, "loss": 0.2323, "step": 239075 }, { "epoch": 2.35, "grad_norm": 7.791990280151367, "learning_rate": 3.8630830031676056e-06, "loss": 0.1476, "step": 239100 }, { "epoch": 2.35, "grad_norm": 3.3415048122406006, "learning_rate": 3.862958880713357e-06, "loss": 0.2099, "step": 239125 }, { "epoch": 2.35, "grad_norm": 10.261786460876465, "learning_rate": 3.862834758259109e-06, "loss": 0.143, "step": 239150 }, { "epoch": 2.35, "grad_norm": 4.840161323547363, "learning_rate": 3.86271063580486e-06, "loss": 0.23, "step": 239175 }, { "epoch": 2.35, "grad_norm": 11.586052894592285, "learning_rate": 3.862586513350611e-06, "loss": 0.1222, "step": 239200 }, { "epoch": 2.35, "grad_norm": 2.366807460784912, "learning_rate": 3.862462390896363e-06, "loss": 0.1965, "step": 239225 }, { "epoch": 2.35, "grad_norm": 5.11442756652832, "learning_rate": 3.8623382684421145e-06, "loss": 0.1155, "step": 239250 }, { "epoch": 2.35, "grad_norm": 2.835376262664795, "learning_rate": 3.862214145987866e-06, "loss": 0.2122, "step": 239275 }, { "epoch": 2.35, "grad_norm": 14.365851402282715, "learning_rate": 3.862090023533618e-06, "loss": 0.1407, "step": 239300 }, { "epoch": 2.35, "grad_norm": 11.508612632751465, "learning_rate": 3.861965901079369e-06, "loss": 0.2104, "step": 239325 }, { "epoch": 2.35, "grad_norm": 11.436304092407227, "learning_rate": 3.86184177862512e-06, "loss": 0.1203, "step": 239350 }, { "epoch": 2.35, "grad_norm": 0.9127942323684692, "learning_rate": 3.861717656170872e-06, "loss": 0.188, "step": 239375 }, { "epoch": 2.35, "grad_norm": 15.042909622192383, "learning_rate": 3.8615935337166235e-06, "loss": 0.1525, "step": 239400 }, { "epoch": 2.35, "grad_norm": 1.375808596611023, "learning_rate": 3.8614694112623756e-06, "loss": 0.2107, "step": 239425 }, { "epoch": 2.35, "grad_norm": 13.403571128845215, "learning_rate": 3.861345288808127e-06, "loss": 0.1134, "step": 239450 }, { "epoch": 2.35, "grad_norm": 5.268499374389648, "learning_rate": 3.861221166353879e-06, "loss": 0.1623, "step": 239475 }, { "epoch": 2.35, "grad_norm": 9.692483901977539, "learning_rate": 3.86109704389963e-06, "loss": 0.1308, "step": 239500 }, { "epoch": 2.36, "grad_norm": 5.640866279602051, "learning_rate": 3.860972921445382e-06, "loss": 0.2158, "step": 239525 }, { "epoch": 2.36, "grad_norm": 14.730400085449219, "learning_rate": 3.860848798991133e-06, "loss": 0.1339, "step": 239550 }, { "epoch": 2.36, "grad_norm": 2.5624918937683105, "learning_rate": 3.8607246765368845e-06, "loss": 0.2134, "step": 239575 }, { "epoch": 2.36, "grad_norm": 11.100366592407227, "learning_rate": 3.8606005540826366e-06, "loss": 0.1711, "step": 239600 }, { "epoch": 2.36, "grad_norm": 6.989328384399414, "learning_rate": 3.860476431628388e-06, "loss": 0.2137, "step": 239625 }, { "epoch": 2.36, "grad_norm": 9.931081771850586, "learning_rate": 3.860352309174139e-06, "loss": 0.0949, "step": 239650 }, { "epoch": 2.36, "grad_norm": 3.0423424243927, "learning_rate": 3.86022818671989e-06, "loss": 0.1958, "step": 239675 }, { "epoch": 2.36, "grad_norm": 11.057196617126465, "learning_rate": 3.860104064265642e-06, "loss": 0.1109, "step": 239700 }, { "epoch": 2.36, "grad_norm": 5.019653797149658, "learning_rate": 3.8599799418113935e-06, "loss": 0.2377, "step": 239725 }, { "epoch": 2.36, "grad_norm": 11.486072540283203, "learning_rate": 3.859855819357145e-06, "loss": 0.1328, "step": 239750 }, { "epoch": 2.36, "grad_norm": 0.9907131791114807, "learning_rate": 3.859731696902897e-06, "loss": 0.2364, "step": 239775 }, { "epoch": 2.36, "grad_norm": 10.229496955871582, "learning_rate": 3.859607574448648e-06, "loss": 0.1569, "step": 239800 }, { "epoch": 2.36, "grad_norm": 4.155182838439941, "learning_rate": 3.8594834519944e-06, "loss": 0.2372, "step": 239825 }, { "epoch": 2.36, "grad_norm": 9.22446346282959, "learning_rate": 3.859359329540151e-06, "loss": 0.1109, "step": 239850 }, { "epoch": 2.36, "grad_norm": 6.23725700378418, "learning_rate": 3.859235207085903e-06, "loss": 0.2106, "step": 239875 }, { "epoch": 2.36, "grad_norm": 11.461694717407227, "learning_rate": 3.8591110846316545e-06, "loss": 0.1164, "step": 239900 }, { "epoch": 2.36, "grad_norm": 4.461648941040039, "learning_rate": 3.8589869621774066e-06, "loss": 0.2175, "step": 239925 }, { "epoch": 2.36, "grad_norm": 7.8795061111450195, "learning_rate": 3.858862839723158e-06, "loss": 0.1344, "step": 239950 }, { "epoch": 2.36, "grad_norm": 6.120720863342285, "learning_rate": 3.858738717268909e-06, "loss": 0.2397, "step": 239975 }, { "epoch": 2.36, "grad_norm": 10.790925025939941, "learning_rate": 3.858614594814661e-06, "loss": 0.1009, "step": 240000 }, { "epoch": 2.36, "eval_loss": 0.6228630542755127, "eval_runtime": 6050.004, "eval_samples_per_second": 1.565, "eval_steps_per_second": 0.196, "eval_wer": 0.125313847936651, "step": 240000 }, { "epoch": 2.36, "grad_norm": 4.395565509796143, "learning_rate": 3.858490472360412e-06, "loss": 0.235, "step": 240025 }, { "epoch": 2.36, "grad_norm": 15.585610389709473, "learning_rate": 3.8583663499061635e-06, "loss": 0.1252, "step": 240050 }, { "epoch": 2.36, "grad_norm": 4.597373008728027, "learning_rate": 3.8582422274519155e-06, "loss": 0.1963, "step": 240075 }, { "epoch": 2.36, "grad_norm": 11.231624603271484, "learning_rate": 3.858118104997667e-06, "loss": 0.1061, "step": 240100 }, { "epoch": 2.36, "grad_norm": 0.9251390695571899, "learning_rate": 3.857993982543418e-06, "loss": 0.2487, "step": 240125 }, { "epoch": 2.36, "grad_norm": 7.977749347686768, "learning_rate": 3.85786986008917e-06, "loss": 0.1315, "step": 240150 }, { "epoch": 2.36, "grad_norm": 5.935142517089844, "learning_rate": 3.857745737634921e-06, "loss": 0.2197, "step": 240175 }, { "epoch": 2.36, "grad_norm": 6.43034029006958, "learning_rate": 3.857621615180672e-06, "loss": 0.1216, "step": 240200 }, { "epoch": 2.36, "grad_norm": 2.74660587310791, "learning_rate": 3.8574974927264245e-06, "loss": 0.192, "step": 240225 }, { "epoch": 2.36, "grad_norm": 10.744649887084961, "learning_rate": 3.857373370272176e-06, "loss": 0.1206, "step": 240250 }, { "epoch": 2.36, "grad_norm": 4.454803943634033, "learning_rate": 3.857249247817928e-06, "loss": 0.2126, "step": 240275 }, { "epoch": 2.36, "grad_norm": 8.172606468200684, "learning_rate": 3.857125125363679e-06, "loss": 0.1347, "step": 240300 }, { "epoch": 2.36, "grad_norm": 3.815551519393921, "learning_rate": 3.857001002909431e-06, "loss": 0.219, "step": 240325 }, { "epoch": 2.36, "grad_norm": 8.427604675292969, "learning_rate": 3.856876880455182e-06, "loss": 0.1533, "step": 240350 }, { "epoch": 2.36, "grad_norm": 1.9133697748184204, "learning_rate": 3.856752758000934e-06, "loss": 0.2359, "step": 240375 }, { "epoch": 2.36, "grad_norm": 19.06234359741211, "learning_rate": 3.8566286355466855e-06, "loss": 0.1301, "step": 240400 }, { "epoch": 2.36, "grad_norm": 4.215423583984375, "learning_rate": 3.856504513092437e-06, "loss": 0.1732, "step": 240425 }, { "epoch": 2.36, "grad_norm": 10.23485279083252, "learning_rate": 3.856380390638189e-06, "loss": 0.14, "step": 240450 }, { "epoch": 2.36, "grad_norm": 6.319035053253174, "learning_rate": 3.85625626818394e-06, "loss": 0.2633, "step": 240475 }, { "epoch": 2.36, "grad_norm": 12.188823699951172, "learning_rate": 3.856132145729691e-06, "loss": 0.1409, "step": 240500 }, { "epoch": 2.36, "grad_norm": 4.800374984741211, "learning_rate": 3.856008023275443e-06, "loss": 0.2086, "step": 240525 }, { "epoch": 2.37, "grad_norm": 7.122547149658203, "learning_rate": 3.8558839008211945e-06, "loss": 0.112, "step": 240550 }, { "epoch": 2.37, "grad_norm": 3.1271536350250244, "learning_rate": 3.855759778366946e-06, "loss": 0.2496, "step": 240575 }, { "epoch": 2.37, "grad_norm": 8.846229553222656, "learning_rate": 3.855635655912697e-06, "loss": 0.1205, "step": 240600 }, { "epoch": 2.37, "grad_norm": 2.951498508453369, "learning_rate": 3.855511533458449e-06, "loss": 0.1615, "step": 240625 }, { "epoch": 2.37, "grad_norm": 12.27016830444336, "learning_rate": 3.8553874110042e-06, "loss": 0.1282, "step": 240650 }, { "epoch": 2.37, "grad_norm": 6.087888240814209, "learning_rate": 3.855263288549952e-06, "loss": 0.2428, "step": 240675 }, { "epoch": 2.37, "grad_norm": 11.558043479919434, "learning_rate": 3.8551391660957034e-06, "loss": 0.1257, "step": 240700 }, { "epoch": 2.37, "grad_norm": 6.6015214920043945, "learning_rate": 3.8550150436414555e-06, "loss": 0.2454, "step": 240725 }, { "epoch": 2.37, "grad_norm": 7.904842853546143, "learning_rate": 3.854890921187207e-06, "loss": 0.1307, "step": 240750 }, { "epoch": 2.37, "grad_norm": 3.423607587814331, "learning_rate": 3.854766798732959e-06, "loss": 0.2622, "step": 240775 }, { "epoch": 2.37, "grad_norm": 12.512659072875977, "learning_rate": 3.85464267627871e-06, "loss": 0.1458, "step": 240800 }, { "epoch": 2.37, "grad_norm": 5.501277446746826, "learning_rate": 3.854518553824461e-06, "loss": 0.2015, "step": 240825 }, { "epoch": 2.37, "grad_norm": 13.277795791625977, "learning_rate": 3.854394431370213e-06, "loss": 0.1211, "step": 240850 }, { "epoch": 2.37, "grad_norm": 6.702239513397217, "learning_rate": 3.8542703089159645e-06, "loss": 0.1838, "step": 240875 }, { "epoch": 2.37, "grad_norm": 17.67233657836914, "learning_rate": 3.854146186461716e-06, "loss": 0.1354, "step": 240900 }, { "epoch": 2.37, "grad_norm": 3.8471319675445557, "learning_rate": 3.854022064007468e-06, "loss": 0.2061, "step": 240925 }, { "epoch": 2.37, "grad_norm": 11.784944534301758, "learning_rate": 3.853897941553219e-06, "loss": 0.1382, "step": 240950 }, { "epoch": 2.37, "grad_norm": 7.446412563323975, "learning_rate": 3.85377381909897e-06, "loss": 0.2208, "step": 240975 }, { "epoch": 2.37, "grad_norm": 7.1903862953186035, "learning_rate": 3.853649696644722e-06, "loss": 0.1299, "step": 241000 }, { "epoch": 2.37, "grad_norm": 6.147914886474609, "learning_rate": 3.853525574190473e-06, "loss": 0.222, "step": 241025 }, { "epoch": 2.37, "grad_norm": 11.209485054016113, "learning_rate": 3.853401451736225e-06, "loss": 0.1359, "step": 241050 }, { "epoch": 2.37, "grad_norm": 5.707669734954834, "learning_rate": 3.853277329281977e-06, "loss": 0.1908, "step": 241075 }, { "epoch": 2.37, "grad_norm": 4.670114517211914, "learning_rate": 3.853153206827728e-06, "loss": 0.1357, "step": 241100 }, { "epoch": 2.37, "grad_norm": 10.025179862976074, "learning_rate": 3.85303404927165e-06, "loss": 0.2397, "step": 241125 }, { "epoch": 2.37, "grad_norm": 12.461942672729492, "learning_rate": 3.852909926817401e-06, "loss": 0.1474, "step": 241150 }, { "epoch": 2.37, "grad_norm": 0.030822215601801872, "learning_rate": 3.852785804363153e-06, "loss": 0.1919, "step": 241175 }, { "epoch": 2.37, "grad_norm": 13.150382041931152, "learning_rate": 3.852661681908904e-06, "loss": 0.1228, "step": 241200 }, { "epoch": 2.37, "grad_norm": 8.6853609085083, "learning_rate": 3.852537559454656e-06, "loss": 0.1876, "step": 241225 }, { "epoch": 2.37, "grad_norm": 17.199073791503906, "learning_rate": 3.8524134370004075e-06, "loss": 0.0998, "step": 241250 }, { "epoch": 2.37, "grad_norm": 1.9156429767608643, "learning_rate": 3.852289314546159e-06, "loss": 0.1944, "step": 241275 }, { "epoch": 2.37, "grad_norm": 13.04721450805664, "learning_rate": 3.852165192091911e-06, "loss": 0.1433, "step": 241300 }, { "epoch": 2.37, "grad_norm": 2.9019365310668945, "learning_rate": 3.852041069637662e-06, "loss": 0.1964, "step": 241325 }, { "epoch": 2.37, "grad_norm": 13.335866928100586, "learning_rate": 3.851916947183413e-06, "loss": 0.1424, "step": 241350 }, { "epoch": 2.37, "grad_norm": 4.989541053771973, "learning_rate": 3.851792824729165e-06, "loss": 0.2216, "step": 241375 }, { "epoch": 2.37, "grad_norm": 10.567788124084473, "learning_rate": 3.8516687022749165e-06, "loss": 0.1588, "step": 241400 }, { "epoch": 2.37, "grad_norm": 3.3797237873077393, "learning_rate": 3.851544579820668e-06, "loss": 0.2333, "step": 241425 }, { "epoch": 2.37, "grad_norm": 17.005306243896484, "learning_rate": 3.85142045736642e-06, "loss": 0.1228, "step": 241450 }, { "epoch": 2.37, "grad_norm": 3.367168426513672, "learning_rate": 3.851296334912171e-06, "loss": 0.2229, "step": 241475 }, { "epoch": 2.37, "grad_norm": 19.968685150146484, "learning_rate": 3.851172212457923e-06, "loss": 0.1192, "step": 241500 }, { "epoch": 2.37, "grad_norm": 5.149951934814453, "learning_rate": 3.851048090003674e-06, "loss": 0.2181, "step": 241525 }, { "epoch": 2.37, "grad_norm": 12.338281631469727, "learning_rate": 3.850923967549426e-06, "loss": 0.116, "step": 241550 }, { "epoch": 2.38, "grad_norm": 5.414470672607422, "learning_rate": 3.8507998450951775e-06, "loss": 0.1959, "step": 241575 }, { "epoch": 2.38, "grad_norm": 11.202447891235352, "learning_rate": 3.8506757226409296e-06, "loss": 0.1427, "step": 241600 }, { "epoch": 2.38, "grad_norm": 4.547730922698975, "learning_rate": 3.850551600186681e-06, "loss": 0.217, "step": 241625 }, { "epoch": 2.38, "grad_norm": 13.951239585876465, "learning_rate": 3.850427477732432e-06, "loss": 0.1361, "step": 241650 }, { "epoch": 2.38, "grad_norm": 4.015327453613281, "learning_rate": 3.850303355278184e-06, "loss": 0.2263, "step": 241675 }, { "epoch": 2.38, "grad_norm": 8.287282943725586, "learning_rate": 3.850179232823935e-06, "loss": 0.1242, "step": 241700 }, { "epoch": 2.38, "grad_norm": 3.470280647277832, "learning_rate": 3.8500551103696865e-06, "loss": 0.1846, "step": 241725 }, { "epoch": 2.38, "grad_norm": 24.04749870300293, "learning_rate": 3.8499309879154385e-06, "loss": 0.1278, "step": 241750 }, { "epoch": 2.38, "grad_norm": 4.087314128875732, "learning_rate": 3.84980686546119e-06, "loss": 0.1867, "step": 241775 }, { "epoch": 2.38, "grad_norm": 10.76887035369873, "learning_rate": 3.849682743006941e-06, "loss": 0.163, "step": 241800 }, { "epoch": 2.38, "grad_norm": 6.0594563484191895, "learning_rate": 3.849558620552693e-06, "loss": 0.2489, "step": 241825 }, { "epoch": 2.38, "grad_norm": 15.50343132019043, "learning_rate": 3.849434498098444e-06, "loss": 0.1462, "step": 241850 }, { "epoch": 2.38, "grad_norm": 2.5932350158691406, "learning_rate": 3.8493103756441954e-06, "loss": 0.1763, "step": 241875 }, { "epoch": 2.38, "grad_norm": 11.098865509033203, "learning_rate": 3.8491862531899475e-06, "loss": 0.1156, "step": 241900 }, { "epoch": 2.38, "grad_norm": 1.8094148635864258, "learning_rate": 3.849062130735699e-06, "loss": 0.1889, "step": 241925 }, { "epoch": 2.38, "grad_norm": 7.126093864440918, "learning_rate": 3.848938008281451e-06, "loss": 0.1241, "step": 241950 }, { "epoch": 2.38, "grad_norm": 5.183717727661133, "learning_rate": 3.848813885827202e-06, "loss": 0.2313, "step": 241975 }, { "epoch": 2.38, "grad_norm": 25.571239471435547, "learning_rate": 3.848689763372954e-06, "loss": 0.1282, "step": 242000 }, { "epoch": 2.38, "grad_norm": 8.553701400756836, "learning_rate": 3.848565640918705e-06, "loss": 0.2495, "step": 242025 }, { "epoch": 2.38, "grad_norm": 14.520794868469238, "learning_rate": 3.8484415184644565e-06, "loss": 0.1524, "step": 242050 }, { "epoch": 2.38, "grad_norm": 4.707149028778076, "learning_rate": 3.8483173960102085e-06, "loss": 0.1771, "step": 242075 }, { "epoch": 2.38, "grad_norm": 12.610713005065918, "learning_rate": 3.84819327355596e-06, "loss": 0.139, "step": 242100 }, { "epoch": 2.38, "grad_norm": 6.534894943237305, "learning_rate": 3.848069151101711e-06, "loss": 0.2418, "step": 242125 }, { "epoch": 2.38, "grad_norm": 12.323077201843262, "learning_rate": 3.847945028647463e-06, "loss": 0.1448, "step": 242150 }, { "epoch": 2.38, "grad_norm": 4.867340564727783, "learning_rate": 3.847820906193214e-06, "loss": 0.2442, "step": 242175 }, { "epoch": 2.38, "grad_norm": 11.730340003967285, "learning_rate": 3.847696783738965e-06, "loss": 0.151, "step": 242200 }, { "epoch": 2.38, "grad_norm": 3.646217107772827, "learning_rate": 3.8475726612847175e-06, "loss": 0.2121, "step": 242225 }, { "epoch": 2.38, "grad_norm": 12.821052551269531, "learning_rate": 3.847448538830469e-06, "loss": 0.1442, "step": 242250 }, { "epoch": 2.38, "grad_norm": 0.7209372520446777, "learning_rate": 3.84732441637622e-06, "loss": 0.2076, "step": 242275 }, { "epoch": 2.38, "grad_norm": 10.407934188842773, "learning_rate": 3.847200293921972e-06, "loss": 0.1102, "step": 242300 }, { "epoch": 2.38, "grad_norm": 0.17046426236629486, "learning_rate": 3.847076171467723e-06, "loss": 0.1863, "step": 242325 }, { "epoch": 2.38, "grad_norm": 13.118244171142578, "learning_rate": 3.846952049013475e-06, "loss": 0.156, "step": 242350 }, { "epoch": 2.38, "grad_norm": 9.617324829101562, "learning_rate": 3.8468279265592264e-06, "loss": 0.2296, "step": 242375 }, { "epoch": 2.38, "grad_norm": 10.972322463989258, "learning_rate": 3.8467038041049785e-06, "loss": 0.1187, "step": 242400 }, { "epoch": 2.38, "grad_norm": 4.080651760101318, "learning_rate": 3.84657968165073e-06, "loss": 0.2229, "step": 242425 }, { "epoch": 2.38, "grad_norm": 11.3589506149292, "learning_rate": 3.846455559196482e-06, "loss": 0.1238, "step": 242450 }, { "epoch": 2.38, "grad_norm": 3.193742275238037, "learning_rate": 3.846331436742233e-06, "loss": 0.2303, "step": 242475 }, { "epoch": 2.38, "grad_norm": 10.126819610595703, "learning_rate": 3.846207314287984e-06, "loss": 0.1193, "step": 242500 }, { "epoch": 2.38, "grad_norm": 0.5068321228027344, "learning_rate": 3.846083191833736e-06, "loss": 0.2053, "step": 242525 }, { "epoch": 2.38, "grad_norm": 10.045970916748047, "learning_rate": 3.8459590693794875e-06, "loss": 0.1247, "step": 242550 }, { "epoch": 2.39, "grad_norm": 6.873635292053223, "learning_rate": 3.845834946925239e-06, "loss": 0.2204, "step": 242575 }, { "epoch": 2.39, "grad_norm": 8.898612022399902, "learning_rate": 3.845710824470991e-06, "loss": 0.1277, "step": 242600 }, { "epoch": 2.39, "grad_norm": 2.535125970840454, "learning_rate": 3.845586702016742e-06, "loss": 0.2196, "step": 242625 }, { "epoch": 2.39, "grad_norm": 10.863365173339844, "learning_rate": 3.845462579562493e-06, "loss": 0.0897, "step": 242650 }, { "epoch": 2.39, "grad_norm": 0.16936463117599487, "learning_rate": 3.845338457108245e-06, "loss": 0.2119, "step": 242675 }, { "epoch": 2.39, "grad_norm": 8.409031867980957, "learning_rate": 3.845214334653996e-06, "loss": 0.1424, "step": 242700 }, { "epoch": 2.39, "grad_norm": 1.63960599899292, "learning_rate": 3.845090212199748e-06, "loss": 0.2201, "step": 242725 }, { "epoch": 2.39, "grad_norm": 11.031035423278809, "learning_rate": 3.8449660897455e-06, "loss": 0.1489, "step": 242750 }, { "epoch": 2.39, "grad_norm": 2.0976178646087646, "learning_rate": 3.844841967291251e-06, "loss": 0.2472, "step": 242775 }, { "epoch": 2.39, "grad_norm": 15.263148307800293, "learning_rate": 3.844717844837003e-06, "loss": 0.1291, "step": 242800 }, { "epoch": 2.39, "grad_norm": 2.8074283599853516, "learning_rate": 3.844593722382754e-06, "loss": 0.2192, "step": 242825 }, { "epoch": 2.39, "grad_norm": 8.925654411315918, "learning_rate": 3.844469599928506e-06, "loss": 0.1055, "step": 242850 }, { "epoch": 2.39, "grad_norm": 5.561548233032227, "learning_rate": 3.8443454774742574e-06, "loss": 0.2261, "step": 242875 }, { "epoch": 2.39, "grad_norm": 8.195881843566895, "learning_rate": 3.844221355020009e-06, "loss": 0.1357, "step": 242900 }, { "epoch": 2.39, "grad_norm": 3.9490721225738525, "learning_rate": 3.844097232565761e-06, "loss": 0.2157, "step": 242925 }, { "epoch": 2.39, "grad_norm": 15.633016586303711, "learning_rate": 3.843973110111512e-06, "loss": 0.1409, "step": 242950 }, { "epoch": 2.39, "grad_norm": 0.8628392815589905, "learning_rate": 3.843848987657263e-06, "loss": 0.1901, "step": 242975 }, { "epoch": 2.39, "grad_norm": 12.494399070739746, "learning_rate": 3.843724865203015e-06, "loss": 0.1393, "step": 243000 }, { "epoch": 2.39, "grad_norm": 4.6243181228637695, "learning_rate": 3.843600742748766e-06, "loss": 0.2349, "step": 243025 }, { "epoch": 2.39, "grad_norm": 10.657114028930664, "learning_rate": 3.843476620294518e-06, "loss": 0.1335, "step": 243050 }, { "epoch": 2.39, "grad_norm": 3.1671807765960693, "learning_rate": 3.84335249784027e-06, "loss": 0.1837, "step": 243075 }, { "epoch": 2.39, "grad_norm": 10.389577865600586, "learning_rate": 3.843228375386021e-06, "loss": 0.1254, "step": 243100 }, { "epoch": 2.39, "grad_norm": 4.2175493240356445, "learning_rate": 3.843104252931772e-06, "loss": 0.2237, "step": 243125 }, { "epoch": 2.39, "grad_norm": 10.512258529663086, "learning_rate": 3.842980130477524e-06, "loss": 0.1105, "step": 243150 }, { "epoch": 2.39, "grad_norm": 5.347893714904785, "learning_rate": 3.842856008023275e-06, "loss": 0.2072, "step": 243175 }, { "epoch": 2.39, "grad_norm": 11.32651424407959, "learning_rate": 3.8427318855690274e-06, "loss": 0.1682, "step": 243200 }, { "epoch": 2.39, "grad_norm": 3.08072829246521, "learning_rate": 3.842607763114779e-06, "loss": 0.1823, "step": 243225 }, { "epoch": 2.39, "grad_norm": 5.5594635009765625, "learning_rate": 3.842483640660531e-06, "loss": 0.1034, "step": 243250 }, { "epoch": 2.39, "grad_norm": 4.229267120361328, "learning_rate": 3.842359518206282e-06, "loss": 0.1977, "step": 243275 }, { "epoch": 2.39, "grad_norm": 13.03579044342041, "learning_rate": 3.842235395752034e-06, "loss": 0.1289, "step": 243300 }, { "epoch": 2.39, "grad_norm": 5.617912292480469, "learning_rate": 3.842111273297785e-06, "loss": 0.1916, "step": 243325 }, { "epoch": 2.39, "grad_norm": 16.06984519958496, "learning_rate": 3.841987150843536e-06, "loss": 0.1307, "step": 243350 }, { "epoch": 2.39, "grad_norm": 3.761892080307007, "learning_rate": 3.8418630283892884e-06, "loss": 0.2205, "step": 243375 }, { "epoch": 2.39, "grad_norm": 7.518776893615723, "learning_rate": 3.84173890593504e-06, "loss": 0.0994, "step": 243400 }, { "epoch": 2.39, "grad_norm": 5.86479377746582, "learning_rate": 3.841614783480791e-06, "loss": 0.2321, "step": 243425 }, { "epoch": 2.39, "grad_norm": 8.676749229431152, "learning_rate": 3.841490661026543e-06, "loss": 0.1158, "step": 243450 }, { "epoch": 2.39, "grad_norm": 7.887463092803955, "learning_rate": 3.841366538572294e-06, "loss": 0.2131, "step": 243475 }, { "epoch": 2.39, "grad_norm": 8.40931510925293, "learning_rate": 3.841242416118045e-06, "loss": 0.1013, "step": 243500 }, { "epoch": 2.39, "grad_norm": 3.2774155139923096, "learning_rate": 3.841118293663797e-06, "loss": 0.2218, "step": 243525 }, { "epoch": 2.39, "grad_norm": 9.276131629943848, "learning_rate": 3.840994171209549e-06, "loss": 0.1313, "step": 243550 }, { "epoch": 2.39, "grad_norm": 7.589161396026611, "learning_rate": 3.8408700487553e-06, "loss": 0.2278, "step": 243575 }, { "epoch": 2.4, "grad_norm": 11.23880386352539, "learning_rate": 3.840745926301052e-06, "loss": 0.1203, "step": 243600 }, { "epoch": 2.4, "grad_norm": 4.812204360961914, "learning_rate": 3.840621803846803e-06, "loss": 0.2146, "step": 243625 }, { "epoch": 2.4, "grad_norm": 9.042448043823242, "learning_rate": 3.840497681392555e-06, "loss": 0.1437, "step": 243650 }, { "epoch": 2.4, "grad_norm": 5.366187572479248, "learning_rate": 3.840378523836476e-06, "loss": 0.2024, "step": 243675 }, { "epoch": 2.4, "grad_norm": 10.12939167022705, "learning_rate": 3.840254401382228e-06, "loss": 0.0861, "step": 243700 }, { "epoch": 2.4, "grad_norm": 7.648562908172607, "learning_rate": 3.8401302789279795e-06, "loss": 0.1934, "step": 243725 }, { "epoch": 2.4, "grad_norm": 11.861239433288574, "learning_rate": 3.8400061564737315e-06, "loss": 0.1333, "step": 243750 }, { "epoch": 2.4, "grad_norm": 6.075140476226807, "learning_rate": 3.839882034019483e-06, "loss": 0.21, "step": 243775 }, { "epoch": 2.4, "grad_norm": 5.025904655456543, "learning_rate": 3.839757911565234e-06, "loss": 0.121, "step": 243800 }, { "epoch": 2.4, "grad_norm": 1.6770679950714111, "learning_rate": 3.839633789110986e-06, "loss": 0.1918, "step": 243825 }, { "epoch": 2.4, "grad_norm": 8.354754447937012, "learning_rate": 3.839509666656737e-06, "loss": 0.1347, "step": 243850 }, { "epoch": 2.4, "grad_norm": 3.5506398677825928, "learning_rate": 3.839385544202488e-06, "loss": 0.2287, "step": 243875 }, { "epoch": 2.4, "grad_norm": 12.860837936401367, "learning_rate": 3.8392614217482405e-06, "loss": 0.1432, "step": 243900 }, { "epoch": 2.4, "grad_norm": 1.1913251876831055, "learning_rate": 3.839137299293992e-06, "loss": 0.1975, "step": 243925 }, { "epoch": 2.4, "grad_norm": 11.156896591186523, "learning_rate": 3.839013176839743e-06, "loss": 0.1186, "step": 243950 }, { "epoch": 2.4, "grad_norm": 7.9062275886535645, "learning_rate": 3.838889054385495e-06, "loss": 0.2136, "step": 243975 }, { "epoch": 2.4, "grad_norm": 7.0446271896362305, "learning_rate": 3.838764931931246e-06, "loss": 0.125, "step": 244000 }, { "epoch": 2.4, "grad_norm": 5.332716941833496, "learning_rate": 3.838640809476997e-06, "loss": 0.1965, "step": 244025 }, { "epoch": 2.4, "grad_norm": 7.258266448974609, "learning_rate": 3.8385166870227494e-06, "loss": 0.0961, "step": 244050 }, { "epoch": 2.4, "grad_norm": 1.7945096492767334, "learning_rate": 3.838392564568501e-06, "loss": 0.2021, "step": 244075 }, { "epoch": 2.4, "grad_norm": 11.919708251953125, "learning_rate": 3.838268442114253e-06, "loss": 0.1347, "step": 244100 }, { "epoch": 2.4, "grad_norm": 3.829218626022339, "learning_rate": 3.838144319660004e-06, "loss": 0.2149, "step": 244125 }, { "epoch": 2.4, "grad_norm": 8.289020538330078, "learning_rate": 3.838020197205756e-06, "loss": 0.1341, "step": 244150 }, { "epoch": 2.4, "grad_norm": 5.094766616821289, "learning_rate": 3.837896074751507e-06, "loss": 0.1916, "step": 244175 }, { "epoch": 2.4, "grad_norm": 15.11166763305664, "learning_rate": 3.837771952297259e-06, "loss": 0.1358, "step": 244200 }, { "epoch": 2.4, "grad_norm": 6.80750036239624, "learning_rate": 3.8376478298430105e-06, "loss": 0.1859, "step": 244225 }, { "epoch": 2.4, "grad_norm": 10.945182800292969, "learning_rate": 3.837523707388762e-06, "loss": 0.1248, "step": 244250 }, { "epoch": 2.4, "grad_norm": 4.5412726402282715, "learning_rate": 3.837399584934513e-06, "loss": 0.1821, "step": 244275 }, { "epoch": 2.4, "grad_norm": 30.396087646484375, "learning_rate": 3.837275462480265e-06, "loss": 0.1421, "step": 244300 }, { "epoch": 2.4, "grad_norm": 4.838797092437744, "learning_rate": 3.837151340026016e-06, "loss": 0.2657, "step": 244325 }, { "epoch": 2.4, "grad_norm": 13.009536743164062, "learning_rate": 3.837027217571767e-06, "loss": 0.1469, "step": 244350 }, { "epoch": 2.4, "grad_norm": 3.4462215900421143, "learning_rate": 3.8369030951175194e-06, "loss": 0.2283, "step": 244375 }, { "epoch": 2.4, "grad_norm": 5.7635698318481445, "learning_rate": 3.836778972663271e-06, "loss": 0.1039, "step": 244400 }, { "epoch": 2.4, "grad_norm": 4.0792365074157715, "learning_rate": 3.836654850209023e-06, "loss": 0.2131, "step": 244425 }, { "epoch": 2.4, "grad_norm": 10.567817687988281, "learning_rate": 3.836530727754774e-06, "loss": 0.1373, "step": 244450 }, { "epoch": 2.4, "grad_norm": 2.426433563232422, "learning_rate": 3.836406605300526e-06, "loss": 0.2348, "step": 244475 }, { "epoch": 2.4, "grad_norm": 10.143393516540527, "learning_rate": 3.836282482846277e-06, "loss": 0.122, "step": 244500 }, { "epoch": 2.4, "grad_norm": 4.074505805969238, "learning_rate": 3.836158360392029e-06, "loss": 0.2312, "step": 244525 }, { "epoch": 2.4, "grad_norm": 15.082402229309082, "learning_rate": 3.8360342379377804e-06, "loss": 0.1472, "step": 244550 }, { "epoch": 2.4, "grad_norm": 10.28298282623291, "learning_rate": 3.835910115483532e-06, "loss": 0.1899, "step": 244575 }, { "epoch": 2.4, "grad_norm": 6.949423789978027, "learning_rate": 3.835785993029284e-06, "loss": 0.1305, "step": 244600 }, { "epoch": 2.41, "grad_norm": 1.9468082189559937, "learning_rate": 3.835661870575035e-06, "loss": 0.2266, "step": 244625 }, { "epoch": 2.41, "grad_norm": 13.682535171508789, "learning_rate": 3.835537748120786e-06, "loss": 0.1256, "step": 244650 }, { "epoch": 2.41, "grad_norm": 3.6532504558563232, "learning_rate": 3.835413625666538e-06, "loss": 0.2272, "step": 244675 }, { "epoch": 2.41, "grad_norm": 7.7274956703186035, "learning_rate": 3.835289503212289e-06, "loss": 0.1369, "step": 244700 }, { "epoch": 2.41, "grad_norm": 4.4644317626953125, "learning_rate": 3.835165380758041e-06, "loss": 0.1823, "step": 244725 }, { "epoch": 2.41, "grad_norm": 11.523731231689453, "learning_rate": 3.835041258303793e-06, "loss": 0.1074, "step": 244750 }, { "epoch": 2.41, "grad_norm": 5.346506595611572, "learning_rate": 3.834917135849544e-06, "loss": 0.2167, "step": 244775 }, { "epoch": 2.41, "grad_norm": 11.5980806350708, "learning_rate": 3.834793013395295e-06, "loss": 0.1485, "step": 244800 }, { "epoch": 2.41, "grad_norm": 5.640279769897461, "learning_rate": 3.834668890941047e-06, "loss": 0.2107, "step": 244825 }, { "epoch": 2.41, "grad_norm": 12.389236450195312, "learning_rate": 3.834544768486798e-06, "loss": 0.138, "step": 244850 }, { "epoch": 2.41, "grad_norm": 0.36281704902648926, "learning_rate": 3.8344206460325504e-06, "loss": 0.1948, "step": 244875 }, { "epoch": 2.41, "grad_norm": 11.021944999694824, "learning_rate": 3.834296523578302e-06, "loss": 0.1163, "step": 244900 }, { "epoch": 2.41, "grad_norm": 4.072810173034668, "learning_rate": 3.834172401124054e-06, "loss": 0.2086, "step": 244925 }, { "epoch": 2.41, "grad_norm": 10.72280216217041, "learning_rate": 3.834048278669805e-06, "loss": 0.09, "step": 244950 }, { "epoch": 2.41, "grad_norm": 3.221776247024536, "learning_rate": 3.833924156215557e-06, "loss": 0.2184, "step": 244975 }, { "epoch": 2.41, "grad_norm": 11.173604965209961, "learning_rate": 3.833800033761308e-06, "loss": 0.1426, "step": 245000 }, { "epoch": 2.41, "grad_norm": 5.304584980010986, "learning_rate": 3.833675911307059e-06, "loss": 0.2348, "step": 245025 }, { "epoch": 2.41, "grad_norm": 9.92224407196045, "learning_rate": 3.8335517888528115e-06, "loss": 0.1053, "step": 245050 }, { "epoch": 2.41, "grad_norm": 6.67433500289917, "learning_rate": 3.833427666398563e-06, "loss": 0.2651, "step": 245075 }, { "epoch": 2.41, "grad_norm": 7.034893989562988, "learning_rate": 3.833303543944314e-06, "loss": 0.1691, "step": 245100 }, { "epoch": 2.41, "grad_norm": 3.238570213317871, "learning_rate": 3.833179421490065e-06, "loss": 0.2213, "step": 245125 }, { "epoch": 2.41, "grad_norm": 11.835848808288574, "learning_rate": 3.833055299035817e-06, "loss": 0.1832, "step": 245150 }, { "epoch": 2.41, "grad_norm": 3.2573728561401367, "learning_rate": 3.832931176581568e-06, "loss": 0.2026, "step": 245175 }, { "epoch": 2.41, "grad_norm": 9.0498628616333, "learning_rate": 3.8328070541273196e-06, "loss": 0.1357, "step": 245200 }, { "epoch": 2.41, "grad_norm": 6.56796407699585, "learning_rate": 3.832682931673072e-06, "loss": 0.1807, "step": 245225 }, { "epoch": 2.41, "grad_norm": 17.328866958618164, "learning_rate": 3.832558809218823e-06, "loss": 0.1298, "step": 245250 }, { "epoch": 2.41, "grad_norm": 2.6499099731445312, "learning_rate": 3.832434686764575e-06, "loss": 0.2119, "step": 245275 }, { "epoch": 2.41, "grad_norm": 12.724763870239258, "learning_rate": 3.832310564310326e-06, "loss": 0.1033, "step": 245300 }, { "epoch": 2.41, "grad_norm": 4.223931312561035, "learning_rate": 3.832186441856078e-06, "loss": 0.2128, "step": 245325 }, { "epoch": 2.41, "grad_norm": 7.126525402069092, "learning_rate": 3.832062319401829e-06, "loss": 0.1175, "step": 245350 }, { "epoch": 2.41, "grad_norm": 5.841424465179443, "learning_rate": 3.8319381969475814e-06, "loss": 0.2229, "step": 245375 }, { "epoch": 2.41, "grad_norm": 8.380964279174805, "learning_rate": 3.831814074493333e-06, "loss": 0.119, "step": 245400 }, { "epoch": 2.41, "grad_norm": 5.905337810516357, "learning_rate": 3.831689952039084e-06, "loss": 0.2577, "step": 245425 }, { "epoch": 2.41, "grad_norm": 9.941747665405273, "learning_rate": 3.831565829584836e-06, "loss": 0.1087, "step": 245450 }, { "epoch": 2.41, "grad_norm": 5.150966167449951, "learning_rate": 3.831441707130587e-06, "loss": 0.2508, "step": 245475 }, { "epoch": 2.41, "grad_norm": 13.190898895263672, "learning_rate": 3.831317584676338e-06, "loss": 0.1234, "step": 245500 }, { "epoch": 2.41, "grad_norm": 29.52945899963379, "learning_rate": 3.83119346222209e-06, "loss": 0.2399, "step": 245525 }, { "epoch": 2.41, "grad_norm": 9.050936698913574, "learning_rate": 3.831069339767842e-06, "loss": 0.1088, "step": 245550 }, { "epoch": 2.41, "grad_norm": 6.455696105957031, "learning_rate": 3.830945217313593e-06, "loss": 0.1866, "step": 245575 }, { "epoch": 2.41, "grad_norm": 8.244136810302734, "learning_rate": 3.830821094859345e-06, "loss": 0.0901, "step": 245600 }, { "epoch": 2.42, "grad_norm": 5.399144172668457, "learning_rate": 3.830696972405096e-06, "loss": 0.2141, "step": 245625 }, { "epoch": 2.42, "grad_norm": 16.030372619628906, "learning_rate": 3.830572849950847e-06, "loss": 0.112, "step": 245650 }, { "epoch": 2.42, "grad_norm": 5.109234809875488, "learning_rate": 3.830448727496599e-06, "loss": 0.2119, "step": 245675 }, { "epoch": 2.42, "grad_norm": 4.660910606384277, "learning_rate": 3.8303246050423506e-06, "loss": 0.082, "step": 245700 }, { "epoch": 2.42, "grad_norm": 4.383688449859619, "learning_rate": 3.8302054474862724e-06, "loss": 0.1877, "step": 245725 }, { "epoch": 2.42, "grad_norm": 3.880159378051758, "learning_rate": 3.830081325032024e-06, "loss": 0.1344, "step": 245750 }, { "epoch": 2.42, "grad_norm": 5.463021278381348, "learning_rate": 3.829957202577776e-06, "loss": 0.2899, "step": 245775 }, { "epoch": 2.42, "grad_norm": 7.2878947257995605, "learning_rate": 3.829833080123527e-06, "loss": 0.1032, "step": 245800 }, { "epoch": 2.42, "grad_norm": 2.5379011631011963, "learning_rate": 3.829708957669279e-06, "loss": 0.2213, "step": 245825 }, { "epoch": 2.42, "grad_norm": 7.6440205574035645, "learning_rate": 3.82958483521503e-06, "loss": 0.1138, "step": 245850 }, { "epoch": 2.42, "grad_norm": 9.034523963928223, "learning_rate": 3.829460712760781e-06, "loss": 0.2014, "step": 245875 }, { "epoch": 2.42, "grad_norm": 11.085236549377441, "learning_rate": 3.8293365903065335e-06, "loss": 0.1149, "step": 245900 }, { "epoch": 2.42, "grad_norm": 2.6348657608032227, "learning_rate": 3.829212467852285e-06, "loss": 0.2115, "step": 245925 }, { "epoch": 2.42, "grad_norm": 8.188261032104492, "learning_rate": 3.829088345398036e-06, "loss": 0.1186, "step": 245950 }, { "epoch": 2.42, "grad_norm": 4.420398235321045, "learning_rate": 3.828964222943788e-06, "loss": 0.2218, "step": 245975 }, { "epoch": 2.42, "grad_norm": 9.330080032348633, "learning_rate": 3.828840100489539e-06, "loss": 0.1536, "step": 246000 }, { "epoch": 2.42, "grad_norm": 3.8203513622283936, "learning_rate": 3.82871597803529e-06, "loss": 0.2163, "step": 246025 }, { "epoch": 2.42, "grad_norm": 9.420336723327637, "learning_rate": 3.8285918555810424e-06, "loss": 0.1441, "step": 246050 }, { "epoch": 2.42, "grad_norm": 3.568319320678711, "learning_rate": 3.828467733126794e-06, "loss": 0.22, "step": 246075 }, { "epoch": 2.42, "grad_norm": 14.759164810180664, "learning_rate": 3.828343610672545e-06, "loss": 0.121, "step": 246100 }, { "epoch": 2.42, "grad_norm": 8.393898963928223, "learning_rate": 3.828219488218297e-06, "loss": 0.2311, "step": 246125 }, { "epoch": 2.42, "grad_norm": 11.48033618927002, "learning_rate": 3.828095365764048e-06, "loss": 0.1147, "step": 246150 }, { "epoch": 2.42, "grad_norm": 6.653813362121582, "learning_rate": 3.8279712433098e-06, "loss": 0.244, "step": 246175 }, { "epoch": 2.42, "grad_norm": 10.830023765563965, "learning_rate": 3.827847120855551e-06, "loss": 0.1187, "step": 246200 }, { "epoch": 2.42, "grad_norm": 7.335545539855957, "learning_rate": 3.8277229984013035e-06, "loss": 0.212, "step": 246225 }, { "epoch": 2.42, "grad_norm": 8.683828353881836, "learning_rate": 3.827598875947055e-06, "loss": 0.1318, "step": 246250 }, { "epoch": 2.42, "grad_norm": 4.731262683868408, "learning_rate": 3.827474753492807e-06, "loss": 0.2358, "step": 246275 }, { "epoch": 2.42, "grad_norm": 10.519852638244629, "learning_rate": 3.827350631038558e-06, "loss": 0.1283, "step": 246300 }, { "epoch": 2.42, "grad_norm": 4.17486572265625, "learning_rate": 3.827226508584309e-06, "loss": 0.2166, "step": 246325 }, { "epoch": 2.42, "grad_norm": 6.761101722717285, "learning_rate": 3.827102386130061e-06, "loss": 0.1089, "step": 246350 }, { "epoch": 2.42, "grad_norm": 5.836989402770996, "learning_rate": 3.826978263675812e-06, "loss": 0.2281, "step": 246375 }, { "epoch": 2.42, "grad_norm": 16.966840744018555, "learning_rate": 3.826854141221564e-06, "loss": 0.1196, "step": 246400 }, { "epoch": 2.42, "grad_norm": 6.082511901855469, "learning_rate": 3.826730018767316e-06, "loss": 0.1709, "step": 246425 }, { "epoch": 2.42, "grad_norm": 11.214970588684082, "learning_rate": 3.826605896313067e-06, "loss": 0.1327, "step": 246450 }, { "epoch": 2.42, "grad_norm": 1.6773650646209717, "learning_rate": 3.826481773858818e-06, "loss": 0.2133, "step": 246475 }, { "epoch": 2.42, "grad_norm": 7.17679500579834, "learning_rate": 3.82635765140457e-06, "loss": 0.1414, "step": 246500 }, { "epoch": 2.42, "grad_norm": 2.755432605743408, "learning_rate": 3.826233528950321e-06, "loss": 0.1651, "step": 246525 }, { "epoch": 2.42, "grad_norm": 10.311861038208008, "learning_rate": 3.826109406496073e-06, "loss": 0.1156, "step": 246550 }, { "epoch": 2.42, "grad_norm": 3.5823469161987305, "learning_rate": 3.825985284041825e-06, "loss": 0.2097, "step": 246575 }, { "epoch": 2.42, "grad_norm": 13.18734073638916, "learning_rate": 3.825861161587576e-06, "loss": 0.1544, "step": 246600 }, { "epoch": 2.42, "grad_norm": 3.458773136138916, "learning_rate": 3.825737039133328e-06, "loss": 0.2337, "step": 246625 }, { "epoch": 2.43, "grad_norm": 11.772068977355957, "learning_rate": 3.825612916679079e-06, "loss": 0.1159, "step": 246650 }, { "epoch": 2.43, "grad_norm": 4.370745658874512, "learning_rate": 3.825488794224831e-06, "loss": 0.2253, "step": 246675 }, { "epoch": 2.43, "grad_norm": 6.1095075607299805, "learning_rate": 3.825364671770582e-06, "loss": 0.1177, "step": 246700 }, { "epoch": 2.43, "grad_norm": 3.7526965141296387, "learning_rate": 3.825240549316334e-06, "loss": 0.221, "step": 246725 }, { "epoch": 2.43, "grad_norm": 13.220963478088379, "learning_rate": 3.825116426862086e-06, "loss": 0.1149, "step": 246750 }, { "epoch": 2.43, "grad_norm": 5.970526218414307, "learning_rate": 3.824992304407837e-06, "loss": 0.2935, "step": 246775 }, { "epoch": 2.43, "grad_norm": 9.375776290893555, "learning_rate": 3.824868181953588e-06, "loss": 0.0926, "step": 246800 }, { "epoch": 2.43, "grad_norm": 5.660680294036865, "learning_rate": 3.82474405949934e-06, "loss": 0.2045, "step": 246825 }, { "epoch": 2.43, "grad_norm": 13.394740104675293, "learning_rate": 3.824619937045091e-06, "loss": 0.1618, "step": 246850 }, { "epoch": 2.43, "grad_norm": 4.928456783294678, "learning_rate": 3.8244958145908426e-06, "loss": 0.1645, "step": 246875 }, { "epoch": 2.43, "grad_norm": 8.227799415588379, "learning_rate": 3.8243766570347644e-06, "loss": 0.1258, "step": 246900 }, { "epoch": 2.43, "grad_norm": 5.525660037994385, "learning_rate": 3.824252534580516e-06, "loss": 0.2313, "step": 246925 }, { "epoch": 2.43, "grad_norm": 11.667137145996094, "learning_rate": 3.824128412126268e-06, "loss": 0.149, "step": 246950 }, { "epoch": 2.43, "grad_norm": 4.525216102600098, "learning_rate": 3.824004289672019e-06, "loss": 0.2675, "step": 246975 }, { "epoch": 2.43, "grad_norm": 14.152220726013184, "learning_rate": 3.82388016721777e-06, "loss": 0.1244, "step": 247000 }, { "epoch": 2.43, "grad_norm": 4.242069244384766, "learning_rate": 3.823756044763522e-06, "loss": 0.179, "step": 247025 }, { "epoch": 2.43, "grad_norm": 8.983445167541504, "learning_rate": 3.823631922309273e-06, "loss": 0.1119, "step": 247050 }, { "epoch": 2.43, "grad_norm": 5.26926326751709, "learning_rate": 3.8235077998550255e-06, "loss": 0.2001, "step": 247075 }, { "epoch": 2.43, "grad_norm": 13.203181266784668, "learning_rate": 3.823383677400777e-06, "loss": 0.1333, "step": 247100 }, { "epoch": 2.43, "grad_norm": 3.533050298690796, "learning_rate": 3.823259554946529e-06, "loss": 0.1869, "step": 247125 }, { "epoch": 2.43, "grad_norm": 10.758252143859863, "learning_rate": 3.82313543249228e-06, "loss": 0.1157, "step": 247150 }, { "epoch": 2.43, "grad_norm": 4.338042736053467, "learning_rate": 3.823011310038031e-06, "loss": 0.2105, "step": 247175 }, { "epoch": 2.43, "grad_norm": 13.92014217376709, "learning_rate": 3.822887187583783e-06, "loss": 0.1314, "step": 247200 }, { "epoch": 2.43, "grad_norm": 3.1640889644622803, "learning_rate": 3.8227630651295344e-06, "loss": 0.2339, "step": 247225 }, { "epoch": 2.43, "grad_norm": 10.960591316223145, "learning_rate": 3.822638942675286e-06, "loss": 0.1558, "step": 247250 }, { "epoch": 2.43, "grad_norm": 1.346803069114685, "learning_rate": 3.822514820221038e-06, "loss": 0.1961, "step": 247275 }, { "epoch": 2.43, "grad_norm": 9.687202453613281, "learning_rate": 3.822390697766789e-06, "loss": 0.127, "step": 247300 }, { "epoch": 2.43, "grad_norm": 5.313492774963379, "learning_rate": 3.82226657531254e-06, "loss": 0.22, "step": 247325 }, { "epoch": 2.43, "grad_norm": 6.883601665496826, "learning_rate": 3.822142452858292e-06, "loss": 0.1633, "step": 247350 }, { "epoch": 2.43, "grad_norm": 11.914581298828125, "learning_rate": 3.822018330404043e-06, "loss": 0.2108, "step": 247375 }, { "epoch": 2.43, "grad_norm": 9.556008338928223, "learning_rate": 3.8218942079497955e-06, "loss": 0.1367, "step": 247400 }, { "epoch": 2.43, "grad_norm": 7.295975208282471, "learning_rate": 3.821770085495547e-06, "loss": 0.2457, "step": 247425 }, { "epoch": 2.43, "grad_norm": 17.69536590576172, "learning_rate": 3.821645963041299e-06, "loss": 0.1047, "step": 247450 }, { "epoch": 2.43, "grad_norm": 0.9300279021263123, "learning_rate": 3.82152184058705e-06, "loss": 0.2021, "step": 247475 }, { "epoch": 2.43, "grad_norm": 11.596535682678223, "learning_rate": 3.821397718132802e-06, "loss": 0.144, "step": 247500 }, { "epoch": 2.43, "grad_norm": 5.866912841796875, "learning_rate": 3.821273595678553e-06, "loss": 0.2124, "step": 247525 }, { "epoch": 2.43, "grad_norm": 9.274385452270508, "learning_rate": 3.821149473224304e-06, "loss": 0.1283, "step": 247550 }, { "epoch": 2.43, "grad_norm": 5.6743059158325195, "learning_rate": 3.8210253507700565e-06, "loss": 0.2802, "step": 247575 }, { "epoch": 2.43, "grad_norm": 15.276277542114258, "learning_rate": 3.820901228315808e-06, "loss": 0.1226, "step": 247600 }, { "epoch": 2.43, "grad_norm": 2.602919340133667, "learning_rate": 3.820777105861559e-06, "loss": 0.1914, "step": 247625 }, { "epoch": 2.43, "grad_norm": 10.174832344055176, "learning_rate": 3.820652983407311e-06, "loss": 0.1336, "step": 247650 }, { "epoch": 2.44, "grad_norm": 5.004683494567871, "learning_rate": 3.820528860953062e-06, "loss": 0.1998, "step": 247675 }, { "epoch": 2.44, "grad_norm": 19.051923751831055, "learning_rate": 3.820404738498813e-06, "loss": 0.1538, "step": 247700 }, { "epoch": 2.44, "grad_norm": 0.052593328058719635, "learning_rate": 3.8202806160445654e-06, "loss": 0.196, "step": 247725 }, { "epoch": 2.44, "grad_norm": 7.003042221069336, "learning_rate": 3.820156493590317e-06, "loss": 0.1271, "step": 247750 }, { "epoch": 2.44, "grad_norm": 4.082426071166992, "learning_rate": 3.820032371136068e-06, "loss": 0.2414, "step": 247775 }, { "epoch": 2.44, "grad_norm": 11.120699882507324, "learning_rate": 3.81990824868182e-06, "loss": 0.1587, "step": 247800 }, { "epoch": 2.44, "grad_norm": 5.648801326751709, "learning_rate": 3.819784126227571e-06, "loss": 0.192, "step": 247825 }, { "epoch": 2.44, "grad_norm": 6.237272262573242, "learning_rate": 3.819660003773323e-06, "loss": 0.1309, "step": 247850 }, { "epoch": 2.44, "grad_norm": 2.4524903297424316, "learning_rate": 3.819535881319074e-06, "loss": 0.2237, "step": 247875 }, { "epoch": 2.44, "grad_norm": 14.854691505432129, "learning_rate": 3.8194117588648265e-06, "loss": 0.1239, "step": 247900 }, { "epoch": 2.44, "grad_norm": 6.318059921264648, "learning_rate": 3.819287636410578e-06, "loss": 0.2441, "step": 247925 }, { "epoch": 2.44, "grad_norm": 8.46144962310791, "learning_rate": 3.81916351395633e-06, "loss": 0.1488, "step": 247950 }, { "epoch": 2.44, "grad_norm": 0.7036570906639099, "learning_rate": 3.819039391502081e-06, "loss": 0.2015, "step": 247975 }, { "epoch": 2.44, "grad_norm": 6.92080545425415, "learning_rate": 3.818915269047832e-06, "loss": 0.191, "step": 248000 }, { "epoch": 2.44, "grad_norm": 5.506916522979736, "learning_rate": 3.818791146593583e-06, "loss": 0.2513, "step": 248025 }, { "epoch": 2.44, "grad_norm": 12.016058921813965, "learning_rate": 3.818667024139335e-06, "loss": 0.1415, "step": 248050 }, { "epoch": 2.44, "grad_norm": 15.260380744934082, "learning_rate": 3.818542901685087e-06, "loss": 0.2289, "step": 248075 }, { "epoch": 2.44, "grad_norm": 11.389727592468262, "learning_rate": 3.818418779230838e-06, "loss": 0.1422, "step": 248100 }, { "epoch": 2.44, "grad_norm": 2.72713565826416, "learning_rate": 3.81829465677659e-06, "loss": 0.1992, "step": 248125 }, { "epoch": 2.44, "grad_norm": 16.1550350189209, "learning_rate": 3.818170534322341e-06, "loss": 0.1683, "step": 248150 }, { "epoch": 2.44, "grad_norm": 2.692763566970825, "learning_rate": 3.818046411868092e-06, "loss": 0.1876, "step": 248175 }, { "epoch": 2.44, "grad_norm": 18.392948150634766, "learning_rate": 3.817922289413844e-06, "loss": 0.173, "step": 248200 }, { "epoch": 2.44, "grad_norm": 6.668923377990723, "learning_rate": 3.817798166959596e-06, "loss": 0.1788, "step": 248225 }, { "epoch": 2.44, "grad_norm": 43.19340133666992, "learning_rate": 3.817674044505348e-06, "loss": 0.1357, "step": 248250 }, { "epoch": 2.44, "grad_norm": 4.4444193840026855, "learning_rate": 3.817549922051099e-06, "loss": 0.1941, "step": 248275 }, { "epoch": 2.44, "grad_norm": 10.96194076538086, "learning_rate": 3.817425799596851e-06, "loss": 0.0964, "step": 248300 }, { "epoch": 2.44, "grad_norm": 9.710222244262695, "learning_rate": 3.817301677142602e-06, "loss": 0.1722, "step": 248325 }, { "epoch": 2.44, "grad_norm": 8.952149391174316, "learning_rate": 3.817177554688354e-06, "loss": 0.1175, "step": 248350 }, { "epoch": 2.44, "grad_norm": 2.3849658966064453, "learning_rate": 3.817053432234105e-06, "loss": 0.204, "step": 248375 }, { "epoch": 2.44, "grad_norm": 10.279455184936523, "learning_rate": 3.816929309779857e-06, "loss": 0.1555, "step": 248400 }, { "epoch": 2.44, "grad_norm": 4.596006870269775, "learning_rate": 3.816805187325609e-06, "loss": 0.228, "step": 248425 }, { "epoch": 2.44, "grad_norm": 10.590864181518555, "learning_rate": 3.81668106487136e-06, "loss": 0.156, "step": 248450 }, { "epoch": 2.44, "grad_norm": 4.031273365020752, "learning_rate": 3.816556942417111e-06, "loss": 0.2151, "step": 248475 }, { "epoch": 2.44, "grad_norm": 12.491449356079102, "learning_rate": 3.816432819962863e-06, "loss": 0.1646, "step": 248500 }, { "epoch": 2.44, "grad_norm": 4.264003276824951, "learning_rate": 3.816308697508614e-06, "loss": 0.2145, "step": 248525 }, { "epoch": 2.44, "grad_norm": 11.72638988494873, "learning_rate": 3.816184575054366e-06, "loss": 0.1457, "step": 248550 }, { "epoch": 2.44, "grad_norm": 2.8769800662994385, "learning_rate": 3.816060452600118e-06, "loss": 0.2163, "step": 248575 }, { "epoch": 2.44, "grad_norm": 6.931196212768555, "learning_rate": 3.815936330145869e-06, "loss": 0.1343, "step": 248600 }, { "epoch": 2.44, "grad_norm": 5.58802604675293, "learning_rate": 3.81581220769162e-06, "loss": 0.2128, "step": 248625 }, { "epoch": 2.44, "grad_norm": 11.859824180603027, "learning_rate": 3.815688085237372e-06, "loss": 0.1146, "step": 248650 }, { "epoch": 2.45, "grad_norm": 3.8012921810150146, "learning_rate": 3.815563962783123e-06, "loss": 0.2308, "step": 248675 }, { "epoch": 2.45, "grad_norm": 11.453993797302246, "learning_rate": 3.815439840328875e-06, "loss": 0.1372, "step": 248700 }, { "epoch": 2.45, "grad_norm": 5.064424514770508, "learning_rate": 3.815315717874627e-06, "loss": 0.1965, "step": 248725 }, { "epoch": 2.45, "grad_norm": 8.438040733337402, "learning_rate": 3.815191595420379e-06, "loss": 0.128, "step": 248750 }, { "epoch": 2.45, "grad_norm": 5.30372428894043, "learning_rate": 3.81506747296613e-06, "loss": 0.2216, "step": 248775 }, { "epoch": 2.45, "grad_norm": 11.283256530761719, "learning_rate": 3.814943350511882e-06, "loss": 0.1246, "step": 248800 }, { "epoch": 2.45, "grad_norm": 1.6915359497070312, "learning_rate": 3.814819228057633e-06, "loss": 0.2075, "step": 248825 }, { "epoch": 2.45, "grad_norm": 6.668976306915283, "learning_rate": 3.8146951056033844e-06, "loss": 0.1132, "step": 248850 }, { "epoch": 2.45, "grad_norm": 0.29825299978256226, "learning_rate": 3.8145709831491356e-06, "loss": 0.1969, "step": 248875 }, { "epoch": 2.45, "grad_norm": 12.022941589355469, "learning_rate": 3.8144468606948876e-06, "loss": 0.1179, "step": 248900 }, { "epoch": 2.45, "grad_norm": 3.0319888591766357, "learning_rate": 3.814322738240639e-06, "loss": 0.2445, "step": 248925 }, { "epoch": 2.45, "grad_norm": 12.466651916503906, "learning_rate": 3.8141986157863905e-06, "loss": 0.1125, "step": 248950 }, { "epoch": 2.45, "grad_norm": 2.5523550510406494, "learning_rate": 3.814074493332142e-06, "loss": 0.2517, "step": 248975 }, { "epoch": 2.45, "grad_norm": 17.00925636291504, "learning_rate": 3.8139503708778937e-06, "loss": 0.1226, "step": 249000 }, { "epoch": 2.45, "grad_norm": 4.730687618255615, "learning_rate": 3.813826248423645e-06, "loss": 0.2271, "step": 249025 }, { "epoch": 2.45, "grad_norm": 6.832183837890625, "learning_rate": 3.813702125969397e-06, "loss": 0.1419, "step": 249050 }, { "epoch": 2.45, "grad_norm": 3.391972780227661, "learning_rate": 3.8135780035151482e-06, "loss": 0.1957, "step": 249075 }, { "epoch": 2.45, "grad_norm": 14.808690071105957, "learning_rate": 3.8134538810608994e-06, "loss": 0.1394, "step": 249100 }, { "epoch": 2.45, "grad_norm": 5.266613483428955, "learning_rate": 3.8133297586066515e-06, "loss": 0.2352, "step": 249125 }, { "epoch": 2.45, "grad_norm": 9.87196159362793, "learning_rate": 3.8132056361524027e-06, "loss": 0.1322, "step": 249150 }, { "epoch": 2.45, "grad_norm": 3.016446828842163, "learning_rate": 3.8130815136981543e-06, "loss": 0.1812, "step": 249175 }, { "epoch": 2.45, "grad_norm": 14.533024787902832, "learning_rate": 3.812957391243906e-06, "loss": 0.1401, "step": 249200 }, { "epoch": 2.45, "grad_norm": 4.039854049682617, "learning_rate": 3.8128332687896576e-06, "loss": 0.2096, "step": 249225 }, { "epoch": 2.45, "grad_norm": 7.011066436767578, "learning_rate": 3.812709146335409e-06, "loss": 0.139, "step": 249250 }, { "epoch": 2.45, "grad_norm": 3.943293333053589, "learning_rate": 3.812585023881161e-06, "loss": 0.1981, "step": 249275 }, { "epoch": 2.45, "grad_norm": 5.409204483032227, "learning_rate": 3.812460901426912e-06, "loss": 0.1217, "step": 249300 }, { "epoch": 2.45, "grad_norm": 6.279405117034912, "learning_rate": 3.8123367789726633e-06, "loss": 0.198, "step": 249325 }, { "epoch": 2.45, "grad_norm": 16.328397750854492, "learning_rate": 3.8122126565184154e-06, "loss": 0.1395, "step": 249350 }, { "epoch": 2.45, "grad_norm": 7.386768817901611, "learning_rate": 3.8120885340641666e-06, "loss": 0.2044, "step": 249375 }, { "epoch": 2.45, "grad_norm": 6.119364261627197, "learning_rate": 3.811964411609918e-06, "loss": 0.1125, "step": 249400 }, { "epoch": 2.45, "grad_norm": 5.212466716766357, "learning_rate": 3.81184028915567e-06, "loss": 0.2418, "step": 249425 }, { "epoch": 2.45, "grad_norm": 6.812500953674316, "learning_rate": 3.8117161667014215e-06, "loss": 0.1141, "step": 249450 }, { "epoch": 2.45, "grad_norm": 2.7403273582458496, "learning_rate": 3.8115920442471727e-06, "loss": 0.1968, "step": 249475 }, { "epoch": 2.45, "grad_norm": 10.033851623535156, "learning_rate": 3.8114679217929247e-06, "loss": 0.1631, "step": 249500 }, { "epoch": 2.45, "grad_norm": 4.411059379577637, "learning_rate": 3.811343799338676e-06, "loss": 0.2181, "step": 249525 }, { "epoch": 2.45, "grad_norm": 12.16917896270752, "learning_rate": 3.811219676884427e-06, "loss": 0.1271, "step": 249550 }, { "epoch": 2.45, "grad_norm": 2.9108738899230957, "learning_rate": 3.8110955544301792e-06, "loss": 0.1866, "step": 249575 }, { "epoch": 2.45, "grad_norm": 5.999599933624268, "learning_rate": 3.8109714319759304e-06, "loss": 0.1117, "step": 249600 }, { "epoch": 2.45, "grad_norm": 2.253678321838379, "learning_rate": 3.810847309521682e-06, "loss": 0.2019, "step": 249625 }, { "epoch": 2.45, "grad_norm": 14.385075569152832, "learning_rate": 3.8107231870674337e-06, "loss": 0.1302, "step": 249650 }, { "epoch": 2.45, "grad_norm": 2.6849570274353027, "learning_rate": 3.8105990646131853e-06, "loss": 0.218, "step": 249675 }, { "epoch": 2.46, "grad_norm": 12.874343872070312, "learning_rate": 3.8104749421589366e-06, "loss": 0.1354, "step": 249700 }, { "epoch": 2.46, "grad_norm": 5.766697883605957, "learning_rate": 3.8103508197046878e-06, "loss": 0.2457, "step": 249725 }, { "epoch": 2.46, "grad_norm": 20.44243049621582, "learning_rate": 3.81022669725044e-06, "loss": 0.1578, "step": 249750 }, { "epoch": 2.46, "grad_norm": 3.038510799407959, "learning_rate": 3.810102574796191e-06, "loss": 0.2346, "step": 249775 }, { "epoch": 2.46, "grad_norm": 10.393956184387207, "learning_rate": 3.8099784523419427e-06, "loss": 0.1153, "step": 249800 }, { "epoch": 2.46, "grad_norm": 5.516042232513428, "learning_rate": 3.8098543298876943e-06, "loss": 0.2311, "step": 249825 }, { "epoch": 2.46, "grad_norm": 8.860620498657227, "learning_rate": 3.809730207433446e-06, "loss": 0.1383, "step": 249850 }, { "epoch": 2.46, "grad_norm": 3.3523223400115967, "learning_rate": 3.809606084979197e-06, "loss": 0.2717, "step": 249875 }, { "epoch": 2.46, "grad_norm": 10.801946640014648, "learning_rate": 3.809481962524949e-06, "loss": 0.1355, "step": 249900 }, { "epoch": 2.46, "grad_norm": 3.1027395725250244, "learning_rate": 3.8093578400707004e-06, "loss": 0.1917, "step": 249925 }, { "epoch": 2.46, "grad_norm": 14.241360664367676, "learning_rate": 3.8092337176164516e-06, "loss": 0.1183, "step": 249950 }, { "epoch": 2.46, "grad_norm": 4.7233500480651855, "learning_rate": 3.8091095951622037e-06, "loss": 0.2074, "step": 249975 }, { "epoch": 2.46, "grad_norm": 7.4902448654174805, "learning_rate": 3.808985472707955e-06, "loss": 0.1292, "step": 250000 }, { "epoch": 2.46, "grad_norm": 5.5155134201049805, "learning_rate": 3.8088613502537065e-06, "loss": 0.176, "step": 250025 }, { "epoch": 2.46, "grad_norm": 14.970361709594727, "learning_rate": 3.808737227799458e-06, "loss": 0.1279, "step": 250050 }, { "epoch": 2.46, "grad_norm": 2.782400131225586, "learning_rate": 3.80861310534521e-06, "loss": 0.2426, "step": 250075 }, { "epoch": 2.46, "grad_norm": 10.917407989501953, "learning_rate": 3.808488982890961e-06, "loss": 0.1307, "step": 250100 }, { "epoch": 2.46, "grad_norm": 2.251694679260254, "learning_rate": 3.808364860436713e-06, "loss": 0.1932, "step": 250125 }, { "epoch": 2.46, "grad_norm": 6.549339294433594, "learning_rate": 3.8082407379824643e-06, "loss": 0.1011, "step": 250150 }, { "epoch": 2.46, "grad_norm": 4.056845664978027, "learning_rate": 3.8081166155282155e-06, "loss": 0.2743, "step": 250175 }, { "epoch": 2.46, "grad_norm": 10.820155143737793, "learning_rate": 3.8079924930739676e-06, "loss": 0.1258, "step": 250200 }, { "epoch": 2.46, "grad_norm": 2.691725492477417, "learning_rate": 3.8078683706197188e-06, "loss": 0.1855, "step": 250225 }, { "epoch": 2.46, "grad_norm": 8.652894020080566, "learning_rate": 3.8077442481654704e-06, "loss": 0.1471, "step": 250250 }, { "epoch": 2.46, "grad_norm": 1.3499770164489746, "learning_rate": 3.807620125711222e-06, "loss": 0.2334, "step": 250275 }, { "epoch": 2.46, "grad_norm": 8.555264472961426, "learning_rate": 3.8074960032569737e-06, "loss": 0.1375, "step": 250300 }, { "epoch": 2.46, "grad_norm": 2.730320930480957, "learning_rate": 3.807371880802725e-06, "loss": 0.2255, "step": 250325 }, { "epoch": 2.46, "grad_norm": 9.228438377380371, "learning_rate": 3.807247758348477e-06, "loss": 0.1382, "step": 250350 }, { "epoch": 2.46, "grad_norm": 4.129976272583008, "learning_rate": 3.807123635894228e-06, "loss": 0.1907, "step": 250375 }, { "epoch": 2.46, "grad_norm": 8.642457962036133, "learning_rate": 3.8069995134399794e-06, "loss": 0.1266, "step": 250400 }, { "epoch": 2.46, "grad_norm": 4.783653259277344, "learning_rate": 3.8068753909857314e-06, "loss": 0.2263, "step": 250425 }, { "epoch": 2.46, "grad_norm": 16.399682998657227, "learning_rate": 3.8067512685314826e-06, "loss": 0.1072, "step": 250450 }, { "epoch": 2.46, "grad_norm": 3.6879947185516357, "learning_rate": 3.8066271460772343e-06, "loss": 0.2232, "step": 250475 }, { "epoch": 2.46, "grad_norm": 12.754199028015137, "learning_rate": 3.806503023622986e-06, "loss": 0.1095, "step": 250500 }, { "epoch": 2.46, "grad_norm": 3.2012503147125244, "learning_rate": 3.8063789011687375e-06, "loss": 0.2184, "step": 250525 }, { "epoch": 2.46, "grad_norm": 8.908656120300293, "learning_rate": 3.8062547787144888e-06, "loss": 0.132, "step": 250550 }, { "epoch": 2.46, "grad_norm": 4.841717720031738, "learning_rate": 3.80613065626024e-06, "loss": 0.2485, "step": 250575 }, { "epoch": 2.46, "grad_norm": 8.028861999511719, "learning_rate": 3.806006533805992e-06, "loss": 0.1497, "step": 250600 }, { "epoch": 2.46, "grad_norm": 0.3405967056751251, "learning_rate": 3.8058824113517432e-06, "loss": 0.2216, "step": 250625 }, { "epoch": 2.46, "grad_norm": 7.308781147003174, "learning_rate": 3.805758288897495e-06, "loss": 0.0929, "step": 250650 }, { "epoch": 2.46, "grad_norm": 0.18816450238227844, "learning_rate": 3.8056341664432465e-06, "loss": 0.2146, "step": 250675 }, { "epoch": 2.46, "grad_norm": 13.562821388244629, "learning_rate": 3.805510043988998e-06, "loss": 0.1366, "step": 250700 }, { "epoch": 2.47, "grad_norm": 2.1495935916900635, "learning_rate": 3.8053859215347494e-06, "loss": 0.2523, "step": 250725 }, { "epoch": 2.47, "grad_norm": 15.039835929870605, "learning_rate": 3.8052617990805014e-06, "loss": 0.1298, "step": 250750 }, { "epoch": 2.47, "grad_norm": 4.8648200035095215, "learning_rate": 3.8051376766262526e-06, "loss": 0.2159, "step": 250775 }, { "epoch": 2.47, "grad_norm": 6.925776481628418, "learning_rate": 3.805013554172004e-06, "loss": 0.1181, "step": 250800 }, { "epoch": 2.47, "grad_norm": 4.417579650878906, "learning_rate": 3.804889431717756e-06, "loss": 0.2902, "step": 250825 }, { "epoch": 2.47, "grad_norm": 11.036972045898438, "learning_rate": 3.804765309263507e-06, "loss": 0.1106, "step": 250850 }, { "epoch": 2.47, "grad_norm": 5.008864402770996, "learning_rate": 3.8046411868092587e-06, "loss": 0.2104, "step": 250875 }, { "epoch": 2.47, "grad_norm": 12.441555976867676, "learning_rate": 3.8045170643550104e-06, "loss": 0.1437, "step": 250900 }, { "epoch": 2.47, "grad_norm": 11.869491577148438, "learning_rate": 3.804392941900762e-06, "loss": 0.219, "step": 250925 }, { "epoch": 2.47, "grad_norm": 7.583714008331299, "learning_rate": 3.8042688194465132e-06, "loss": 0.1264, "step": 250950 }, { "epoch": 2.47, "grad_norm": 4.584748268127441, "learning_rate": 3.8041446969922653e-06, "loss": 0.2414, "step": 250975 }, { "epoch": 2.47, "grad_norm": 13.701995849609375, "learning_rate": 3.8040205745380165e-06, "loss": 0.1601, "step": 251000 }, { "epoch": 2.47, "grad_norm": 5.005277633666992, "learning_rate": 3.8039014169819384e-06, "loss": 0.228, "step": 251025 }, { "epoch": 2.47, "grad_norm": 8.764620780944824, "learning_rate": 3.8037772945276896e-06, "loss": 0.0964, "step": 251050 }, { "epoch": 2.47, "grad_norm": 8.439142227172852, "learning_rate": 3.803653172073441e-06, "loss": 0.2123, "step": 251075 }, { "epoch": 2.47, "grad_norm": 14.447528839111328, "learning_rate": 3.803529049619193e-06, "loss": 0.12, "step": 251100 }, { "epoch": 2.47, "grad_norm": 4.737624168395996, "learning_rate": 3.803404927164944e-06, "loss": 0.2136, "step": 251125 }, { "epoch": 2.47, "grad_norm": 3.941830635070801, "learning_rate": 3.8032808047106957e-06, "loss": 0.0819, "step": 251150 }, { "epoch": 2.47, "grad_norm": 3.7722043991088867, "learning_rate": 3.803156682256447e-06, "loss": 0.2458, "step": 251175 }, { "epoch": 2.47, "grad_norm": 8.657825469970703, "learning_rate": 3.803032559802199e-06, "loss": 0.1392, "step": 251200 }, { "epoch": 2.47, "grad_norm": 3.6344618797302246, "learning_rate": 3.80290843734795e-06, "loss": 0.2281, "step": 251225 }, { "epoch": 2.47, "grad_norm": 13.869247436523438, "learning_rate": 3.8027843148937014e-06, "loss": 0.1693, "step": 251250 }, { "epoch": 2.47, "grad_norm": 4.5691118240356445, "learning_rate": 3.8026601924394534e-06, "loss": 0.226, "step": 251275 }, { "epoch": 2.47, "grad_norm": 18.397218704223633, "learning_rate": 3.8025360699852047e-06, "loss": 0.137, "step": 251300 }, { "epoch": 2.47, "grad_norm": 5.2482805252075195, "learning_rate": 3.8024119475309563e-06, "loss": 0.2147, "step": 251325 }, { "epoch": 2.47, "grad_norm": 9.05126953125, "learning_rate": 3.802287825076708e-06, "loss": 0.1257, "step": 251350 }, { "epoch": 2.47, "grad_norm": 3.718916177749634, "learning_rate": 3.8021637026224596e-06, "loss": 0.2106, "step": 251375 }, { "epoch": 2.47, "grad_norm": 9.644620895385742, "learning_rate": 3.8020395801682108e-06, "loss": 0.1015, "step": 251400 }, { "epoch": 2.47, "grad_norm": 5.943514823913574, "learning_rate": 3.801915457713963e-06, "loss": 0.2297, "step": 251425 }, { "epoch": 2.47, "grad_norm": 12.443414688110352, "learning_rate": 3.801791335259714e-06, "loss": 0.1227, "step": 251450 }, { "epoch": 2.47, "grad_norm": 4.216142177581787, "learning_rate": 3.8016672128054657e-06, "loss": 0.2022, "step": 251475 }, { "epoch": 2.47, "grad_norm": 15.469244956970215, "learning_rate": 3.8015430903512173e-06, "loss": 0.134, "step": 251500 }, { "epoch": 2.47, "grad_norm": 2.9268176555633545, "learning_rate": 3.801418967896969e-06, "loss": 0.217, "step": 251525 }, { "epoch": 2.47, "grad_norm": 5.328402042388916, "learning_rate": 3.80129484544272e-06, "loss": 0.1258, "step": 251550 }, { "epoch": 2.47, "grad_norm": 0.3901703655719757, "learning_rate": 3.801170722988472e-06, "loss": 0.2069, "step": 251575 }, { "epoch": 2.47, "grad_norm": 14.167133331298828, "learning_rate": 3.8010466005342234e-06, "loss": 0.134, "step": 251600 }, { "epoch": 2.47, "grad_norm": 5.269737243652344, "learning_rate": 3.8009224780799746e-06, "loss": 0.2189, "step": 251625 }, { "epoch": 2.47, "grad_norm": 9.44375228881836, "learning_rate": 3.8007983556257267e-06, "loss": 0.124, "step": 251650 }, { "epoch": 2.47, "grad_norm": 4.45367431640625, "learning_rate": 3.800674233171478e-06, "loss": 0.2119, "step": 251675 }, { "epoch": 2.47, "grad_norm": 9.167139053344727, "learning_rate": 3.8005501107172295e-06, "loss": 0.1115, "step": 251700 }, { "epoch": 2.48, "grad_norm": 6.079698085784912, "learning_rate": 3.800425988262981e-06, "loss": 0.171, "step": 251725 }, { "epoch": 2.48, "grad_norm": 8.685187339782715, "learning_rate": 3.800301865808733e-06, "loss": 0.1168, "step": 251750 }, { "epoch": 2.48, "grad_norm": 3.745327949523926, "learning_rate": 3.800177743354484e-06, "loss": 0.2078, "step": 251775 }, { "epoch": 2.48, "grad_norm": 10.622971534729004, "learning_rate": 3.800053620900236e-06, "loss": 0.1229, "step": 251800 }, { "epoch": 2.48, "grad_norm": 1.2461729049682617, "learning_rate": 3.7999294984459873e-06, "loss": 0.2188, "step": 251825 }, { "epoch": 2.48, "grad_norm": 6.124009132385254, "learning_rate": 3.7998053759917385e-06, "loss": 0.1446, "step": 251850 }, { "epoch": 2.48, "grad_norm": 5.478132247924805, "learning_rate": 3.7996812535374906e-06, "loss": 0.2371, "step": 251875 }, { "epoch": 2.48, "grad_norm": 10.406662940979004, "learning_rate": 3.7995571310832418e-06, "loss": 0.1441, "step": 251900 }, { "epoch": 2.48, "grad_norm": 4.841222286224365, "learning_rate": 3.7994330086289934e-06, "loss": 0.2159, "step": 251925 }, { "epoch": 2.48, "grad_norm": 12.357189178466797, "learning_rate": 3.799308886174745e-06, "loss": 0.1212, "step": 251950 }, { "epoch": 2.48, "grad_norm": 4.770321846008301, "learning_rate": 3.7991847637204967e-06, "loss": 0.2179, "step": 251975 }, { "epoch": 2.48, "grad_norm": 11.199678421020508, "learning_rate": 3.799060641266248e-06, "loss": 0.1567, "step": 252000 }, { "epoch": 2.48, "grad_norm": 3.6112587451934814, "learning_rate": 3.798936518811999e-06, "loss": 0.2497, "step": 252025 }, { "epoch": 2.48, "grad_norm": 9.817567825317383, "learning_rate": 3.798812396357751e-06, "loss": 0.1417, "step": 252050 }, { "epoch": 2.48, "grad_norm": 8.298874855041504, "learning_rate": 3.7986882739035024e-06, "loss": 0.2242, "step": 252075 }, { "epoch": 2.48, "grad_norm": 11.675538063049316, "learning_rate": 3.798564151449254e-06, "loss": 0.1751, "step": 252100 }, { "epoch": 2.48, "grad_norm": 4.2634148597717285, "learning_rate": 3.7984400289950056e-06, "loss": 0.2122, "step": 252125 }, { "epoch": 2.48, "grad_norm": 12.543381690979004, "learning_rate": 3.7983159065407573e-06, "loss": 0.1233, "step": 252150 }, { "epoch": 2.48, "grad_norm": 3.483350992202759, "learning_rate": 3.7981917840865085e-06, "loss": 0.1605, "step": 252175 }, { "epoch": 2.48, "grad_norm": 8.310179710388184, "learning_rate": 3.7980676616322606e-06, "loss": 0.1244, "step": 252200 }, { "epoch": 2.48, "grad_norm": 5.489602088928223, "learning_rate": 3.7979435391780118e-06, "loss": 0.1881, "step": 252225 }, { "epoch": 2.48, "grad_norm": 13.139788627624512, "learning_rate": 3.797819416723763e-06, "loss": 0.147, "step": 252250 }, { "epoch": 2.48, "grad_norm": 2.6238973140716553, "learning_rate": 3.797695294269515e-06, "loss": 0.1949, "step": 252275 }, { "epoch": 2.48, "grad_norm": 5.424773216247559, "learning_rate": 3.7975711718152662e-06, "loss": 0.1317, "step": 252300 }, { "epoch": 2.48, "grad_norm": 1.943784236907959, "learning_rate": 3.797447049361018e-06, "loss": 0.1946, "step": 252325 }, { "epoch": 2.48, "grad_norm": 13.48548698425293, "learning_rate": 3.7973229269067695e-06, "loss": 0.1322, "step": 252350 }, { "epoch": 2.48, "grad_norm": 5.061173439025879, "learning_rate": 3.797198804452521e-06, "loss": 0.2269, "step": 252375 }, { "epoch": 2.48, "grad_norm": 17.040464401245117, "learning_rate": 3.7970746819982724e-06, "loss": 0.1338, "step": 252400 }, { "epoch": 2.48, "grad_norm": 5.221770763397217, "learning_rate": 3.7969505595440244e-06, "loss": 0.2106, "step": 252425 }, { "epoch": 2.48, "grad_norm": 14.232245445251465, "learning_rate": 3.7968264370897756e-06, "loss": 0.1351, "step": 252450 }, { "epoch": 2.48, "grad_norm": 2.7485568523406982, "learning_rate": 3.796702314635527e-06, "loss": 0.21, "step": 252475 }, { "epoch": 2.48, "grad_norm": 23.012998580932617, "learning_rate": 3.796578192181279e-06, "loss": 0.1674, "step": 252500 }, { "epoch": 2.48, "grad_norm": 3.3585190773010254, "learning_rate": 3.79645406972703e-06, "loss": 0.2318, "step": 252525 }, { "epoch": 2.48, "grad_norm": 12.231101036071777, "learning_rate": 3.7963299472727817e-06, "loss": 0.1302, "step": 252550 }, { "epoch": 2.48, "grad_norm": 4.8900980949401855, "learning_rate": 3.7962058248185334e-06, "loss": 0.2493, "step": 252575 }, { "epoch": 2.48, "grad_norm": 10.967719078063965, "learning_rate": 3.796081702364285e-06, "loss": 0.1412, "step": 252600 }, { "epoch": 2.48, "grad_norm": 5.948084831237793, "learning_rate": 3.7959575799100362e-06, "loss": 0.2518, "step": 252625 }, { "epoch": 2.48, "grad_norm": 14.406489372253418, "learning_rate": 3.7958334574557883e-06, "loss": 0.185, "step": 252650 }, { "epoch": 2.48, "grad_norm": 0.3031902611255646, "learning_rate": 3.7957093350015395e-06, "loss": 0.1997, "step": 252675 }, { "epoch": 2.48, "grad_norm": 8.927108764648438, "learning_rate": 3.7955852125472907e-06, "loss": 0.1292, "step": 252700 }, { "epoch": 2.48, "grad_norm": 4.488778114318848, "learning_rate": 3.7954610900930428e-06, "loss": 0.25, "step": 252725 }, { "epoch": 2.49, "grad_norm": 13.270501136779785, "learning_rate": 3.795336967638794e-06, "loss": 0.1118, "step": 252750 }, { "epoch": 2.49, "grad_norm": 3.7323567867279053, "learning_rate": 3.7952128451845456e-06, "loss": 0.2031, "step": 252775 }, { "epoch": 2.49, "grad_norm": 14.606219291687012, "learning_rate": 3.7950887227302972e-06, "loss": 0.1318, "step": 252800 }, { "epoch": 2.49, "grad_norm": 1.3378487825393677, "learning_rate": 3.794964600276049e-06, "loss": 0.1995, "step": 252825 }, { "epoch": 2.49, "grad_norm": 16.87533187866211, "learning_rate": 3.7948404778218e-06, "loss": 0.1244, "step": 252850 }, { "epoch": 2.49, "grad_norm": 6.37476110458374, "learning_rate": 3.7947163553675513e-06, "loss": 0.2021, "step": 252875 }, { "epoch": 2.49, "grad_norm": 9.262884140014648, "learning_rate": 3.7945922329133034e-06, "loss": 0.1443, "step": 252900 }, { "epoch": 2.49, "grad_norm": 3.4173073768615723, "learning_rate": 3.7944681104590546e-06, "loss": 0.2106, "step": 252925 }, { "epoch": 2.49, "grad_norm": 7.946659564971924, "learning_rate": 3.7943439880048062e-06, "loss": 0.1023, "step": 252950 }, { "epoch": 2.49, "grad_norm": 5.240279197692871, "learning_rate": 3.794219865550558e-06, "loss": 0.2293, "step": 252975 }, { "epoch": 2.49, "grad_norm": 9.440174102783203, "learning_rate": 3.7940957430963095e-06, "loss": 0.0879, "step": 253000 }, { "epoch": 2.49, "grad_norm": 2.7835824489593506, "learning_rate": 3.7939716206420607e-06, "loss": 0.1911, "step": 253025 }, { "epoch": 2.49, "grad_norm": 11.18395709991455, "learning_rate": 3.7938474981878128e-06, "loss": 0.1711, "step": 253050 }, { "epoch": 2.49, "grad_norm": 2.3037853240966797, "learning_rate": 3.7937283406317338e-06, "loss": 0.2232, "step": 253075 }, { "epoch": 2.49, "grad_norm": 11.256714820861816, "learning_rate": 3.793604218177486e-06, "loss": 0.1328, "step": 253100 }, { "epoch": 2.49, "grad_norm": 3.9980359077453613, "learning_rate": 3.793480095723237e-06, "loss": 0.2148, "step": 253125 }, { "epoch": 2.49, "grad_norm": 11.4444580078125, "learning_rate": 3.7933559732689883e-06, "loss": 0.1199, "step": 253150 }, { "epoch": 2.49, "grad_norm": 4.270406723022461, "learning_rate": 3.7932318508147403e-06, "loss": 0.2266, "step": 253175 }, { "epoch": 2.49, "grad_norm": 11.892337799072266, "learning_rate": 3.7931077283604915e-06, "loss": 0.1271, "step": 253200 }, { "epoch": 2.49, "grad_norm": 8.177929878234863, "learning_rate": 3.792983605906243e-06, "loss": 0.2408, "step": 253225 }, { "epoch": 2.49, "grad_norm": 8.182223320007324, "learning_rate": 3.792859483451995e-06, "loss": 0.1475, "step": 253250 }, { "epoch": 2.49, "grad_norm": 2.5754857063293457, "learning_rate": 3.7927353609977464e-06, "loss": 0.1751, "step": 253275 }, { "epoch": 2.49, "grad_norm": 11.244436264038086, "learning_rate": 3.7926112385434976e-06, "loss": 0.133, "step": 253300 }, { "epoch": 2.49, "grad_norm": 4.837428569793701, "learning_rate": 3.7924871160892497e-06, "loss": 0.1917, "step": 253325 }, { "epoch": 2.49, "grad_norm": 8.276856422424316, "learning_rate": 3.792362993635001e-06, "loss": 0.1193, "step": 253350 }, { "epoch": 2.49, "grad_norm": 5.24970006942749, "learning_rate": 3.792238871180752e-06, "loss": 0.2082, "step": 253375 }, { "epoch": 2.49, "grad_norm": 11.365180969238281, "learning_rate": 3.792114748726504e-06, "loss": 0.1413, "step": 253400 }, { "epoch": 2.49, "grad_norm": 2.0706098079681396, "learning_rate": 3.7919906262722554e-06, "loss": 0.2065, "step": 253425 }, { "epoch": 2.49, "grad_norm": 10.113486289978027, "learning_rate": 3.791866503818007e-06, "loss": 0.1464, "step": 253450 }, { "epoch": 2.49, "grad_norm": 2.9117414951324463, "learning_rate": 3.7917423813637582e-06, "loss": 0.2274, "step": 253475 }, { "epoch": 2.49, "grad_norm": 7.2680206298828125, "learning_rate": 3.7916182589095103e-06, "loss": 0.1037, "step": 253500 }, { "epoch": 2.49, "grad_norm": 4.498603820800781, "learning_rate": 3.7914941364552615e-06, "loss": 0.2045, "step": 253525 }, { "epoch": 2.49, "grad_norm": 12.937247276306152, "learning_rate": 3.7913700140010127e-06, "loss": 0.1463, "step": 253550 }, { "epoch": 2.49, "grad_norm": 2.6515021324157715, "learning_rate": 3.7912458915467648e-06, "loss": 0.2474, "step": 253575 }, { "epoch": 2.49, "grad_norm": 13.95267105102539, "learning_rate": 3.791121769092516e-06, "loss": 0.1322, "step": 253600 }, { "epoch": 2.49, "grad_norm": 3.6868081092834473, "learning_rate": 3.7909976466382676e-06, "loss": 0.2219, "step": 253625 }, { "epoch": 2.49, "grad_norm": 7.35267448425293, "learning_rate": 3.7908735241840193e-06, "loss": 0.1194, "step": 253650 }, { "epoch": 2.49, "grad_norm": 5.91545295715332, "learning_rate": 3.790749401729771e-06, "loss": 0.2074, "step": 253675 }, { "epoch": 2.49, "grad_norm": 14.688565254211426, "learning_rate": 3.790625279275522e-06, "loss": 0.139, "step": 253700 }, { "epoch": 2.49, "grad_norm": 3.273228645324707, "learning_rate": 3.790501156821274e-06, "loss": 0.1802, "step": 253725 }, { "epoch": 2.49, "grad_norm": 8.497468948364258, "learning_rate": 3.7903770343670254e-06, "loss": 0.1405, "step": 253750 }, { "epoch": 2.5, "grad_norm": 3.2631542682647705, "learning_rate": 3.7902529119127766e-06, "loss": 0.2202, "step": 253775 }, { "epoch": 2.5, "grad_norm": 16.243186950683594, "learning_rate": 3.7901287894585287e-06, "loss": 0.1745, "step": 253800 }, { "epoch": 2.5, "grad_norm": 5.938221454620361, "learning_rate": 3.79000466700428e-06, "loss": 0.2375, "step": 253825 }, { "epoch": 2.5, "grad_norm": 9.78365707397461, "learning_rate": 3.7898805445500315e-06, "loss": 0.1206, "step": 253850 }, { "epoch": 2.5, "grad_norm": 5.078273773193359, "learning_rate": 3.789756422095783e-06, "loss": 0.2325, "step": 253875 }, { "epoch": 2.5, "grad_norm": 12.086729049682617, "learning_rate": 3.7896322996415348e-06, "loss": 0.141, "step": 253900 }, { "epoch": 2.5, "grad_norm": 5.347399711608887, "learning_rate": 3.789508177187286e-06, "loss": 0.2323, "step": 253925 }, { "epoch": 2.5, "grad_norm": 9.713937759399414, "learning_rate": 3.789384054733038e-06, "loss": 0.1508, "step": 253950 }, { "epoch": 2.5, "grad_norm": 2.853477716445923, "learning_rate": 3.7892599322787893e-06, "loss": 0.2064, "step": 253975 }, { "epoch": 2.5, "grad_norm": 12.612770080566406, "learning_rate": 3.7891358098245405e-06, "loss": 0.1431, "step": 254000 }, { "epoch": 2.5, "grad_norm": 5.235032081604004, "learning_rate": 3.7890116873702925e-06, "loss": 0.2156, "step": 254025 }, { "epoch": 2.5, "grad_norm": 20.963226318359375, "learning_rate": 3.7888875649160437e-06, "loss": 0.1284, "step": 254050 }, { "epoch": 2.5, "grad_norm": 6.598414897918701, "learning_rate": 3.7887634424617954e-06, "loss": 0.1961, "step": 254075 }, { "epoch": 2.5, "grad_norm": 11.204341888427734, "learning_rate": 3.788639320007547e-06, "loss": 0.1523, "step": 254100 }, { "epoch": 2.5, "grad_norm": 0.22206510603427887, "learning_rate": 3.7885151975532986e-06, "loss": 0.2149, "step": 254125 }, { "epoch": 2.5, "grad_norm": 21.354995727539062, "learning_rate": 3.78839107509905e-06, "loss": 0.1784, "step": 254150 }, { "epoch": 2.5, "grad_norm": 1.6025587320327759, "learning_rate": 3.788266952644802e-06, "loss": 0.2344, "step": 254175 }, { "epoch": 2.5, "grad_norm": 11.24300765991211, "learning_rate": 3.788142830190553e-06, "loss": 0.1318, "step": 254200 }, { "epoch": 2.5, "grad_norm": 3.573837995529175, "learning_rate": 3.7880187077363043e-06, "loss": 0.1883, "step": 254225 }, { "epoch": 2.5, "grad_norm": 10.219364166259766, "learning_rate": 3.7878945852820564e-06, "loss": 0.1375, "step": 254250 }, { "epoch": 2.5, "grad_norm": 6.193301677703857, "learning_rate": 3.7877704628278076e-06, "loss": 0.2292, "step": 254275 }, { "epoch": 2.5, "grad_norm": 10.535426139831543, "learning_rate": 3.7876463403735592e-06, "loss": 0.1461, "step": 254300 }, { "epoch": 2.5, "grad_norm": 0.688090980052948, "learning_rate": 3.7875222179193104e-06, "loss": 0.2011, "step": 254325 }, { "epoch": 2.5, "grad_norm": 16.991731643676758, "learning_rate": 3.7873980954650625e-06, "loss": 0.114, "step": 254350 }, { "epoch": 2.5, "grad_norm": 3.078298568725586, "learning_rate": 3.7872739730108137e-06, "loss": 0.1775, "step": 254375 }, { "epoch": 2.5, "grad_norm": 11.655817031860352, "learning_rate": 3.7871498505565654e-06, "loss": 0.109, "step": 254400 }, { "epoch": 2.5, "grad_norm": 6.872893810272217, "learning_rate": 3.787025728102317e-06, "loss": 0.219, "step": 254425 }, { "epoch": 2.5, "grad_norm": 13.73558521270752, "learning_rate": 3.7869016056480686e-06, "loss": 0.1524, "step": 254450 }, { "epoch": 2.5, "grad_norm": 5.155332565307617, "learning_rate": 3.78677748319382e-06, "loss": 0.2352, "step": 254475 }, { "epoch": 2.5, "grad_norm": 4.297087669372559, "learning_rate": 3.786653360739572e-06, "loss": 0.1351, "step": 254500 }, { "epoch": 2.5, "grad_norm": 5.14694356918335, "learning_rate": 3.786529238285323e-06, "loss": 0.1918, "step": 254525 }, { "epoch": 2.5, "grad_norm": 10.524712562561035, "learning_rate": 3.7864051158310743e-06, "loss": 0.1541, "step": 254550 }, { "epoch": 2.5, "grad_norm": 5.3148603439331055, "learning_rate": 3.7862809933768264e-06, "loss": 0.218, "step": 254575 }, { "epoch": 2.5, "grad_norm": 12.551168441772461, "learning_rate": 3.7861568709225776e-06, "loss": 0.147, "step": 254600 }, { "epoch": 2.5, "grad_norm": 2.9637045860290527, "learning_rate": 3.7860327484683292e-06, "loss": 0.226, "step": 254625 }, { "epoch": 2.5, "grad_norm": 10.831130981445312, "learning_rate": 3.785908626014081e-06, "loss": 0.1328, "step": 254650 }, { "epoch": 2.5, "grad_norm": 5.0914154052734375, "learning_rate": 3.7857845035598325e-06, "loss": 0.2308, "step": 254675 }, { "epoch": 2.5, "grad_norm": 2.1792478561401367, "learning_rate": 3.7856603811055837e-06, "loss": 0.1516, "step": 254700 }, { "epoch": 2.5, "grad_norm": 3.9898805618286133, "learning_rate": 3.7855362586513358e-06, "loss": 0.197, "step": 254725 }, { "epoch": 2.5, "grad_norm": 8.197566986083984, "learning_rate": 3.785412136197087e-06, "loss": 0.1443, "step": 254750 }, { "epoch": 2.5, "grad_norm": 4.029721736907959, "learning_rate": 3.785288013742838e-06, "loss": 0.2041, "step": 254775 }, { "epoch": 2.51, "grad_norm": 12.418191909790039, "learning_rate": 3.7851638912885902e-06, "loss": 0.1353, "step": 254800 }, { "epoch": 2.51, "grad_norm": 2.9744436740875244, "learning_rate": 3.7850397688343415e-06, "loss": 0.2143, "step": 254825 }, { "epoch": 2.51, "grad_norm": 7.45154333114624, "learning_rate": 3.784915646380093e-06, "loss": 0.1079, "step": 254850 }, { "epoch": 2.51, "grad_norm": 1.4122484922409058, "learning_rate": 3.7847915239258447e-06, "loss": 0.2062, "step": 254875 }, { "epoch": 2.51, "grad_norm": 14.993205070495605, "learning_rate": 3.7846674014715964e-06, "loss": 0.1431, "step": 254900 }, { "epoch": 2.51, "grad_norm": 1.113391637802124, "learning_rate": 3.7845432790173476e-06, "loss": 0.1946, "step": 254925 }, { "epoch": 2.51, "grad_norm": 11.019282341003418, "learning_rate": 3.7844191565630996e-06, "loss": 0.1048, "step": 254950 }, { "epoch": 2.51, "grad_norm": 3.6592087745666504, "learning_rate": 3.784295034108851e-06, "loss": 0.1746, "step": 254975 }, { "epoch": 2.51, "grad_norm": 15.175031661987305, "learning_rate": 3.784170911654602e-06, "loss": 0.1431, "step": 255000 }, { "epoch": 2.51, "grad_norm": 2.161012649536133, "learning_rate": 3.784046789200354e-06, "loss": 0.2073, "step": 255025 }, { "epoch": 2.51, "grad_norm": 10.58542251586914, "learning_rate": 3.7839226667461053e-06, "loss": 0.1505, "step": 255050 }, { "epoch": 2.51, "grad_norm": 2.96956205368042, "learning_rate": 3.783798544291857e-06, "loss": 0.2663, "step": 255075 }, { "epoch": 2.51, "grad_norm": 10.574209213256836, "learning_rate": 3.7836744218376086e-06, "loss": 0.1208, "step": 255100 }, { "epoch": 2.51, "grad_norm": 5.859383583068848, "learning_rate": 3.7835502993833602e-06, "loss": 0.2032, "step": 255125 }, { "epoch": 2.51, "grad_norm": 12.8696870803833, "learning_rate": 3.7834261769291114e-06, "loss": 0.1233, "step": 255150 }, { "epoch": 2.51, "grad_norm": 4.433146953582764, "learning_rate": 3.7833020544748626e-06, "loss": 0.2336, "step": 255175 }, { "epoch": 2.51, "grad_norm": 15.244584083557129, "learning_rate": 3.7831779320206147e-06, "loss": 0.164, "step": 255200 }, { "epoch": 2.51, "grad_norm": 5.941529750823975, "learning_rate": 3.783053809566366e-06, "loss": 0.226, "step": 255225 }, { "epoch": 2.51, "grad_norm": 8.277892112731934, "learning_rate": 3.7829296871121176e-06, "loss": 0.125, "step": 255250 }, { "epoch": 2.51, "grad_norm": 6.276139259338379, "learning_rate": 3.782805564657869e-06, "loss": 0.1823, "step": 255275 }, { "epoch": 2.51, "grad_norm": 8.412886619567871, "learning_rate": 3.782681442203621e-06, "loss": 0.1482, "step": 255300 }, { "epoch": 2.51, "grad_norm": 0.33954986929893494, "learning_rate": 3.782557319749372e-06, "loss": 0.2033, "step": 255325 }, { "epoch": 2.51, "grad_norm": 15.466663360595703, "learning_rate": 3.782433197295124e-06, "loss": 0.1561, "step": 255350 }, { "epoch": 2.51, "grad_norm": 3.9311296939849854, "learning_rate": 3.7823090748408753e-06, "loss": 0.2364, "step": 255375 }, { "epoch": 2.51, "grad_norm": 11.024463653564453, "learning_rate": 3.7821849523866265e-06, "loss": 0.1206, "step": 255400 }, { "epoch": 2.51, "grad_norm": 4.957257270812988, "learning_rate": 3.7820608299323786e-06, "loss": 0.206, "step": 255425 }, { "epoch": 2.51, "grad_norm": 12.119479179382324, "learning_rate": 3.7819367074781298e-06, "loss": 0.14, "step": 255450 }, { "epoch": 2.51, "grad_norm": 3.298077344894409, "learning_rate": 3.7818125850238814e-06, "loss": 0.2013, "step": 255475 }, { "epoch": 2.51, "grad_norm": 11.947440147399902, "learning_rate": 3.781688462569633e-06, "loss": 0.1392, "step": 255500 }, { "epoch": 2.51, "grad_norm": 5.661924362182617, "learning_rate": 3.7815643401153847e-06, "loss": 0.1889, "step": 255525 }, { "epoch": 2.51, "grad_norm": 12.63482666015625, "learning_rate": 3.781440217661136e-06, "loss": 0.1522, "step": 255550 }, { "epoch": 2.51, "grad_norm": 3.7574682235717773, "learning_rate": 3.781316095206888e-06, "loss": 0.1561, "step": 255575 }, { "epoch": 2.51, "grad_norm": 5.4935784339904785, "learning_rate": 3.781191972752639e-06, "loss": 0.1133, "step": 255600 }, { "epoch": 2.51, "grad_norm": 5.263088703155518, "learning_rate": 3.7810678502983904e-06, "loss": 0.1948, "step": 255625 }, { "epoch": 2.51, "grad_norm": 10.238543510437012, "learning_rate": 3.7809437278441424e-06, "loss": 0.126, "step": 255650 }, { "epoch": 2.51, "grad_norm": 7.405660152435303, "learning_rate": 3.7808196053898937e-06, "loss": 0.199, "step": 255675 }, { "epoch": 2.51, "grad_norm": 6.335493087768555, "learning_rate": 3.7806954829356453e-06, "loss": 0.1219, "step": 255700 }, { "epoch": 2.51, "grad_norm": 2.5975677967071533, "learning_rate": 3.780571360481397e-06, "loss": 0.2243, "step": 255725 }, { "epoch": 2.51, "grad_norm": 7.8059468269348145, "learning_rate": 3.7804472380271486e-06, "loss": 0.0981, "step": 255750 }, { "epoch": 2.51, "grad_norm": 1.514211654663086, "learning_rate": 3.7803280804710696e-06, "loss": 0.209, "step": 255775 }, { "epoch": 2.52, "grad_norm": 10.254481315612793, "learning_rate": 3.7802039580168216e-06, "loss": 0.1122, "step": 255800 }, { "epoch": 2.52, "grad_norm": 4.364445209503174, "learning_rate": 3.780079835562573e-06, "loss": 0.2013, "step": 255825 }, { "epoch": 2.52, "grad_norm": 11.474078178405762, "learning_rate": 3.779955713108324e-06, "loss": 0.1078, "step": 255850 }, { "epoch": 2.52, "grad_norm": 4.972644329071045, "learning_rate": 3.779831590654076e-06, "loss": 0.2291, "step": 255875 }, { "epoch": 2.52, "grad_norm": 7.080767631530762, "learning_rate": 3.7797074681998273e-06, "loss": 0.1084, "step": 255900 }, { "epoch": 2.52, "grad_norm": 5.434871196746826, "learning_rate": 3.779583345745579e-06, "loss": 0.2336, "step": 255925 }, { "epoch": 2.52, "grad_norm": 9.396323204040527, "learning_rate": 3.7794592232913306e-06, "loss": 0.115, "step": 255950 }, { "epoch": 2.52, "grad_norm": 0.7693742513656616, "learning_rate": 3.7793351008370822e-06, "loss": 0.1992, "step": 255975 }, { "epoch": 2.52, "grad_norm": 10.350164413452148, "learning_rate": 3.7792109783828335e-06, "loss": 0.1353, "step": 256000 }, { "epoch": 2.52, "grad_norm": 5.7835493087768555, "learning_rate": 3.7790868559285855e-06, "loss": 0.2552, "step": 256025 }, { "epoch": 2.52, "grad_norm": 10.970226287841797, "learning_rate": 3.7789627334743367e-06, "loss": 0.1349, "step": 256050 }, { "epoch": 2.52, "grad_norm": 5.02412748336792, "learning_rate": 3.778838611020088e-06, "loss": 0.1848, "step": 256075 }, { "epoch": 2.52, "grad_norm": 7.273187160491943, "learning_rate": 3.77871448856584e-06, "loss": 0.1028, "step": 256100 }, { "epoch": 2.52, "grad_norm": 5.120324611663818, "learning_rate": 3.778590366111591e-06, "loss": 0.2108, "step": 256125 }, { "epoch": 2.52, "grad_norm": 10.756645202636719, "learning_rate": 3.778466243657343e-06, "loss": 0.1251, "step": 256150 }, { "epoch": 2.52, "grad_norm": 5.686420917510986, "learning_rate": 3.7783421212030945e-06, "loss": 0.2362, "step": 256175 }, { "epoch": 2.52, "grad_norm": 16.43275260925293, "learning_rate": 3.778217998748846e-06, "loss": 0.1617, "step": 256200 }, { "epoch": 2.52, "grad_norm": 2.421154022216797, "learning_rate": 3.7780938762945973e-06, "loss": 0.1748, "step": 256225 }, { "epoch": 2.52, "grad_norm": 9.86347770690918, "learning_rate": 3.7779697538403494e-06, "loss": 0.1225, "step": 256250 }, { "epoch": 2.52, "grad_norm": 1.8336116075515747, "learning_rate": 3.7778456313861006e-06, "loss": 0.2025, "step": 256275 }, { "epoch": 2.52, "grad_norm": 11.455265998840332, "learning_rate": 3.777721508931852e-06, "loss": 0.1518, "step": 256300 }, { "epoch": 2.52, "grad_norm": 4.580586910247803, "learning_rate": 3.777597386477604e-06, "loss": 0.2425, "step": 256325 }, { "epoch": 2.52, "grad_norm": 9.174232482910156, "learning_rate": 3.777473264023355e-06, "loss": 0.1353, "step": 256350 }, { "epoch": 2.52, "grad_norm": 1.5841623544692993, "learning_rate": 3.7773491415691067e-06, "loss": 0.2248, "step": 256375 }, { "epoch": 2.52, "grad_norm": 10.585862159729004, "learning_rate": 3.7772250191148583e-06, "loss": 0.1078, "step": 256400 }, { "epoch": 2.52, "grad_norm": 3.1559572219848633, "learning_rate": 3.77710089666061e-06, "loss": 0.1921, "step": 256425 }, { "epoch": 2.52, "grad_norm": 16.276201248168945, "learning_rate": 3.776976774206361e-06, "loss": 0.1746, "step": 256450 }, { "epoch": 2.52, "grad_norm": 2.650668144226074, "learning_rate": 3.7768526517521132e-06, "loss": 0.2329, "step": 256475 }, { "epoch": 2.52, "grad_norm": 6.067011833190918, "learning_rate": 3.7767285292978645e-06, "loss": 0.1172, "step": 256500 }, { "epoch": 2.52, "grad_norm": 0.6636252403259277, "learning_rate": 3.7766044068436157e-06, "loss": 0.1915, "step": 256525 }, { "epoch": 2.52, "grad_norm": 12.021676063537598, "learning_rate": 3.7764802843893677e-06, "loss": 0.1385, "step": 256550 }, { "epoch": 2.52, "grad_norm": 4.202225208282471, "learning_rate": 3.776356161935119e-06, "loss": 0.23, "step": 256575 }, { "epoch": 2.52, "grad_norm": 8.866186141967773, "learning_rate": 3.7762320394808706e-06, "loss": 0.1539, "step": 256600 }, { "epoch": 2.52, "grad_norm": 3.789933443069458, "learning_rate": 3.7761079170266218e-06, "loss": 0.2181, "step": 256625 }, { "epoch": 2.52, "grad_norm": 8.916718482971191, "learning_rate": 3.775983794572374e-06, "loss": 0.1429, "step": 256650 }, { "epoch": 2.52, "grad_norm": 3.0600767135620117, "learning_rate": 3.775859672118125e-06, "loss": 0.1825, "step": 256675 }, { "epoch": 2.52, "grad_norm": 6.7436604499816895, "learning_rate": 3.7757355496638763e-06, "loss": 0.136, "step": 256700 }, { "epoch": 2.52, "grad_norm": 3.824782371520996, "learning_rate": 3.7756114272096283e-06, "loss": 0.1918, "step": 256725 }, { "epoch": 2.52, "grad_norm": 9.471197128295898, "learning_rate": 3.7754873047553795e-06, "loss": 0.1298, "step": 256750 }, { "epoch": 2.52, "grad_norm": 5.484696865081787, "learning_rate": 3.7753681471993014e-06, "loss": 0.2436, "step": 256775 }, { "epoch": 2.52, "grad_norm": 12.359843254089355, "learning_rate": 3.7752440247450526e-06, "loss": 0.1303, "step": 256800 }, { "epoch": 2.53, "grad_norm": 3.095494270324707, "learning_rate": 3.7751199022908043e-06, "loss": 0.2389, "step": 256825 }, { "epoch": 2.53, "grad_norm": 10.182856559753418, "learning_rate": 3.774995779836556e-06, "loss": 0.1259, "step": 256850 }, { "epoch": 2.53, "grad_norm": 6.292796611785889, "learning_rate": 3.7748716573823075e-06, "loss": 0.2112, "step": 256875 }, { "epoch": 2.53, "grad_norm": 12.780611038208008, "learning_rate": 3.7747475349280587e-06, "loss": 0.1227, "step": 256900 }, { "epoch": 2.53, "grad_norm": 3.82421612739563, "learning_rate": 3.774623412473811e-06, "loss": 0.2137, "step": 256925 }, { "epoch": 2.53, "grad_norm": 8.771180152893066, "learning_rate": 3.774499290019562e-06, "loss": 0.1392, "step": 256950 }, { "epoch": 2.53, "grad_norm": 6.510988235473633, "learning_rate": 3.7743751675653132e-06, "loss": 0.2064, "step": 256975 }, { "epoch": 2.53, "grad_norm": 14.821929931640625, "learning_rate": 3.7742510451110653e-06, "loss": 0.1493, "step": 257000 }, { "epoch": 2.53, "grad_norm": 2.916346788406372, "learning_rate": 3.7741269226568165e-06, "loss": 0.2073, "step": 257025 }, { "epoch": 2.53, "grad_norm": 10.474222183227539, "learning_rate": 3.774002800202568e-06, "loss": 0.1424, "step": 257050 }, { "epoch": 2.53, "grad_norm": 8.078845024108887, "learning_rate": 3.7738786777483198e-06, "loss": 0.2318, "step": 257075 }, { "epoch": 2.53, "grad_norm": 13.999007225036621, "learning_rate": 3.7737545552940714e-06, "loss": 0.1834, "step": 257100 }, { "epoch": 2.53, "grad_norm": 3.947145700454712, "learning_rate": 3.7736304328398226e-06, "loss": 0.2258, "step": 257125 }, { "epoch": 2.53, "grad_norm": 9.943046569824219, "learning_rate": 3.773506310385574e-06, "loss": 0.1612, "step": 257150 }, { "epoch": 2.53, "grad_norm": 5.366250514984131, "learning_rate": 3.773382187931326e-06, "loss": 0.2032, "step": 257175 }, { "epoch": 2.53, "grad_norm": 15.130144119262695, "learning_rate": 3.773258065477077e-06, "loss": 0.2036, "step": 257200 }, { "epoch": 2.53, "grad_norm": 5.1330037117004395, "learning_rate": 3.7731339430228287e-06, "loss": 0.2125, "step": 257225 }, { "epoch": 2.53, "grad_norm": 10.692391395568848, "learning_rate": 3.7730098205685804e-06, "loss": 0.1396, "step": 257250 }, { "epoch": 2.53, "grad_norm": 2.0186221599578857, "learning_rate": 3.772885698114332e-06, "loss": 0.206, "step": 257275 }, { "epoch": 2.53, "grad_norm": 7.8247761726379395, "learning_rate": 3.772761575660083e-06, "loss": 0.1318, "step": 257300 }, { "epoch": 2.53, "grad_norm": 8.143075942993164, "learning_rate": 3.7726374532058353e-06, "loss": 0.1623, "step": 257325 }, { "epoch": 2.53, "grad_norm": 7.602710247039795, "learning_rate": 3.7725133307515865e-06, "loss": 0.1392, "step": 257350 }, { "epoch": 2.53, "grad_norm": 3.8194334506988525, "learning_rate": 3.772389208297338e-06, "loss": 0.2496, "step": 257375 }, { "epoch": 2.53, "grad_norm": 10.331913948059082, "learning_rate": 3.7722650858430897e-06, "loss": 0.1416, "step": 257400 }, { "epoch": 2.53, "grad_norm": 5.623044013977051, "learning_rate": 3.7721409633888414e-06, "loss": 0.2276, "step": 257425 }, { "epoch": 2.53, "grad_norm": 20.477697372436523, "learning_rate": 3.7720168409345926e-06, "loss": 0.1265, "step": 257450 }, { "epoch": 2.53, "grad_norm": 8.878543853759766, "learning_rate": 3.7718927184803446e-06, "loss": 0.1964, "step": 257475 }, { "epoch": 2.53, "grad_norm": 15.13271427154541, "learning_rate": 3.771768596026096e-06, "loss": 0.1022, "step": 257500 }, { "epoch": 2.53, "grad_norm": 5.595910549163818, "learning_rate": 3.771644473571847e-06, "loss": 0.2766, "step": 257525 }, { "epoch": 2.53, "grad_norm": 16.1505126953125, "learning_rate": 3.771520351117599e-06, "loss": 0.1316, "step": 257550 }, { "epoch": 2.53, "grad_norm": 9.599944114685059, "learning_rate": 3.7713962286633503e-06, "loss": 0.2199, "step": 257575 }, { "epoch": 2.53, "grad_norm": 11.155838012695312, "learning_rate": 3.771272106209102e-06, "loss": 0.1308, "step": 257600 }, { "epoch": 2.53, "grad_norm": 3.098846197128296, "learning_rate": 3.7711479837548536e-06, "loss": 0.2466, "step": 257625 }, { "epoch": 2.53, "grad_norm": 10.87541675567627, "learning_rate": 3.7710238613006052e-06, "loss": 0.1253, "step": 257650 }, { "epoch": 2.53, "grad_norm": 4.163573265075684, "learning_rate": 3.7708997388463565e-06, "loss": 0.2022, "step": 257675 }, { "epoch": 2.53, "grad_norm": 12.382164001464844, "learning_rate": 3.7707756163921085e-06, "loss": 0.1279, "step": 257700 }, { "epoch": 2.53, "grad_norm": 6.148820400238037, "learning_rate": 3.7706514939378597e-06, "loss": 0.2154, "step": 257725 }, { "epoch": 2.53, "grad_norm": 11.42254638671875, "learning_rate": 3.770527371483611e-06, "loss": 0.1315, "step": 257750 }, { "epoch": 2.53, "grad_norm": 3.839287281036377, "learning_rate": 3.770403249029363e-06, "loss": 0.225, "step": 257775 }, { "epoch": 2.53, "grad_norm": 14.624632835388184, "learning_rate": 3.770279126575114e-06, "loss": 0.1346, "step": 257800 }, { "epoch": 2.53, "grad_norm": 4.471219539642334, "learning_rate": 3.770155004120866e-06, "loss": 0.2197, "step": 257825 }, { "epoch": 2.54, "grad_norm": 7.570837020874023, "learning_rate": 3.7700308816666175e-06, "loss": 0.1219, "step": 257850 }, { "epoch": 2.54, "grad_norm": 5.847224235534668, "learning_rate": 3.769906759212369e-06, "loss": 0.2318, "step": 257875 }, { "epoch": 2.54, "grad_norm": 21.695812225341797, "learning_rate": 3.7697826367581203e-06, "loss": 0.144, "step": 257900 }, { "epoch": 2.54, "grad_norm": 5.416363716125488, "learning_rate": 3.7696585143038724e-06, "loss": 0.1845, "step": 257925 }, { "epoch": 2.54, "grad_norm": 13.532193183898926, "learning_rate": 3.7695343918496236e-06, "loss": 0.1427, "step": 257950 }, { "epoch": 2.54, "grad_norm": 6.255847454071045, "learning_rate": 3.769410269395375e-06, "loss": 0.1943, "step": 257975 }, { "epoch": 2.54, "grad_norm": 7.9227375984191895, "learning_rate": 3.769286146941127e-06, "loss": 0.1092, "step": 258000 }, { "epoch": 2.54, "grad_norm": 5.90216588973999, "learning_rate": 3.769162024486878e-06, "loss": 0.2227, "step": 258025 }, { "epoch": 2.54, "grad_norm": 9.488709449768066, "learning_rate": 3.7690379020326297e-06, "loss": 0.0938, "step": 258050 }, { "epoch": 2.54, "grad_norm": 3.876739740371704, "learning_rate": 3.768913779578381e-06, "loss": 0.2168, "step": 258075 }, { "epoch": 2.54, "grad_norm": 11.90186595916748, "learning_rate": 3.768789657124133e-06, "loss": 0.1466, "step": 258100 }, { "epoch": 2.54, "grad_norm": 0.29942119121551514, "learning_rate": 3.768665534669884e-06, "loss": 0.2737, "step": 258125 }, { "epoch": 2.54, "grad_norm": 10.588194847106934, "learning_rate": 3.7685414122156354e-06, "loss": 0.1433, "step": 258150 }, { "epoch": 2.54, "grad_norm": 5.007184982299805, "learning_rate": 3.7684172897613875e-06, "loss": 0.219, "step": 258175 }, { "epoch": 2.54, "grad_norm": 19.90030860900879, "learning_rate": 3.7682931673071387e-06, "loss": 0.1454, "step": 258200 }, { "epoch": 2.54, "grad_norm": 4.7520647048950195, "learning_rate": 3.7681690448528903e-06, "loss": 0.2217, "step": 258225 }, { "epoch": 2.54, "grad_norm": 11.905082702636719, "learning_rate": 3.768044922398642e-06, "loss": 0.1034, "step": 258250 }, { "epoch": 2.54, "grad_norm": 3.23895263671875, "learning_rate": 3.7679207999443936e-06, "loss": 0.1856, "step": 258275 }, { "epoch": 2.54, "grad_norm": 7.808941841125488, "learning_rate": 3.767796677490145e-06, "loss": 0.1143, "step": 258300 }, { "epoch": 2.54, "grad_norm": 1.4607757329940796, "learning_rate": 3.767672555035897e-06, "loss": 0.2248, "step": 258325 }, { "epoch": 2.54, "grad_norm": 3.3792288303375244, "learning_rate": 3.767548432581648e-06, "loss": 0.1469, "step": 258350 }, { "epoch": 2.54, "grad_norm": 3.0144858360290527, "learning_rate": 3.7674243101273993e-06, "loss": 0.2367, "step": 258375 }, { "epoch": 2.54, "grad_norm": 6.314090728759766, "learning_rate": 3.7673001876731513e-06, "loss": 0.1258, "step": 258400 }, { "epoch": 2.54, "grad_norm": 4.879312992095947, "learning_rate": 3.7671760652189025e-06, "loss": 0.2106, "step": 258425 }, { "epoch": 2.54, "grad_norm": 8.852322578430176, "learning_rate": 3.767051942764654e-06, "loss": 0.137, "step": 258450 }, { "epoch": 2.54, "grad_norm": 2.9550588130950928, "learning_rate": 3.766927820310406e-06, "loss": 0.2344, "step": 258475 }, { "epoch": 2.54, "grad_norm": 12.187602996826172, "learning_rate": 3.7668036978561574e-06, "loss": 0.1521, "step": 258500 }, { "epoch": 2.54, "grad_norm": 4.820014953613281, "learning_rate": 3.7666795754019087e-06, "loss": 0.219, "step": 258525 }, { "epoch": 2.54, "grad_norm": 15.815055847167969, "learning_rate": 3.7665554529476607e-06, "loss": 0.1545, "step": 258550 }, { "epoch": 2.54, "grad_norm": 3.038422107696533, "learning_rate": 3.766431330493412e-06, "loss": 0.2478, "step": 258575 }, { "epoch": 2.54, "grad_norm": 3.2737503051757812, "learning_rate": 3.766307208039163e-06, "loss": 0.1521, "step": 258600 }, { "epoch": 2.54, "grad_norm": 2.3039658069610596, "learning_rate": 3.766183085584915e-06, "loss": 0.222, "step": 258625 }, { "epoch": 2.54, "grad_norm": 5.826328277587891, "learning_rate": 3.7660589631306664e-06, "loss": 0.124, "step": 258650 }, { "epoch": 2.54, "grad_norm": 3.82887864112854, "learning_rate": 3.765934840676418e-06, "loss": 0.2417, "step": 258675 }, { "epoch": 2.54, "grad_norm": 7.386754035949707, "learning_rate": 3.7658107182221697e-06, "loss": 0.0892, "step": 258700 }, { "epoch": 2.54, "grad_norm": 5.058088779449463, "learning_rate": 3.7656865957679213e-06, "loss": 0.2223, "step": 258725 }, { "epoch": 2.54, "grad_norm": 14.091797828674316, "learning_rate": 3.7655624733136725e-06, "loss": 0.1416, "step": 258750 }, { "epoch": 2.54, "grad_norm": 0.018284643068909645, "learning_rate": 3.7654383508594246e-06, "loss": 0.219, "step": 258775 }, { "epoch": 2.54, "grad_norm": 8.739449501037598, "learning_rate": 3.765314228405176e-06, "loss": 0.1213, "step": 258800 }, { "epoch": 2.54, "grad_norm": 5.484696865081787, "learning_rate": 3.765190105950927e-06, "loss": 0.2225, "step": 258825 }, { "epoch": 2.55, "grad_norm": 8.717065811157227, "learning_rate": 3.765065983496679e-06, "loss": 0.1173, "step": 258850 }, { "epoch": 2.55, "grad_norm": 8.616206169128418, "learning_rate": 3.7649418610424303e-06, "loss": 0.2034, "step": 258875 }, { "epoch": 2.55, "grad_norm": 11.182707786560059, "learning_rate": 3.764817738588182e-06, "loss": 0.1484, "step": 258900 }, { "epoch": 2.55, "grad_norm": 3.4849045276641846, "learning_rate": 3.764693616133933e-06, "loss": 0.2214, "step": 258925 }, { "epoch": 2.55, "grad_norm": 6.246933937072754, "learning_rate": 3.764569493679685e-06, "loss": 0.1274, "step": 258950 }, { "epoch": 2.55, "grad_norm": 4.22899055480957, "learning_rate": 3.7644453712254364e-06, "loss": 0.2198, "step": 258975 }, { "epoch": 2.55, "grad_norm": 16.700204849243164, "learning_rate": 3.7643212487711876e-06, "loss": 0.1505, "step": 259000 }, { "epoch": 2.55, "grad_norm": 5.2390546798706055, "learning_rate": 3.7641971263169397e-06, "loss": 0.2289, "step": 259025 }, { "epoch": 2.55, "grad_norm": 9.450000762939453, "learning_rate": 3.764073003862691e-06, "loss": 0.1262, "step": 259050 }, { "epoch": 2.55, "grad_norm": 2.9180655479431152, "learning_rate": 3.7639488814084425e-06, "loss": 0.2314, "step": 259075 }, { "epoch": 2.55, "grad_norm": 12.862384796142578, "learning_rate": 3.763824758954194e-06, "loss": 0.1572, "step": 259100 }, { "epoch": 2.55, "grad_norm": 3.1303210258483887, "learning_rate": 3.7637006364999458e-06, "loss": 0.2393, "step": 259125 }, { "epoch": 2.55, "grad_norm": 14.003423690795898, "learning_rate": 3.763576514045697e-06, "loss": 0.1691, "step": 259150 }, { "epoch": 2.55, "grad_norm": 5.705611228942871, "learning_rate": 3.763452391591449e-06, "loss": 0.1951, "step": 259175 }, { "epoch": 2.55, "grad_norm": 13.965069770812988, "learning_rate": 3.7633282691372003e-06, "loss": 0.1407, "step": 259200 }, { "epoch": 2.55, "grad_norm": 5.017302513122559, "learning_rate": 3.7632041466829515e-06, "loss": 0.2147, "step": 259225 }, { "epoch": 2.55, "grad_norm": 15.529586791992188, "learning_rate": 3.7630800242287035e-06, "loss": 0.1031, "step": 259250 }, { "epoch": 2.55, "grad_norm": 2.7555017471313477, "learning_rate": 3.7629559017744547e-06, "loss": 0.1899, "step": 259275 }, { "epoch": 2.55, "grad_norm": 10.0020751953125, "learning_rate": 3.7628317793202064e-06, "loss": 0.1295, "step": 259300 }, { "epoch": 2.55, "grad_norm": 3.5630714893341064, "learning_rate": 3.762707656865958e-06, "loss": 0.1958, "step": 259325 }, { "epoch": 2.55, "grad_norm": 11.524697303771973, "learning_rate": 3.7625835344117096e-06, "loss": 0.1293, "step": 259350 }, { "epoch": 2.55, "grad_norm": 3.3882806301116943, "learning_rate": 3.762459411957461e-06, "loss": 0.2454, "step": 259375 }, { "epoch": 2.55, "grad_norm": 10.707003593444824, "learning_rate": 3.762335289503213e-06, "loss": 0.1384, "step": 259400 }, { "epoch": 2.55, "grad_norm": 2.6547975540161133, "learning_rate": 3.762211167048964e-06, "loss": 0.2003, "step": 259425 }, { "epoch": 2.55, "grad_norm": 9.438066482543945, "learning_rate": 3.7620870445947153e-06, "loss": 0.1679, "step": 259450 }, { "epoch": 2.55, "grad_norm": 5.79262113571167, "learning_rate": 3.7619629221404674e-06, "loss": 0.1865, "step": 259475 }, { "epoch": 2.55, "grad_norm": 9.222309112548828, "learning_rate": 3.7618387996862186e-06, "loss": 0.1217, "step": 259500 }, { "epoch": 2.55, "grad_norm": 5.393226623535156, "learning_rate": 3.7617146772319702e-06, "loss": 0.2379, "step": 259525 }, { "epoch": 2.55, "grad_norm": 5.995694637298584, "learning_rate": 3.761590554777722e-06, "loss": 0.114, "step": 259550 }, { "epoch": 2.55, "grad_norm": 4.590497970581055, "learning_rate": 3.7614664323234735e-06, "loss": 0.1842, "step": 259575 }, { "epoch": 2.55, "grad_norm": 11.623085975646973, "learning_rate": 3.7613423098692247e-06, "loss": 0.1414, "step": 259600 }, { "epoch": 2.55, "grad_norm": 5.898256301879883, "learning_rate": 3.7612181874149768e-06, "loss": 0.2173, "step": 259625 }, { "epoch": 2.55, "grad_norm": 10.309513092041016, "learning_rate": 3.761094064960728e-06, "loss": 0.1255, "step": 259650 }, { "epoch": 2.55, "grad_norm": 4.134179592132568, "learning_rate": 3.760969942506479e-06, "loss": 0.2274, "step": 259675 }, { "epoch": 2.55, "grad_norm": 16.09148597717285, "learning_rate": 3.7608458200522313e-06, "loss": 0.1216, "step": 259700 }, { "epoch": 2.55, "grad_norm": 4.673446178436279, "learning_rate": 3.7607216975979825e-06, "loss": 0.2298, "step": 259725 }, { "epoch": 2.55, "grad_norm": 13.144637107849121, "learning_rate": 3.760597575143734e-06, "loss": 0.1309, "step": 259750 }, { "epoch": 2.55, "grad_norm": 7.355662822723389, "learning_rate": 3.7604734526894853e-06, "loss": 0.257, "step": 259775 }, { "epoch": 2.55, "grad_norm": 27.635969161987305, "learning_rate": 3.7603493302352374e-06, "loss": 0.1142, "step": 259800 }, { "epoch": 2.55, "grad_norm": 7.080280780792236, "learning_rate": 3.7602252077809886e-06, "loss": 0.213, "step": 259825 }, { "epoch": 2.55, "grad_norm": 5.996690273284912, "learning_rate": 3.76010108532674e-06, "loss": 0.1114, "step": 259850 }, { "epoch": 2.56, "grad_norm": 3.885697841644287, "learning_rate": 3.759976962872492e-06, "loss": 0.1742, "step": 259875 }, { "epoch": 2.56, "grad_norm": 12.375605583190918, "learning_rate": 3.759852840418243e-06, "loss": 0.1405, "step": 259900 }, { "epoch": 2.56, "grad_norm": 0.8518593907356262, "learning_rate": 3.7597287179639947e-06, "loss": 0.1972, "step": 259925 }, { "epoch": 2.56, "grad_norm": 13.398197174072266, "learning_rate": 3.7596045955097463e-06, "loss": 0.1192, "step": 259950 }, { "epoch": 2.56, "grad_norm": 7.56881856918335, "learning_rate": 3.759480473055498e-06, "loss": 0.2182, "step": 259975 }, { "epoch": 2.56, "grad_norm": 10.307709693908691, "learning_rate": 3.759356350601249e-06, "loss": 0.1394, "step": 260000 }, { "epoch": 2.56, "eval_loss": 0.57342129945755, "eval_runtime": 6031.9923, "eval_samples_per_second": 1.569, "eval_steps_per_second": 0.196, "eval_wer": 0.12314105452906715, "step": 260000 }, { "epoch": 2.56, "grad_norm": 7.499889850616455, "learning_rate": 3.7592322281470013e-06, "loss": 0.2428, "step": 260025 }, { "epoch": 2.56, "grad_norm": 14.7726411819458, "learning_rate": 3.7591081056927525e-06, "loss": 0.095, "step": 260050 }, { "epoch": 2.56, "grad_norm": 3.6485958099365234, "learning_rate": 3.7589839832385037e-06, "loss": 0.2183, "step": 260075 }, { "epoch": 2.56, "grad_norm": 10.425260543823242, "learning_rate": 3.7588598607842557e-06, "loss": 0.1157, "step": 260100 }, { "epoch": 2.56, "grad_norm": 8.34438705444336, "learning_rate": 3.758735738330007e-06, "loss": 0.245, "step": 260125 }, { "epoch": 2.56, "grad_norm": 10.520173072814941, "learning_rate": 3.7586116158757586e-06, "loss": 0.1358, "step": 260150 }, { "epoch": 2.56, "grad_norm": 4.541428089141846, "learning_rate": 3.7584874934215102e-06, "loss": 0.2054, "step": 260175 }, { "epoch": 2.56, "grad_norm": 10.007134437561035, "learning_rate": 3.758363370967262e-06, "loss": 0.1249, "step": 260200 }, { "epoch": 2.56, "grad_norm": 2.6425390243530273, "learning_rate": 3.758239248513013e-06, "loss": 0.1974, "step": 260225 }, { "epoch": 2.56, "grad_norm": 10.735048294067383, "learning_rate": 3.758115126058765e-06, "loss": 0.123, "step": 260250 }, { "epoch": 2.56, "grad_norm": 3.5687508583068848, "learning_rate": 3.7579910036045163e-06, "loss": 0.2141, "step": 260275 }, { "epoch": 2.56, "grad_norm": 11.868779182434082, "learning_rate": 3.757866881150268e-06, "loss": 0.109, "step": 260300 }, { "epoch": 2.56, "grad_norm": 4.205657005310059, "learning_rate": 3.7577427586960196e-06, "loss": 0.201, "step": 260325 }, { "epoch": 2.56, "grad_norm": 7.672210216522217, "learning_rate": 3.7576186362417712e-06, "loss": 0.1637, "step": 260350 }, { "epoch": 2.56, "grad_norm": 5.046449184417725, "learning_rate": 3.7574945137875224e-06, "loss": 0.1869, "step": 260375 }, { "epoch": 2.56, "grad_norm": 12.024364471435547, "learning_rate": 3.7573703913332745e-06, "loss": 0.1234, "step": 260400 }, { "epoch": 2.56, "grad_norm": 4.927854537963867, "learning_rate": 3.7572462688790257e-06, "loss": 0.2324, "step": 260425 }, { "epoch": 2.56, "grad_norm": 11.517630577087402, "learning_rate": 3.757122146424777e-06, "loss": 0.1161, "step": 260450 }, { "epoch": 2.56, "grad_norm": 4.676196575164795, "learning_rate": 3.756998023970529e-06, "loss": 0.2106, "step": 260475 }, { "epoch": 2.56, "grad_norm": 15.834805488586426, "learning_rate": 3.75687390151628e-06, "loss": 0.1567, "step": 260500 }, { "epoch": 2.56, "grad_norm": 5.805502414703369, "learning_rate": 3.756749779062032e-06, "loss": 0.2472, "step": 260525 }, { "epoch": 2.56, "grad_norm": 18.792572021484375, "learning_rate": 3.7566256566077835e-06, "loss": 0.1354, "step": 260550 }, { "epoch": 2.56, "grad_norm": 3.900824785232544, "learning_rate": 3.756501534153535e-06, "loss": 0.2334, "step": 260575 }, { "epoch": 2.56, "grad_norm": 12.404107093811035, "learning_rate": 3.7563774116992863e-06, "loss": 0.1236, "step": 260600 }, { "epoch": 2.56, "grad_norm": 28.03407096862793, "learning_rate": 3.7562532892450375e-06, "loss": 0.2084, "step": 260625 }, { "epoch": 2.56, "grad_norm": 8.063557624816895, "learning_rate": 3.7561291667907896e-06, "loss": 0.1467, "step": 260650 }, { "epoch": 2.56, "grad_norm": 5.033152103424072, "learning_rate": 3.756005044336541e-06, "loss": 0.1958, "step": 260675 }, { "epoch": 2.56, "grad_norm": 7.554753303527832, "learning_rate": 3.7558809218822924e-06, "loss": 0.1086, "step": 260700 }, { "epoch": 2.56, "grad_norm": 6.252439022064209, "learning_rate": 3.755756799428044e-06, "loss": 0.2115, "step": 260725 }, { "epoch": 2.56, "grad_norm": 6.843128681182861, "learning_rate": 3.7556326769737957e-06, "loss": 0.1517, "step": 260750 }, { "epoch": 2.56, "grad_norm": 4.364349842071533, "learning_rate": 3.755513519417717e-06, "loss": 0.2297, "step": 260775 }, { "epoch": 2.56, "grad_norm": 10.52418327331543, "learning_rate": 3.755389396963469e-06, "loss": 0.1626, "step": 260800 }, { "epoch": 2.56, "grad_norm": 4.363192081451416, "learning_rate": 3.75526527450922e-06, "loss": 0.2147, "step": 260825 }, { "epoch": 2.56, "grad_norm": 3.724672555923462, "learning_rate": 3.755141152054972e-06, "loss": 0.1462, "step": 260850 }, { "epoch": 2.56, "grad_norm": 1.45834219455719, "learning_rate": 3.7550170296007233e-06, "loss": 0.2461, "step": 260875 }, { "epoch": 2.57, "grad_norm": 16.94317626953125, "learning_rate": 3.7548929071464745e-06, "loss": 0.1086, "step": 260900 }, { "epoch": 2.57, "grad_norm": 2.24615478515625, "learning_rate": 3.7547687846922265e-06, "loss": 0.1906, "step": 260925 }, { "epoch": 2.57, "grad_norm": 11.651933670043945, "learning_rate": 3.7546446622379778e-06, "loss": 0.1181, "step": 260950 }, { "epoch": 2.57, "grad_norm": 4.062032699584961, "learning_rate": 3.7545205397837294e-06, "loss": 0.2405, "step": 260975 }, { "epoch": 2.57, "grad_norm": 12.841846466064453, "learning_rate": 3.754396417329481e-06, "loss": 0.1155, "step": 261000 }, { "epoch": 2.57, "grad_norm": 3.6465203762054443, "learning_rate": 3.7542722948752327e-06, "loss": 0.2107, "step": 261025 }, { "epoch": 2.57, "grad_norm": 7.912140846252441, "learning_rate": 3.754148172420984e-06, "loss": 0.1021, "step": 261050 }, { "epoch": 2.57, "grad_norm": 4.190739154815674, "learning_rate": 3.754024049966736e-06, "loss": 0.2279, "step": 261075 }, { "epoch": 2.57, "grad_norm": 6.186626434326172, "learning_rate": 3.753899927512487e-06, "loss": 0.1122, "step": 261100 }, { "epoch": 2.57, "grad_norm": 3.281637191772461, "learning_rate": 3.7537758050582383e-06, "loss": 0.2357, "step": 261125 }, { "epoch": 2.57, "grad_norm": 8.906075477600098, "learning_rate": 3.7536516826039904e-06, "loss": 0.1268, "step": 261150 }, { "epoch": 2.57, "grad_norm": 6.986564636230469, "learning_rate": 3.7535275601497416e-06, "loss": 0.2526, "step": 261175 }, { "epoch": 2.57, "grad_norm": 5.932482719421387, "learning_rate": 3.7534034376954933e-06, "loss": 0.13, "step": 261200 }, { "epoch": 2.57, "grad_norm": 0.7940159440040588, "learning_rate": 3.7532793152412445e-06, "loss": 0.2008, "step": 261225 }, { "epoch": 2.57, "grad_norm": 11.665306091308594, "learning_rate": 3.7531551927869965e-06, "loss": 0.1647, "step": 261250 }, { "epoch": 2.57, "grad_norm": 1.8022242784500122, "learning_rate": 3.7530310703327477e-06, "loss": 0.1637, "step": 261275 }, { "epoch": 2.57, "grad_norm": 14.634345054626465, "learning_rate": 3.752906947878499e-06, "loss": 0.1343, "step": 261300 }, { "epoch": 2.57, "grad_norm": 7.091131687164307, "learning_rate": 3.752782825424251e-06, "loss": 0.2106, "step": 261325 }, { "epoch": 2.57, "grad_norm": 8.968057632446289, "learning_rate": 3.7526587029700022e-06, "loss": 0.1111, "step": 261350 }, { "epoch": 2.57, "grad_norm": 4.573159217834473, "learning_rate": 3.752534580515754e-06, "loss": 0.1987, "step": 261375 }, { "epoch": 2.57, "grad_norm": 10.410737991333008, "learning_rate": 3.7524104580615055e-06, "loss": 0.1304, "step": 261400 }, { "epoch": 2.57, "grad_norm": 2.553321123123169, "learning_rate": 3.752286335607257e-06, "loss": 0.2211, "step": 261425 }, { "epoch": 2.57, "grad_norm": 12.718467712402344, "learning_rate": 3.7521622131530083e-06, "loss": 0.1217, "step": 261450 }, { "epoch": 2.57, "grad_norm": 4.4159979820251465, "learning_rate": 3.7520380906987604e-06, "loss": 0.19, "step": 261475 }, { "epoch": 2.57, "grad_norm": 14.394644737243652, "learning_rate": 3.7519139682445116e-06, "loss": 0.1105, "step": 261500 }, { "epoch": 2.57, "grad_norm": 6.320724010467529, "learning_rate": 3.751789845790263e-06, "loss": 0.2136, "step": 261525 }, { "epoch": 2.57, "grad_norm": 9.45744800567627, "learning_rate": 3.751665723336015e-06, "loss": 0.1456, "step": 261550 }, { "epoch": 2.57, "grad_norm": 10.255683898925781, "learning_rate": 3.751541600881766e-06, "loss": 0.2556, "step": 261575 }, { "epoch": 2.57, "grad_norm": 10.660456657409668, "learning_rate": 3.7514174784275177e-06, "loss": 0.1074, "step": 261600 }, { "epoch": 2.57, "grad_norm": 4.89682674407959, "learning_rate": 3.7512933559732694e-06, "loss": 0.1959, "step": 261625 }, { "epoch": 2.57, "grad_norm": 15.756166458129883, "learning_rate": 3.751169233519021e-06, "loss": 0.1307, "step": 261650 }, { "epoch": 2.57, "grad_norm": 0.41740497946739197, "learning_rate": 3.751045111064772e-06, "loss": 0.2291, "step": 261675 }, { "epoch": 2.57, "grad_norm": 16.59619140625, "learning_rate": 3.7509209886105243e-06, "loss": 0.1449, "step": 261700 }, { "epoch": 2.57, "grad_norm": 4.555671215057373, "learning_rate": 3.7507968661562755e-06, "loss": 0.2047, "step": 261725 }, { "epoch": 2.57, "grad_norm": 7.426295757293701, "learning_rate": 3.7506727437020267e-06, "loss": 0.123, "step": 261750 }, { "epoch": 2.57, "grad_norm": 5.672253131866455, "learning_rate": 3.7505486212477787e-06, "loss": 0.218, "step": 261775 }, { "epoch": 2.57, "grad_norm": 9.396730422973633, "learning_rate": 3.75042449879353e-06, "loss": 0.1202, "step": 261800 }, { "epoch": 2.57, "grad_norm": 18.306007385253906, "learning_rate": 3.7503003763392816e-06, "loss": 0.1913, "step": 261825 }, { "epoch": 2.57, "grad_norm": 11.958409309387207, "learning_rate": 3.7501762538850332e-06, "loss": 0.1453, "step": 261850 }, { "epoch": 2.57, "grad_norm": 4.320794582366943, "learning_rate": 3.750052131430785e-06, "loss": 0.1691, "step": 261875 }, { "epoch": 2.58, "grad_norm": 10.44019603729248, "learning_rate": 3.749928008976536e-06, "loss": 0.1577, "step": 261900 }, { "epoch": 2.58, "grad_norm": 3.673464059829712, "learning_rate": 3.749803886522288e-06, "loss": 0.2232, "step": 261925 }, { "epoch": 2.58, "grad_norm": 16.21293830871582, "learning_rate": 3.7496797640680393e-06, "loss": 0.1153, "step": 261950 }, { "epoch": 2.58, "grad_norm": 7.816793918609619, "learning_rate": 3.7495556416137906e-06, "loss": 0.2233, "step": 261975 }, { "epoch": 2.58, "grad_norm": 10.280723571777344, "learning_rate": 3.7494315191595426e-06, "loss": 0.1189, "step": 262000 }, { "epoch": 2.58, "grad_norm": 3.685851573944092, "learning_rate": 3.749307396705294e-06, "loss": 0.2077, "step": 262025 }, { "epoch": 2.58, "grad_norm": 5.827277183532715, "learning_rate": 3.7491832742510455e-06, "loss": 0.1371, "step": 262050 }, { "epoch": 2.58, "grad_norm": 8.144241333007812, "learning_rate": 3.7490591517967967e-06, "loss": 0.224, "step": 262075 }, { "epoch": 2.58, "grad_norm": 11.125285148620605, "learning_rate": 3.7489350293425487e-06, "loss": 0.1015, "step": 262100 }, { "epoch": 2.58, "grad_norm": 6.811689376831055, "learning_rate": 3.7488109068883e-06, "loss": 0.2532, "step": 262125 }, { "epoch": 2.58, "grad_norm": 16.55181312561035, "learning_rate": 3.748686784434051e-06, "loss": 0.1484, "step": 262150 }, { "epoch": 2.58, "grad_norm": 9.274212837219238, "learning_rate": 3.748562661979803e-06, "loss": 0.2225, "step": 262175 }, { "epoch": 2.58, "grad_norm": 10.739167213439941, "learning_rate": 3.7484385395255544e-06, "loss": 0.1404, "step": 262200 }, { "epoch": 2.58, "grad_norm": 2.6632604598999023, "learning_rate": 3.748314417071306e-06, "loss": 0.2174, "step": 262225 }, { "epoch": 2.58, "grad_norm": 5.543617248535156, "learning_rate": 3.7481902946170577e-06, "loss": 0.1212, "step": 262250 }, { "epoch": 2.58, "grad_norm": 4.135402679443359, "learning_rate": 3.7480661721628093e-06, "loss": 0.2594, "step": 262275 }, { "epoch": 2.58, "grad_norm": 10.145546913146973, "learning_rate": 3.7479420497085605e-06, "loss": 0.1122, "step": 262300 }, { "epoch": 2.58, "grad_norm": 6.030662536621094, "learning_rate": 3.7478179272543126e-06, "loss": 0.2457, "step": 262325 }, { "epoch": 2.58, "grad_norm": 8.149598121643066, "learning_rate": 3.747693804800064e-06, "loss": 0.1392, "step": 262350 }, { "epoch": 2.58, "grad_norm": 5.2865214347839355, "learning_rate": 3.747569682345815e-06, "loss": 0.2482, "step": 262375 }, { "epoch": 2.58, "grad_norm": 6.26718807220459, "learning_rate": 3.747445559891567e-06, "loss": 0.1266, "step": 262400 }, { "epoch": 2.58, "grad_norm": 5.073844909667969, "learning_rate": 3.7473214374373183e-06, "loss": 0.2248, "step": 262425 }, { "epoch": 2.58, "grad_norm": 15.327573776245117, "learning_rate": 3.74719731498307e-06, "loss": 0.1166, "step": 262450 }, { "epoch": 2.58, "grad_norm": 4.047191143035889, "learning_rate": 3.7470731925288216e-06, "loss": 0.2294, "step": 262475 }, { "epoch": 2.58, "grad_norm": 9.90582275390625, "learning_rate": 3.746949070074573e-06, "loss": 0.17, "step": 262500 }, { "epoch": 2.58, "grad_norm": 7.78876256942749, "learning_rate": 3.7468249476203244e-06, "loss": 0.2156, "step": 262525 }, { "epoch": 2.58, "grad_norm": 8.581506729125977, "learning_rate": 3.7467008251660765e-06, "loss": 0.1059, "step": 262550 }, { "epoch": 2.58, "grad_norm": 4.425512313842773, "learning_rate": 3.7465767027118277e-06, "loss": 0.2683, "step": 262575 }, { "epoch": 2.58, "grad_norm": 13.4368257522583, "learning_rate": 3.746452580257579e-06, "loss": 0.1534, "step": 262600 }, { "epoch": 2.58, "grad_norm": 4.921104431152344, "learning_rate": 3.746328457803331e-06, "loss": 0.2203, "step": 262625 }, { "epoch": 2.58, "grad_norm": 29.07013511657715, "learning_rate": 3.746204335349082e-06, "loss": 0.1651, "step": 262650 }, { "epoch": 2.58, "grad_norm": 5.34168004989624, "learning_rate": 3.746080212894834e-06, "loss": 0.2025, "step": 262675 }, { "epoch": 2.58, "grad_norm": 9.228187561035156, "learning_rate": 3.7459560904405854e-06, "loss": 0.1305, "step": 262700 }, { "epoch": 2.58, "grad_norm": 6.417633533477783, "learning_rate": 3.745831967986337e-06, "loss": 0.2104, "step": 262725 }, { "epoch": 2.58, "grad_norm": 11.162362098693848, "learning_rate": 3.7457078455320883e-06, "loss": 0.0959, "step": 262750 }, { "epoch": 2.58, "grad_norm": 9.500197410583496, "learning_rate": 3.7455837230778403e-06, "loss": 0.2025, "step": 262775 }, { "epoch": 2.58, "grad_norm": 10.161698341369629, "learning_rate": 3.7454596006235915e-06, "loss": 0.1412, "step": 262800 }, { "epoch": 2.58, "grad_norm": 1.2249926328659058, "learning_rate": 3.7453354781693428e-06, "loss": 0.1954, "step": 262825 }, { "epoch": 2.58, "grad_norm": 11.499341011047363, "learning_rate": 3.745211355715095e-06, "loss": 0.1264, "step": 262850 }, { "epoch": 2.58, "grad_norm": 3.200472116470337, "learning_rate": 3.745087233260846e-06, "loss": 0.2063, "step": 262875 }, { "epoch": 2.58, "grad_norm": 11.081083297729492, "learning_rate": 3.7449631108065977e-06, "loss": 0.1397, "step": 262900 }, { "epoch": 2.59, "grad_norm": 4.484785556793213, "learning_rate": 3.744838988352349e-06, "loss": 0.2127, "step": 262925 }, { "epoch": 2.59, "grad_norm": 13.709066390991211, "learning_rate": 3.744714865898101e-06, "loss": 0.1449, "step": 262950 }, { "epoch": 2.59, "grad_norm": 4.805722713470459, "learning_rate": 3.744590743443852e-06, "loss": 0.2083, "step": 262975 }, { "epoch": 2.59, "grad_norm": 22.94837760925293, "learning_rate": 3.7444666209896038e-06, "loss": 0.1473, "step": 263000 }, { "epoch": 2.59, "grad_norm": 4.7862982749938965, "learning_rate": 3.7443424985353554e-06, "loss": 0.2185, "step": 263025 }, { "epoch": 2.59, "grad_norm": 11.467313766479492, "learning_rate": 3.744218376081107e-06, "loss": 0.1794, "step": 263050 }, { "epoch": 2.59, "grad_norm": 5.6343889236450195, "learning_rate": 3.7440942536268583e-06, "loss": 0.2226, "step": 263075 }, { "epoch": 2.59, "grad_norm": 9.702984809875488, "learning_rate": 3.7439701311726103e-06, "loss": 0.1547, "step": 263100 }, { "epoch": 2.59, "grad_norm": 0.6476994156837463, "learning_rate": 3.7438460087183615e-06, "loss": 0.2293, "step": 263125 }, { "epoch": 2.59, "grad_norm": 8.515168190002441, "learning_rate": 3.7437218862641127e-06, "loss": 0.15, "step": 263150 }, { "epoch": 2.59, "grad_norm": 9.874588012695312, "learning_rate": 3.743597763809865e-06, "loss": 0.2388, "step": 263175 }, { "epoch": 2.59, "grad_norm": 12.000236511230469, "learning_rate": 3.743473641355616e-06, "loss": 0.127, "step": 263200 }, { "epoch": 2.59, "grad_norm": 5.049372673034668, "learning_rate": 3.7433495189013676e-06, "loss": 0.2039, "step": 263225 }, { "epoch": 2.59, "grad_norm": 13.378734588623047, "learning_rate": 3.7432253964471193e-06, "loss": 0.1133, "step": 263250 }, { "epoch": 2.59, "grad_norm": 3.2256357669830322, "learning_rate": 3.743101273992871e-06, "loss": 0.2363, "step": 263275 }, { "epoch": 2.59, "grad_norm": 13.748591423034668, "learning_rate": 3.742977151538622e-06, "loss": 0.145, "step": 263300 }, { "epoch": 2.59, "grad_norm": 7.292609691619873, "learning_rate": 3.742853029084374e-06, "loss": 0.2319, "step": 263325 }, { "epoch": 2.59, "grad_norm": 5.1667938232421875, "learning_rate": 3.7427289066301254e-06, "loss": 0.1033, "step": 263350 }, { "epoch": 2.59, "grad_norm": 1.943194031715393, "learning_rate": 3.7426047841758766e-06, "loss": 0.2607, "step": 263375 }, { "epoch": 2.59, "grad_norm": 17.2710018157959, "learning_rate": 3.7424806617216287e-06, "loss": 0.1095, "step": 263400 }, { "epoch": 2.59, "grad_norm": 4.068770408630371, "learning_rate": 3.74235653926738e-06, "loss": 0.2147, "step": 263425 }, { "epoch": 2.59, "grad_norm": 10.154881477355957, "learning_rate": 3.7422324168131315e-06, "loss": 0.1459, "step": 263450 }, { "epoch": 2.59, "grad_norm": 5.918961048126221, "learning_rate": 3.742108294358883e-06, "loss": 0.175, "step": 263475 }, { "epoch": 2.59, "grad_norm": 12.2582426071167, "learning_rate": 3.7419841719046348e-06, "loss": 0.132, "step": 263500 }, { "epoch": 2.59, "grad_norm": 3.430776596069336, "learning_rate": 3.741860049450386e-06, "loss": 0.2113, "step": 263525 }, { "epoch": 2.59, "grad_norm": 7.224241733551025, "learning_rate": 3.741735926996138e-06, "loss": 0.1388, "step": 263550 }, { "epoch": 2.59, "grad_norm": 5.953498363494873, "learning_rate": 3.7416118045418893e-06, "loss": 0.248, "step": 263575 }, { "epoch": 2.59, "grad_norm": 12.60918140411377, "learning_rate": 3.7414876820876405e-06, "loss": 0.1339, "step": 263600 }, { "epoch": 2.59, "grad_norm": 5.250988960266113, "learning_rate": 3.7413635596333925e-06, "loss": 0.198, "step": 263625 }, { "epoch": 2.59, "grad_norm": 21.5794677734375, "learning_rate": 3.7412394371791437e-06, "loss": 0.1524, "step": 263650 }, { "epoch": 2.59, "grad_norm": 1.024815559387207, "learning_rate": 3.7411153147248954e-06, "loss": 0.2166, "step": 263675 }, { "epoch": 2.59, "grad_norm": 7.0814056396484375, "learning_rate": 3.740991192270647e-06, "loss": 0.1564, "step": 263700 }, { "epoch": 2.59, "grad_norm": 4.611361503601074, "learning_rate": 3.7408720347145685e-06, "loss": 0.232, "step": 263725 }, { "epoch": 2.59, "grad_norm": 13.806999206542969, "learning_rate": 3.7407479122603197e-06, "loss": 0.1471, "step": 263750 }, { "epoch": 2.59, "grad_norm": 4.40524959564209, "learning_rate": 3.7406237898060717e-06, "loss": 0.2401, "step": 263775 }, { "epoch": 2.59, "grad_norm": 12.144730567932129, "learning_rate": 3.740499667351823e-06, "loss": 0.0784, "step": 263800 }, { "epoch": 2.59, "grad_norm": 2.4561150074005127, "learning_rate": 3.740375544897574e-06, "loss": 0.2283, "step": 263825 }, { "epoch": 2.59, "grad_norm": 12.823722839355469, "learning_rate": 3.7402514224433262e-06, "loss": 0.1176, "step": 263850 }, { "epoch": 2.59, "grad_norm": 5.840722560882568, "learning_rate": 3.7401272999890774e-06, "loss": 0.2095, "step": 263875 }, { "epoch": 2.59, "grad_norm": 14.053350448608398, "learning_rate": 3.740003177534829e-06, "loss": 0.1097, "step": 263900 }, { "epoch": 2.59, "grad_norm": 4.905303478240967, "learning_rate": 3.7398790550805807e-06, "loss": 0.2375, "step": 263925 }, { "epoch": 2.6, "grad_norm": 8.702226638793945, "learning_rate": 3.7397549326263323e-06, "loss": 0.1399, "step": 263950 }, { "epoch": 2.6, "grad_norm": 8.595465660095215, "learning_rate": 3.7396308101720835e-06, "loss": 0.2169, "step": 263975 }, { "epoch": 2.6, "grad_norm": 12.483504295349121, "learning_rate": 3.7395066877178356e-06, "loss": 0.176, "step": 264000 }, { "epoch": 2.6, "grad_norm": 5.113598823547363, "learning_rate": 3.739382565263587e-06, "loss": 0.2205, "step": 264025 }, { "epoch": 2.6, "grad_norm": 18.067577362060547, "learning_rate": 3.739258442809338e-06, "loss": 0.1352, "step": 264050 }, { "epoch": 2.6, "grad_norm": 3.480450391769409, "learning_rate": 3.73913432035509e-06, "loss": 0.207, "step": 264075 }, { "epoch": 2.6, "grad_norm": 10.44619369506836, "learning_rate": 3.7390101979008413e-06, "loss": 0.1409, "step": 264100 }, { "epoch": 2.6, "grad_norm": 1.735513687133789, "learning_rate": 3.738886075446593e-06, "loss": 0.2274, "step": 264125 }, { "epoch": 2.6, "grad_norm": 6.293200492858887, "learning_rate": 3.7387619529923446e-06, "loss": 0.1516, "step": 264150 }, { "epoch": 2.6, "grad_norm": 6.733705997467041, "learning_rate": 3.738637830538096e-06, "loss": 0.2231, "step": 264175 }, { "epoch": 2.6, "grad_norm": 11.539763450622559, "learning_rate": 3.7385137080838474e-06, "loss": 0.148, "step": 264200 }, { "epoch": 2.6, "grad_norm": 3.626838445663452, "learning_rate": 3.7383895856295995e-06, "loss": 0.2026, "step": 264225 }, { "epoch": 2.6, "grad_norm": 11.701671600341797, "learning_rate": 3.7382654631753507e-06, "loss": 0.1351, "step": 264250 }, { "epoch": 2.6, "grad_norm": 0.6863385438919067, "learning_rate": 3.738141340721102e-06, "loss": 0.1582, "step": 264275 }, { "epoch": 2.6, "grad_norm": 15.563055992126465, "learning_rate": 3.7380172182668535e-06, "loss": 0.0921, "step": 264300 }, { "epoch": 2.6, "grad_norm": 3.0822880268096924, "learning_rate": 3.737893095812605e-06, "loss": 0.1859, "step": 264325 }, { "epoch": 2.6, "grad_norm": 10.756817817687988, "learning_rate": 3.737768973358357e-06, "loss": 0.1453, "step": 264350 }, { "epoch": 2.6, "grad_norm": 9.604659080505371, "learning_rate": 3.737644850904108e-06, "loss": 0.2138, "step": 264375 }, { "epoch": 2.6, "grad_norm": 13.369630813598633, "learning_rate": 3.73752072844986e-06, "loss": 0.1212, "step": 264400 }, { "epoch": 2.6, "grad_norm": 1.1882888078689575, "learning_rate": 3.7373966059956113e-06, "loss": 0.1948, "step": 264425 }, { "epoch": 2.6, "grad_norm": 14.679040908813477, "learning_rate": 3.7372724835413625e-06, "loss": 0.1589, "step": 264450 }, { "epoch": 2.6, "grad_norm": 8.193826675415039, "learning_rate": 3.7371483610871145e-06, "loss": 0.2304, "step": 264475 }, { "epoch": 2.6, "grad_norm": 12.786247253417969, "learning_rate": 3.7370242386328658e-06, "loss": 0.1274, "step": 264500 }, { "epoch": 2.6, "grad_norm": 12.944596290588379, "learning_rate": 3.7369001161786174e-06, "loss": 0.2085, "step": 264525 }, { "epoch": 2.6, "grad_norm": 13.649617195129395, "learning_rate": 3.736775993724369e-06, "loss": 0.1279, "step": 264550 }, { "epoch": 2.6, "grad_norm": 1.390463948249817, "learning_rate": 3.7366518712701207e-06, "loss": 0.1905, "step": 264575 }, { "epoch": 2.6, "grad_norm": 10.268566131591797, "learning_rate": 3.736527748815872e-06, "loss": 0.1139, "step": 264600 }, { "epoch": 2.6, "grad_norm": 5.115251541137695, "learning_rate": 3.736403626361624e-06, "loss": 0.2326, "step": 264625 }, { "epoch": 2.6, "grad_norm": 12.072633743286133, "learning_rate": 3.736279503907375e-06, "loss": 0.1306, "step": 264650 }, { "epoch": 2.6, "grad_norm": 2.9199893474578857, "learning_rate": 3.7361553814531264e-06, "loss": 0.2441, "step": 264675 }, { "epoch": 2.6, "grad_norm": 14.949310302734375, "learning_rate": 3.7360312589988784e-06, "loss": 0.1231, "step": 264700 }, { "epoch": 2.6, "grad_norm": 7.142917633056641, "learning_rate": 3.7359071365446296e-06, "loss": 0.2445, "step": 264725 }, { "epoch": 2.6, "grad_norm": 11.85176944732666, "learning_rate": 3.7357830140903813e-06, "loss": 0.1423, "step": 264750 }, { "epoch": 2.6, "grad_norm": 3.3821818828582764, "learning_rate": 3.735658891636133e-06, "loss": 0.1965, "step": 264775 }, { "epoch": 2.6, "grad_norm": 9.66645336151123, "learning_rate": 3.7355347691818845e-06, "loss": 0.1116, "step": 264800 }, { "epoch": 2.6, "grad_norm": 4.607944965362549, "learning_rate": 3.7354106467276357e-06, "loss": 0.1954, "step": 264825 }, { "epoch": 2.6, "grad_norm": 17.103235244750977, "learning_rate": 3.735286524273388e-06, "loss": 0.1116, "step": 264850 }, { "epoch": 2.6, "grad_norm": 3.4819905757904053, "learning_rate": 3.735162401819139e-06, "loss": 0.2252, "step": 264875 }, { "epoch": 2.6, "grad_norm": 9.340963363647461, "learning_rate": 3.7350382793648902e-06, "loss": 0.1824, "step": 264900 }, { "epoch": 2.6, "grad_norm": 3.9580237865448, "learning_rate": 3.7349141569106423e-06, "loss": 0.2563, "step": 264925 }, { "epoch": 2.61, "grad_norm": 8.982805252075195, "learning_rate": 3.7347900344563935e-06, "loss": 0.0957, "step": 264950 }, { "epoch": 2.61, "grad_norm": 7.187701225280762, "learning_rate": 3.734665912002145e-06, "loss": 0.232, "step": 264975 }, { "epoch": 2.61, "grad_norm": 9.544981956481934, "learning_rate": 3.7345417895478968e-06, "loss": 0.129, "step": 265000 }, { "epoch": 2.61, "grad_norm": 5.725057601928711, "learning_rate": 3.7344176670936484e-06, "loss": 0.2326, "step": 265025 }, { "epoch": 2.61, "grad_norm": 14.592304229736328, "learning_rate": 3.7342935446393996e-06, "loss": 0.1323, "step": 265050 }, { "epoch": 2.61, "grad_norm": 5.094920635223389, "learning_rate": 3.7341694221851517e-06, "loss": 0.2017, "step": 265075 }, { "epoch": 2.61, "grad_norm": 15.41443157196045, "learning_rate": 3.734045299730903e-06, "loss": 0.1265, "step": 265100 }, { "epoch": 2.61, "grad_norm": 9.661003112792969, "learning_rate": 3.733921177276654e-06, "loss": 0.2229, "step": 265125 }, { "epoch": 2.61, "grad_norm": 10.279143333435059, "learning_rate": 3.733797054822406e-06, "loss": 0.1541, "step": 265150 }, { "epoch": 2.61, "grad_norm": 4.242587089538574, "learning_rate": 3.7336729323681574e-06, "loss": 0.2265, "step": 265175 }, { "epoch": 2.61, "grad_norm": 14.003304481506348, "learning_rate": 3.733548809913909e-06, "loss": 0.1261, "step": 265200 }, { "epoch": 2.61, "grad_norm": 4.269500255584717, "learning_rate": 3.73342468745966e-06, "loss": 0.2065, "step": 265225 }, { "epoch": 2.61, "grad_norm": 11.418456077575684, "learning_rate": 3.7333005650054123e-06, "loss": 0.1112, "step": 265250 }, { "epoch": 2.61, "grad_norm": 5.552100658416748, "learning_rate": 3.7331764425511635e-06, "loss": 0.2123, "step": 265275 }, { "epoch": 2.61, "grad_norm": 6.690584659576416, "learning_rate": 3.7330523200969147e-06, "loss": 0.1262, "step": 265300 }, { "epoch": 2.61, "grad_norm": 3.857114791870117, "learning_rate": 3.7329281976426667e-06, "loss": 0.213, "step": 265325 }, { "epoch": 2.61, "grad_norm": 14.555258750915527, "learning_rate": 3.732804075188418e-06, "loss": 0.1026, "step": 265350 }, { "epoch": 2.61, "grad_norm": 8.23718547821045, "learning_rate": 3.7326799527341696e-06, "loss": 0.2014, "step": 265375 }, { "epoch": 2.61, "grad_norm": 7.788849830627441, "learning_rate": 3.7325558302799212e-06, "loss": 0.1108, "step": 265400 }, { "epoch": 2.61, "grad_norm": 2.1420655250549316, "learning_rate": 3.732431707825673e-06, "loss": 0.235, "step": 265425 }, { "epoch": 2.61, "grad_norm": 3.159132480621338, "learning_rate": 3.732307585371424e-06, "loss": 0.1162, "step": 265450 }, { "epoch": 2.61, "grad_norm": 28.289989471435547, "learning_rate": 3.732183462917176e-06, "loss": 0.2179, "step": 265475 }, { "epoch": 2.61, "grad_norm": 10.595142364501953, "learning_rate": 3.7320593404629273e-06, "loss": 0.1376, "step": 265500 }, { "epoch": 2.61, "grad_norm": 6.553267955780029, "learning_rate": 3.7319352180086786e-06, "loss": 0.206, "step": 265525 }, { "epoch": 2.61, "grad_norm": 12.330270767211914, "learning_rate": 3.7318110955544306e-06, "loss": 0.1095, "step": 265550 }, { "epoch": 2.61, "grad_norm": 5.980013370513916, "learning_rate": 3.731686973100182e-06, "loss": 0.2342, "step": 265575 }, { "epoch": 2.61, "grad_norm": 10.661259651184082, "learning_rate": 3.7315628506459335e-06, "loss": 0.1064, "step": 265600 }, { "epoch": 2.61, "grad_norm": 5.562642574310303, "learning_rate": 3.731438728191685e-06, "loss": 0.2261, "step": 265625 }, { "epoch": 2.61, "grad_norm": 7.2048869132995605, "learning_rate": 3.7313146057374367e-06, "loss": 0.1241, "step": 265650 }, { "epoch": 2.61, "grad_norm": 6.270697116851807, "learning_rate": 3.731190483283188e-06, "loss": 0.1909, "step": 265675 }, { "epoch": 2.61, "grad_norm": 7.068108081817627, "learning_rate": 3.73106636082894e-06, "loss": 0.1262, "step": 265700 }, { "epoch": 2.61, "grad_norm": 0.48324263095855713, "learning_rate": 3.7309422383746912e-06, "loss": 0.2299, "step": 265725 }, { "epoch": 2.61, "grad_norm": 17.891773223876953, "learning_rate": 3.7308181159204424e-06, "loss": 0.1293, "step": 265750 }, { "epoch": 2.61, "grad_norm": 2.3057024478912354, "learning_rate": 3.7306939934661945e-06, "loss": 0.2352, "step": 265775 }, { "epoch": 2.61, "grad_norm": 7.9578728675842285, "learning_rate": 3.7305698710119457e-06, "loss": 0.1295, "step": 265800 }, { "epoch": 2.61, "grad_norm": 5.730440616607666, "learning_rate": 3.7304457485576973e-06, "loss": 0.222, "step": 265825 }, { "epoch": 2.61, "grad_norm": 15.968916893005371, "learning_rate": 3.730321626103449e-06, "loss": 0.1147, "step": 265850 }, { "epoch": 2.61, "grad_norm": 2.497039556503296, "learning_rate": 3.7301975036492006e-06, "loss": 0.2038, "step": 265875 }, { "epoch": 2.61, "grad_norm": 16.502452850341797, "learning_rate": 3.730073381194952e-06, "loss": 0.1188, "step": 265900 }, { "epoch": 2.61, "grad_norm": 4.396456718444824, "learning_rate": 3.729949258740704e-06, "loss": 0.2271, "step": 265925 }, { "epoch": 2.61, "grad_norm": 14.532909393310547, "learning_rate": 3.729825136286455e-06, "loss": 0.1412, "step": 265950 }, { "epoch": 2.62, "grad_norm": 4.310069561004639, "learning_rate": 3.7297059787303765e-06, "loss": 0.2479, "step": 265975 }, { "epoch": 2.62, "grad_norm": 12.017084121704102, "learning_rate": 3.729581856276128e-06, "loss": 0.1308, "step": 266000 }, { "epoch": 2.62, "grad_norm": 1.7771027088165283, "learning_rate": 3.7294577338218794e-06, "loss": 0.1969, "step": 266025 }, { "epoch": 2.62, "grad_norm": 11.229576110839844, "learning_rate": 3.729333611367631e-06, "loss": 0.1215, "step": 266050 }, { "epoch": 2.62, "grad_norm": 3.825361490249634, "learning_rate": 3.7292094889133826e-06, "loss": 0.2612, "step": 266075 }, { "epoch": 2.62, "grad_norm": 15.606451034545898, "learning_rate": 3.7290853664591343e-06, "loss": 0.1006, "step": 266100 }, { "epoch": 2.62, "grad_norm": 5.707878112792969, "learning_rate": 3.7289612440048855e-06, "loss": 0.2072, "step": 266125 }, { "epoch": 2.62, "grad_norm": 11.310408592224121, "learning_rate": 3.7288371215506376e-06, "loss": 0.1317, "step": 266150 }, { "epoch": 2.62, "grad_norm": 5.317818641662598, "learning_rate": 3.7287129990963888e-06, "loss": 0.2119, "step": 266175 }, { "epoch": 2.62, "grad_norm": 11.453119277954102, "learning_rate": 3.7285888766421404e-06, "loss": 0.1446, "step": 266200 }, { "epoch": 2.62, "grad_norm": 0.7865216135978699, "learning_rate": 3.728464754187892e-06, "loss": 0.2398, "step": 266225 }, { "epoch": 2.62, "grad_norm": 8.858484268188477, "learning_rate": 3.7283406317336437e-06, "loss": 0.1179, "step": 266250 }, { "epoch": 2.62, "grad_norm": 7.694440841674805, "learning_rate": 3.728216509279395e-06, "loss": 0.2292, "step": 266275 }, { "epoch": 2.62, "grad_norm": 11.17662525177002, "learning_rate": 3.728092386825147e-06, "loss": 0.103, "step": 266300 }, { "epoch": 2.62, "grad_norm": 3.4659080505371094, "learning_rate": 3.727968264370898e-06, "loss": 0.2218, "step": 266325 }, { "epoch": 2.62, "grad_norm": 15.931214332580566, "learning_rate": 3.7278441419166494e-06, "loss": 0.1255, "step": 266350 }, { "epoch": 2.62, "grad_norm": 2.1093590259552, "learning_rate": 3.7277200194624014e-06, "loss": 0.2328, "step": 266375 }, { "epoch": 2.62, "grad_norm": 14.974214553833008, "learning_rate": 3.7275958970081526e-06, "loss": 0.1161, "step": 266400 }, { "epoch": 2.62, "grad_norm": 3.8595707416534424, "learning_rate": 3.7274717745539043e-06, "loss": 0.1885, "step": 266425 }, { "epoch": 2.62, "grad_norm": 8.880078315734863, "learning_rate": 3.727347652099656e-06, "loss": 0.1765, "step": 266450 }, { "epoch": 2.62, "grad_norm": 6.027508735656738, "learning_rate": 3.7272235296454075e-06, "loss": 0.219, "step": 266475 }, { "epoch": 2.62, "grad_norm": 11.900086402893066, "learning_rate": 3.7270994071911587e-06, "loss": 0.1289, "step": 266500 }, { "epoch": 2.62, "grad_norm": 2.126378059387207, "learning_rate": 3.726975284736911e-06, "loss": 0.2716, "step": 266525 }, { "epoch": 2.62, "grad_norm": 22.165449142456055, "learning_rate": 3.726851162282662e-06, "loss": 0.1048, "step": 266550 }, { "epoch": 2.62, "grad_norm": 7.016496658325195, "learning_rate": 3.7267270398284132e-06, "loss": 0.2135, "step": 266575 }, { "epoch": 2.62, "grad_norm": 10.88856029510498, "learning_rate": 3.726602917374165e-06, "loss": 0.1215, "step": 266600 }, { "epoch": 2.62, "grad_norm": 8.73658561706543, "learning_rate": 3.7264787949199165e-06, "loss": 0.2162, "step": 266625 }, { "epoch": 2.62, "grad_norm": 4.76532506942749, "learning_rate": 3.726354672465668e-06, "loss": 0.1189, "step": 266650 }, { "epoch": 2.62, "grad_norm": 6.34337854385376, "learning_rate": 3.7262305500114193e-06, "loss": 0.2401, "step": 266675 }, { "epoch": 2.62, "grad_norm": 12.57925796508789, "learning_rate": 3.7261064275571714e-06, "loss": 0.1296, "step": 266700 }, { "epoch": 2.62, "grad_norm": 5.720302581787109, "learning_rate": 3.7259823051029226e-06, "loss": 0.2444, "step": 266725 }, { "epoch": 2.62, "grad_norm": 6.620325565338135, "learning_rate": 3.725858182648674e-06, "loss": 0.0892, "step": 266750 }, { "epoch": 2.62, "grad_norm": 3.014331817626953, "learning_rate": 3.725734060194426e-06, "loss": 0.2034, "step": 266775 }, { "epoch": 2.62, "grad_norm": 5.416557788848877, "learning_rate": 3.725609937740177e-06, "loss": 0.1224, "step": 266800 }, { "epoch": 2.62, "grad_norm": 3.6787519454956055, "learning_rate": 3.7254858152859287e-06, "loss": 0.1946, "step": 266825 }, { "epoch": 2.62, "grad_norm": 9.782474517822266, "learning_rate": 3.7253616928316804e-06, "loss": 0.1119, "step": 266850 }, { "epoch": 2.62, "grad_norm": 24.056522369384766, "learning_rate": 3.725237570377432e-06, "loss": 0.2672, "step": 266875 }, { "epoch": 2.62, "grad_norm": 9.841761589050293, "learning_rate": 3.7251134479231832e-06, "loss": 0.0906, "step": 266900 }, { "epoch": 2.62, "grad_norm": 7.793657302856445, "learning_rate": 3.7249893254689353e-06, "loss": 0.2406, "step": 266925 }, { "epoch": 2.62, "grad_norm": 7.099705219268799, "learning_rate": 3.7248652030146865e-06, "loss": 0.1367, "step": 266950 }, { "epoch": 2.62, "grad_norm": 5.726408004760742, "learning_rate": 3.7247410805604377e-06, "loss": 0.223, "step": 266975 }, { "epoch": 2.63, "grad_norm": 4.774311542510986, "learning_rate": 3.7246169581061898e-06, "loss": 0.1239, "step": 267000 }, { "epoch": 2.63, "grad_norm": 6.325273513793945, "learning_rate": 3.724492835651941e-06, "loss": 0.1982, "step": 267025 }, { "epoch": 2.63, "grad_norm": 15.517959594726562, "learning_rate": 3.7243687131976926e-06, "loss": 0.1473, "step": 267050 }, { "epoch": 2.63, "grad_norm": 3.147263288497925, "learning_rate": 3.7242445907434442e-06, "loss": 0.183, "step": 267075 }, { "epoch": 2.63, "grad_norm": 13.968832969665527, "learning_rate": 3.724120468289196e-06, "loss": 0.1493, "step": 267100 }, { "epoch": 2.63, "grad_norm": 4.56804084777832, "learning_rate": 3.723996345834947e-06, "loss": 0.236, "step": 267125 }, { "epoch": 2.63, "grad_norm": 14.021903038024902, "learning_rate": 3.723872223380699e-06, "loss": 0.1502, "step": 267150 }, { "epoch": 2.63, "grad_norm": 7.321807861328125, "learning_rate": 3.7237481009264504e-06, "loss": 0.2586, "step": 267175 }, { "epoch": 2.63, "grad_norm": 10.569011688232422, "learning_rate": 3.7236239784722016e-06, "loss": 0.1146, "step": 267200 }, { "epoch": 2.63, "grad_norm": 1.3845335245132446, "learning_rate": 3.7234998560179536e-06, "loss": 0.193, "step": 267225 }, { "epoch": 2.63, "grad_norm": 8.353403091430664, "learning_rate": 3.723375733563705e-06, "loss": 0.1181, "step": 267250 }, { "epoch": 2.63, "grad_norm": 1.9202316999435425, "learning_rate": 3.7232516111094565e-06, "loss": 0.1906, "step": 267275 }, { "epoch": 2.63, "grad_norm": 8.802103996276855, "learning_rate": 3.723127488655208e-06, "loss": 0.1422, "step": 267300 }, { "epoch": 2.63, "grad_norm": 3.9059982299804688, "learning_rate": 3.7230033662009597e-06, "loss": 0.2297, "step": 267325 }, { "epoch": 2.63, "grad_norm": 8.107209205627441, "learning_rate": 3.722879243746711e-06, "loss": 0.1187, "step": 267350 }, { "epoch": 2.63, "grad_norm": 9.495769500732422, "learning_rate": 3.722755121292463e-06, "loss": 0.2674, "step": 267375 }, { "epoch": 2.63, "grad_norm": 11.604856491088867, "learning_rate": 3.7226309988382142e-06, "loss": 0.1381, "step": 267400 }, { "epoch": 2.63, "grad_norm": 7.847933292388916, "learning_rate": 3.7225068763839654e-06, "loss": 0.2352, "step": 267425 }, { "epoch": 2.63, "grad_norm": 14.443227767944336, "learning_rate": 3.722382753929717e-06, "loss": 0.1498, "step": 267450 }, { "epoch": 2.63, "grad_norm": 4.763205528259277, "learning_rate": 3.7222586314754687e-06, "loss": 0.2104, "step": 267475 }, { "epoch": 2.63, "grad_norm": 4.608799934387207, "learning_rate": 3.7221345090212203e-06, "loss": 0.1298, "step": 267500 }, { "epoch": 2.63, "grad_norm": 5.5472846031188965, "learning_rate": 3.7220103865669715e-06, "loss": 0.2343, "step": 267525 }, { "epoch": 2.63, "grad_norm": 12.30190658569336, "learning_rate": 3.7218862641127236e-06, "loss": 0.1456, "step": 267550 }, { "epoch": 2.63, "grad_norm": 3.53528094291687, "learning_rate": 3.721762141658475e-06, "loss": 0.2564, "step": 267575 }, { "epoch": 2.63, "grad_norm": 6.105391502380371, "learning_rate": 3.721638019204226e-06, "loss": 0.1371, "step": 267600 }, { "epoch": 2.63, "grad_norm": 4.182328224182129, "learning_rate": 3.721513896749978e-06, "loss": 0.2433, "step": 267625 }, { "epoch": 2.63, "grad_norm": 11.701815605163574, "learning_rate": 3.7213897742957293e-06, "loss": 0.1342, "step": 267650 }, { "epoch": 2.63, "grad_norm": 5.710608005523682, "learning_rate": 3.721265651841481e-06, "loss": 0.2293, "step": 267675 }, { "epoch": 2.63, "grad_norm": 9.91317367553711, "learning_rate": 3.7211415293872326e-06, "loss": 0.1228, "step": 267700 }, { "epoch": 2.63, "grad_norm": 4.618306636810303, "learning_rate": 3.721017406932984e-06, "loss": 0.2149, "step": 267725 }, { "epoch": 2.63, "grad_norm": 12.468664169311523, "learning_rate": 3.7208932844787354e-06, "loss": 0.1202, "step": 267750 }, { "epoch": 2.63, "grad_norm": 3.5532591342926025, "learning_rate": 3.7207691620244875e-06, "loss": 0.2303, "step": 267775 }, { "epoch": 2.63, "grad_norm": 10.300922393798828, "learning_rate": 3.7206450395702387e-06, "loss": 0.1239, "step": 267800 }, { "epoch": 2.63, "grad_norm": 0.0658794716000557, "learning_rate": 3.72052091711599e-06, "loss": 0.1976, "step": 267825 }, { "epoch": 2.63, "grad_norm": 14.349509239196777, "learning_rate": 3.720396794661742e-06, "loss": 0.1501, "step": 267850 }, { "epoch": 2.63, "grad_norm": 4.137950897216797, "learning_rate": 3.720272672207493e-06, "loss": 0.253, "step": 267875 }, { "epoch": 2.63, "grad_norm": 14.068713188171387, "learning_rate": 3.720148549753245e-06, "loss": 0.1349, "step": 267900 }, { "epoch": 2.63, "grad_norm": 2.4827170372009277, "learning_rate": 3.7200244272989964e-06, "loss": 0.1847, "step": 267925 }, { "epoch": 2.63, "grad_norm": 6.7530670166015625, "learning_rate": 3.719900304844748e-06, "loss": 0.1487, "step": 267950 }, { "epoch": 2.63, "grad_norm": 3.0819594860076904, "learning_rate": 3.7197811472886695e-06, "loss": 0.2061, "step": 267975 }, { "epoch": 2.64, "grad_norm": 7.310165882110596, "learning_rate": 3.719657024834421e-06, "loss": 0.1046, "step": 268000 }, { "epoch": 2.64, "grad_norm": 1.0152050256729126, "learning_rate": 3.7195329023801724e-06, "loss": 0.1991, "step": 268025 }, { "epoch": 2.64, "grad_norm": 10.148322105407715, "learning_rate": 3.7194087799259236e-06, "loss": 0.1264, "step": 268050 }, { "epoch": 2.64, "grad_norm": 0.27724236249923706, "learning_rate": 3.7192846574716756e-06, "loss": 0.2204, "step": 268075 }, { "epoch": 2.64, "grad_norm": 10.203027725219727, "learning_rate": 3.719160535017427e-06, "loss": 0.1332, "step": 268100 }, { "epoch": 2.64, "grad_norm": 6.381344318389893, "learning_rate": 3.7190364125631785e-06, "loss": 0.1857, "step": 268125 }, { "epoch": 2.64, "grad_norm": 11.006593704223633, "learning_rate": 3.71891229010893e-06, "loss": 0.1096, "step": 268150 }, { "epoch": 2.64, "grad_norm": 4.863068103790283, "learning_rate": 3.7187881676546818e-06, "loss": 0.2268, "step": 268175 }, { "epoch": 2.64, "grad_norm": 10.41195297241211, "learning_rate": 3.718664045200433e-06, "loss": 0.1514, "step": 268200 }, { "epoch": 2.64, "grad_norm": 2.9646923542022705, "learning_rate": 3.718539922746185e-06, "loss": 0.2429, "step": 268225 }, { "epoch": 2.64, "grad_norm": 21.31376075744629, "learning_rate": 3.7184158002919362e-06, "loss": 0.2107, "step": 268250 }, { "epoch": 2.64, "grad_norm": 5.161921501159668, "learning_rate": 3.7182916778376874e-06, "loss": 0.2789, "step": 268275 }, { "epoch": 2.64, "grad_norm": 11.203100204467773, "learning_rate": 3.7181675553834395e-06, "loss": 0.1166, "step": 268300 }, { "epoch": 2.64, "grad_norm": 3.0992910861968994, "learning_rate": 3.7180434329291907e-06, "loss": 0.2036, "step": 268325 }, { "epoch": 2.64, "grad_norm": 15.01816177368164, "learning_rate": 3.7179193104749424e-06, "loss": 0.1452, "step": 268350 }, { "epoch": 2.64, "grad_norm": 3.8100361824035645, "learning_rate": 3.717795188020694e-06, "loss": 0.1964, "step": 268375 }, { "epoch": 2.64, "grad_norm": 9.221975326538086, "learning_rate": 3.7176710655664456e-06, "loss": 0.1487, "step": 268400 }, { "epoch": 2.64, "grad_norm": 5.766631126403809, "learning_rate": 3.717546943112197e-06, "loss": 0.2218, "step": 268425 }, { "epoch": 2.64, "grad_norm": 16.30333709716797, "learning_rate": 3.717422820657949e-06, "loss": 0.143, "step": 268450 }, { "epoch": 2.64, "grad_norm": 4.388567924499512, "learning_rate": 3.7172986982037e-06, "loss": 0.2311, "step": 268475 }, { "epoch": 2.64, "grad_norm": 10.379419326782227, "learning_rate": 3.7171745757494513e-06, "loss": 0.1063, "step": 268500 }, { "epoch": 2.64, "grad_norm": 0.8279392719268799, "learning_rate": 3.7170504532952034e-06, "loss": 0.217, "step": 268525 }, { "epoch": 2.64, "grad_norm": 15.69168758392334, "learning_rate": 3.7169263308409546e-06, "loss": 0.1538, "step": 268550 }, { "epoch": 2.64, "grad_norm": 6.826336860656738, "learning_rate": 3.7168022083867062e-06, "loss": 0.2154, "step": 268575 }, { "epoch": 2.64, "grad_norm": 7.306760787963867, "learning_rate": 3.716678085932458e-06, "loss": 0.0917, "step": 268600 }, { "epoch": 2.64, "grad_norm": 8.483778953552246, "learning_rate": 3.7165539634782095e-06, "loss": 0.2255, "step": 268625 }, { "epoch": 2.64, "grad_norm": 10.887641906738281, "learning_rate": 3.7164298410239607e-06, "loss": 0.1577, "step": 268650 }, { "epoch": 2.64, "grad_norm": 7.159594535827637, "learning_rate": 3.7163057185697128e-06, "loss": 0.2076, "step": 268675 }, { "epoch": 2.64, "grad_norm": 8.315766334533691, "learning_rate": 3.716181596115464e-06, "loss": 0.1136, "step": 268700 }, { "epoch": 2.64, "grad_norm": 3.1266114711761475, "learning_rate": 3.716057473661215e-06, "loss": 0.215, "step": 268725 }, { "epoch": 2.64, "grad_norm": 11.281288146972656, "learning_rate": 3.7159333512069672e-06, "loss": 0.1224, "step": 268750 }, { "epoch": 2.64, "grad_norm": 6.814062595367432, "learning_rate": 3.7158092287527185e-06, "loss": 0.2144, "step": 268775 }, { "epoch": 2.64, "grad_norm": 9.737277030944824, "learning_rate": 3.71568510629847e-06, "loss": 0.1139, "step": 268800 }, { "epoch": 2.64, "grad_norm": 3.0176682472229004, "learning_rate": 3.7155609838442217e-06, "loss": 0.2914, "step": 268825 }, { "epoch": 2.64, "grad_norm": 14.206323623657227, "learning_rate": 3.7154368613899734e-06, "loss": 0.1335, "step": 268850 }, { "epoch": 2.64, "grad_norm": 1.9932454824447632, "learning_rate": 3.7153127389357246e-06, "loss": 0.2182, "step": 268875 }, { "epoch": 2.64, "grad_norm": 9.374287605285645, "learning_rate": 3.715188616481476e-06, "loss": 0.0899, "step": 268900 }, { "epoch": 2.64, "grad_norm": 5.458852291107178, "learning_rate": 3.715064494027228e-06, "loss": 0.2521, "step": 268925 }, { "epoch": 2.64, "grad_norm": 12.344718933105469, "learning_rate": 3.7149403715729795e-06, "loss": 0.1419, "step": 268950 }, { "epoch": 2.64, "grad_norm": 8.537222862243652, "learning_rate": 3.7148162491187307e-06, "loss": 0.239, "step": 268975 }, { "epoch": 2.64, "grad_norm": 0.6910183429718018, "learning_rate": 3.7146921266644827e-06, "loss": 0.1066, "step": 269000 }, { "epoch": 2.65, "grad_norm": 7.146838188171387, "learning_rate": 3.714568004210234e-06, "loss": 0.2075, "step": 269025 }, { "epoch": 2.65, "grad_norm": 7.679080009460449, "learning_rate": 3.714443881755985e-06, "loss": 0.1108, "step": 269050 }, { "epoch": 2.65, "grad_norm": 2.1279032230377197, "learning_rate": 3.7143197593017372e-06, "loss": 0.2204, "step": 269075 }, { "epoch": 2.65, "grad_norm": 10.543340682983398, "learning_rate": 3.7141956368474884e-06, "loss": 0.1362, "step": 269100 }, { "epoch": 2.65, "grad_norm": 4.020378589630127, "learning_rate": 3.71407151439324e-06, "loss": 0.2129, "step": 269125 }, { "epoch": 2.65, "grad_norm": 11.22330379486084, "learning_rate": 3.7139473919389917e-06, "loss": 0.1103, "step": 269150 }, { "epoch": 2.65, "grad_norm": 3.351773262023926, "learning_rate": 3.7138232694847433e-06, "loss": 0.2128, "step": 269175 }, { "epoch": 2.65, "grad_norm": 12.538805961608887, "learning_rate": 3.7136991470304946e-06, "loss": 0.1151, "step": 269200 }, { "epoch": 2.65, "grad_norm": 2.923248767852783, "learning_rate": 3.7135750245762466e-06, "loss": 0.2034, "step": 269225 }, { "epoch": 2.65, "grad_norm": 7.891654014587402, "learning_rate": 3.713450902121998e-06, "loss": 0.138, "step": 269250 }, { "epoch": 2.65, "grad_norm": 6.217968940734863, "learning_rate": 3.713326779667749e-06, "loss": 0.1981, "step": 269275 }, { "epoch": 2.65, "grad_norm": 11.399755477905273, "learning_rate": 3.713202657213501e-06, "loss": 0.1312, "step": 269300 }, { "epoch": 2.65, "grad_norm": 2.827399969100952, "learning_rate": 3.7130785347592523e-06, "loss": 0.2374, "step": 269325 }, { "epoch": 2.65, "grad_norm": 10.880547523498535, "learning_rate": 3.712954412305004e-06, "loss": 0.1432, "step": 269350 }, { "epoch": 2.65, "grad_norm": 1.755077838897705, "learning_rate": 3.7128302898507556e-06, "loss": 0.2302, "step": 269375 }, { "epoch": 2.65, "grad_norm": 8.347006797790527, "learning_rate": 3.712706167396507e-06, "loss": 0.1211, "step": 269400 }, { "epoch": 2.65, "grad_norm": 4.777340888977051, "learning_rate": 3.7125820449422584e-06, "loss": 0.2165, "step": 269425 }, { "epoch": 2.65, "grad_norm": 16.186281204223633, "learning_rate": 3.7124579224880105e-06, "loss": 0.1439, "step": 269450 }, { "epoch": 2.65, "grad_norm": 5.852056503295898, "learning_rate": 3.7123338000337617e-06, "loss": 0.1741, "step": 269475 }, { "epoch": 2.65, "grad_norm": 11.882834434509277, "learning_rate": 3.712209677579513e-06, "loss": 0.1165, "step": 269500 }, { "epoch": 2.65, "grad_norm": 1.337633490562439, "learning_rate": 3.712085555125265e-06, "loss": 0.2324, "step": 269525 }, { "epoch": 2.65, "grad_norm": 9.3213472366333, "learning_rate": 3.711961432671016e-06, "loss": 0.1345, "step": 269550 }, { "epoch": 2.65, "grad_norm": 4.241107940673828, "learning_rate": 3.711837310216768e-06, "loss": 0.2117, "step": 269575 }, { "epoch": 2.65, "grad_norm": 11.833989143371582, "learning_rate": 3.7117131877625194e-06, "loss": 0.1052, "step": 269600 }, { "epoch": 2.65, "grad_norm": 3.087089776992798, "learning_rate": 3.711589065308271e-06, "loss": 0.235, "step": 269625 }, { "epoch": 2.65, "grad_norm": 10.012373924255371, "learning_rate": 3.7114649428540223e-06, "loss": 0.1298, "step": 269650 }, { "epoch": 2.65, "grad_norm": 5.887344837188721, "learning_rate": 3.7113408203997743e-06, "loss": 0.2574, "step": 269675 }, { "epoch": 2.65, "grad_norm": 15.495054244995117, "learning_rate": 3.7112166979455256e-06, "loss": 0.1067, "step": 269700 }, { "epoch": 2.65, "grad_norm": 7.174941062927246, "learning_rate": 3.7110925754912768e-06, "loss": 0.2308, "step": 269725 }, { "epoch": 2.65, "grad_norm": 13.336481094360352, "learning_rate": 3.7109684530370284e-06, "loss": 0.1388, "step": 269750 }, { "epoch": 2.65, "grad_norm": 5.705691337585449, "learning_rate": 3.71084433058278e-06, "loss": 0.2301, "step": 269775 }, { "epoch": 2.65, "grad_norm": 9.353179931640625, "learning_rate": 3.7107202081285317e-06, "loss": 0.1218, "step": 269800 }, { "epoch": 2.65, "grad_norm": 1.9697275161743164, "learning_rate": 3.710596085674283e-06, "loss": 0.2615, "step": 269825 }, { "epoch": 2.65, "grad_norm": 8.916321754455566, "learning_rate": 3.710471963220035e-06, "loss": 0.1282, "step": 269850 }, { "epoch": 2.65, "grad_norm": 7.2752685546875, "learning_rate": 3.710347840765786e-06, "loss": 0.2255, "step": 269875 }, { "epoch": 2.65, "grad_norm": 8.819718360900879, "learning_rate": 3.7102237183115374e-06, "loss": 0.1599, "step": 269900 }, { "epoch": 2.65, "grad_norm": 4.434284210205078, "learning_rate": 3.7100995958572894e-06, "loss": 0.2826, "step": 269925 }, { "epoch": 2.65, "grad_norm": 11.206536293029785, "learning_rate": 3.7099754734030406e-06, "loss": 0.1092, "step": 269950 }, { "epoch": 2.65, "grad_norm": 4.995991230010986, "learning_rate": 3.7098513509487923e-06, "loss": 0.2204, "step": 269975 }, { "epoch": 2.65, "grad_norm": 13.192519187927246, "learning_rate": 3.709727228494544e-06, "loss": 0.1698, "step": 270000 }, { "epoch": 2.65, "grad_norm": 5.001618385314941, "learning_rate": 3.7096031060402955e-06, "loss": 0.1651, "step": 270025 }, { "epoch": 2.66, "grad_norm": 13.726956367492676, "learning_rate": 3.7094789835860468e-06, "loss": 0.1153, "step": 270050 }, { "epoch": 2.66, "grad_norm": 4.738820552825928, "learning_rate": 3.709354861131799e-06, "loss": 0.1794, "step": 270075 }, { "epoch": 2.66, "grad_norm": 11.29616641998291, "learning_rate": 3.70923073867755e-06, "loss": 0.1665, "step": 270100 }, { "epoch": 2.66, "grad_norm": 4.523717880249023, "learning_rate": 3.709111581121472e-06, "loss": 0.1996, "step": 270125 }, { "epoch": 2.66, "grad_norm": 10.06881046295166, "learning_rate": 3.708987458667223e-06, "loss": 0.1246, "step": 270150 }, { "epoch": 2.66, "grad_norm": 1.8597835302352905, "learning_rate": 3.7088633362129743e-06, "loss": 0.1849, "step": 270175 }, { "epoch": 2.66, "grad_norm": 4.538570404052734, "learning_rate": 3.7087392137587264e-06, "loss": 0.1294, "step": 270200 }, { "epoch": 2.66, "grad_norm": 3.840031385421753, "learning_rate": 3.7086150913044776e-06, "loss": 0.1888, "step": 270225 }, { "epoch": 2.66, "grad_norm": 10.720273971557617, "learning_rate": 3.7084909688502292e-06, "loss": 0.1115, "step": 270250 }, { "epoch": 2.66, "grad_norm": 6.183406352996826, "learning_rate": 3.708366846395981e-06, "loss": 0.2552, "step": 270275 }, { "epoch": 2.66, "grad_norm": 13.70215129852295, "learning_rate": 3.7082427239417325e-06, "loss": 0.1002, "step": 270300 }, { "epoch": 2.66, "grad_norm": 6.454707145690918, "learning_rate": 3.7081186014874837e-06, "loss": 0.2558, "step": 270325 }, { "epoch": 2.66, "grad_norm": 10.791616439819336, "learning_rate": 3.707994479033235e-06, "loss": 0.1058, "step": 270350 }, { "epoch": 2.66, "grad_norm": 6.702480316162109, "learning_rate": 3.707870356578987e-06, "loss": 0.1854, "step": 270375 }, { "epoch": 2.66, "grad_norm": 25.269502639770508, "learning_rate": 3.707746234124738e-06, "loss": 0.1483, "step": 270400 }, { "epoch": 2.66, "grad_norm": 7.561361312866211, "learning_rate": 3.70762211167049e-06, "loss": 0.1919, "step": 270425 }, { "epoch": 2.66, "grad_norm": 9.8367338180542, "learning_rate": 3.7074979892162415e-06, "loss": 0.1144, "step": 270450 }, { "epoch": 2.66, "grad_norm": 2.8709137439727783, "learning_rate": 3.707373866761993e-06, "loss": 0.1878, "step": 270475 }, { "epoch": 2.66, "grad_norm": 10.687017440795898, "learning_rate": 3.7072497443077443e-06, "loss": 0.1167, "step": 270500 }, { "epoch": 2.66, "grad_norm": 4.290029048919678, "learning_rate": 3.7071256218534964e-06, "loss": 0.2016, "step": 270525 }, { "epoch": 2.66, "grad_norm": 9.031669616699219, "learning_rate": 3.7070014993992476e-06, "loss": 0.1491, "step": 270550 }, { "epoch": 2.66, "grad_norm": 6.16087532043457, "learning_rate": 3.7068773769449988e-06, "loss": 0.2144, "step": 270575 }, { "epoch": 2.66, "grad_norm": 10.601402282714844, "learning_rate": 3.706753254490751e-06, "loss": 0.128, "step": 270600 }, { "epoch": 2.66, "grad_norm": 2.119030237197876, "learning_rate": 3.706629132036502e-06, "loss": 0.2044, "step": 270625 }, { "epoch": 2.66, "grad_norm": 14.997909545898438, "learning_rate": 3.7065050095822537e-06, "loss": 0.1053, "step": 270650 }, { "epoch": 2.66, "grad_norm": 9.561781883239746, "learning_rate": 3.7063808871280053e-06, "loss": 0.2519, "step": 270675 }, { "epoch": 2.66, "grad_norm": 11.243896484375, "learning_rate": 3.706256764673757e-06, "loss": 0.1372, "step": 270700 }, { "epoch": 2.66, "grad_norm": 6.260600566864014, "learning_rate": 3.706132642219508e-06, "loss": 0.2335, "step": 270725 }, { "epoch": 2.66, "grad_norm": 9.150700569152832, "learning_rate": 3.7060085197652602e-06, "loss": 0.1308, "step": 270750 }, { "epoch": 2.66, "grad_norm": 4.43092679977417, "learning_rate": 3.7058843973110114e-06, "loss": 0.2398, "step": 270775 }, { "epoch": 2.66, "grad_norm": 8.539558410644531, "learning_rate": 3.7057602748567627e-06, "loss": 0.1427, "step": 270800 }, { "epoch": 2.66, "grad_norm": 6.013651371002197, "learning_rate": 3.7056361524025147e-06, "loss": 0.2209, "step": 270825 }, { "epoch": 2.66, "grad_norm": 11.786882400512695, "learning_rate": 3.705512029948266e-06, "loss": 0.1007, "step": 270850 }, { "epoch": 2.66, "grad_norm": 2.7183704376220703, "learning_rate": 3.7053879074940176e-06, "loss": 0.1787, "step": 270875 }, { "epoch": 2.66, "grad_norm": 12.274431228637695, "learning_rate": 3.705263785039769e-06, "loss": 0.1353, "step": 270900 }, { "epoch": 2.66, "grad_norm": 6.187800407409668, "learning_rate": 3.705139662585521e-06, "loss": 0.2451, "step": 270925 }, { "epoch": 2.66, "grad_norm": 5.791947841644287, "learning_rate": 3.705015540131272e-06, "loss": 0.1385, "step": 270950 }, { "epoch": 2.66, "grad_norm": 4.8143815994262695, "learning_rate": 3.704891417677024e-06, "loss": 0.2505, "step": 270975 }, { "epoch": 2.66, "grad_norm": 10.724206924438477, "learning_rate": 3.7047672952227753e-06, "loss": 0.1346, "step": 271000 }, { "epoch": 2.66, "grad_norm": 10.372790336608887, "learning_rate": 3.7046431727685265e-06, "loss": 0.2155, "step": 271025 }, { "epoch": 2.67, "grad_norm": 12.457777976989746, "learning_rate": 3.7045190503142786e-06, "loss": 0.1382, "step": 271050 }, { "epoch": 2.67, "grad_norm": 3.457792043685913, "learning_rate": 3.70439492786003e-06, "loss": 0.2061, "step": 271075 }, { "epoch": 2.67, "grad_norm": 4.927308082580566, "learning_rate": 3.7042708054057814e-06, "loss": 0.1175, "step": 271100 }, { "epoch": 2.67, "grad_norm": 5.537426948547363, "learning_rate": 3.704146682951533e-06, "loss": 0.2443, "step": 271125 }, { "epoch": 2.67, "grad_norm": 12.521957397460938, "learning_rate": 3.7040225604972847e-06, "loss": 0.1216, "step": 271150 }, { "epoch": 2.67, "grad_norm": 6.426520824432373, "learning_rate": 3.703898438043036e-06, "loss": 0.2772, "step": 271175 }, { "epoch": 2.67, "grad_norm": 16.68511390686035, "learning_rate": 3.703774315588787e-06, "loss": 0.1249, "step": 271200 }, { "epoch": 2.67, "grad_norm": 7.309777736663818, "learning_rate": 3.703650193134539e-06, "loss": 0.2237, "step": 271225 }, { "epoch": 2.67, "grad_norm": 9.84567928314209, "learning_rate": 3.7035260706802904e-06, "loss": 0.1455, "step": 271250 }, { "epoch": 2.67, "grad_norm": 4.415724277496338, "learning_rate": 3.703401948226042e-06, "loss": 0.2708, "step": 271275 }, { "epoch": 2.67, "grad_norm": 13.887601852416992, "learning_rate": 3.7032778257717937e-06, "loss": 0.1387, "step": 271300 }, { "epoch": 2.67, "grad_norm": 5.501457691192627, "learning_rate": 3.7031537033175453e-06, "loss": 0.208, "step": 271325 }, { "epoch": 2.67, "grad_norm": 6.564538478851318, "learning_rate": 3.7030295808632965e-06, "loss": 0.1282, "step": 271350 }, { "epoch": 2.67, "grad_norm": 3.3770675659179688, "learning_rate": 3.7029054584090486e-06, "loss": 0.2328, "step": 271375 }, { "epoch": 2.67, "grad_norm": 4.883184909820557, "learning_rate": 3.7027813359547998e-06, "loss": 0.1308, "step": 271400 }, { "epoch": 2.67, "grad_norm": 6.387158393859863, "learning_rate": 3.702657213500551e-06, "loss": 0.2393, "step": 271425 }, { "epoch": 2.67, "grad_norm": 12.40465259552002, "learning_rate": 3.702533091046303e-06, "loss": 0.11, "step": 271450 }, { "epoch": 2.67, "grad_norm": 2.6177120208740234, "learning_rate": 3.7024089685920543e-06, "loss": 0.2349, "step": 271475 }, { "epoch": 2.67, "grad_norm": 12.608278274536133, "learning_rate": 3.702284846137806e-06, "loss": 0.1364, "step": 271500 }, { "epoch": 2.67, "grad_norm": 4.042668342590332, "learning_rate": 3.7021607236835575e-06, "loss": 0.1923, "step": 271525 }, { "epoch": 2.67, "grad_norm": 8.99612808227539, "learning_rate": 3.702036601229309e-06, "loss": 0.1336, "step": 271550 }, { "epoch": 2.67, "grad_norm": 4.854290008544922, "learning_rate": 3.7019124787750604e-06, "loss": 0.2463, "step": 271575 }, { "epoch": 2.67, "grad_norm": 17.835725784301758, "learning_rate": 3.7017883563208124e-06, "loss": 0.1459, "step": 271600 }, { "epoch": 2.67, "grad_norm": 6.616650104522705, "learning_rate": 3.7016642338665636e-06, "loss": 0.2446, "step": 271625 }, { "epoch": 2.67, "grad_norm": 10.565912246704102, "learning_rate": 3.701540111412315e-06, "loss": 0.1139, "step": 271650 }, { "epoch": 2.67, "grad_norm": 5.153144836425781, "learning_rate": 3.701415988958067e-06, "loss": 0.2014, "step": 271675 }, { "epoch": 2.67, "grad_norm": 8.792264938354492, "learning_rate": 3.701291866503818e-06, "loss": 0.1257, "step": 271700 }, { "epoch": 2.67, "grad_norm": 0.3920532763004303, "learning_rate": 3.7011677440495698e-06, "loss": 0.2275, "step": 271725 }, { "epoch": 2.67, "grad_norm": 11.800313949584961, "learning_rate": 3.7010436215953214e-06, "loss": 0.1365, "step": 271750 }, { "epoch": 2.67, "grad_norm": 1.936704397201538, "learning_rate": 3.700919499141073e-06, "loss": 0.2224, "step": 271775 }, { "epoch": 2.67, "grad_norm": 10.714700698852539, "learning_rate": 3.7007953766868242e-06, "loss": 0.1554, "step": 271800 }, { "epoch": 2.67, "grad_norm": 1.174049973487854, "learning_rate": 3.7006712542325763e-06, "loss": 0.2535, "step": 271825 }, { "epoch": 2.67, "grad_norm": 12.014025688171387, "learning_rate": 3.7005471317783275e-06, "loss": 0.1107, "step": 271850 }, { "epoch": 2.67, "grad_norm": 5.580353260040283, "learning_rate": 3.700423009324079e-06, "loss": 0.1948, "step": 271875 }, { "epoch": 2.67, "grad_norm": 16.66851234436035, "learning_rate": 3.7002988868698308e-06, "loss": 0.1655, "step": 271900 }, { "epoch": 2.67, "grad_norm": 5.166383266448975, "learning_rate": 3.7001747644155824e-06, "loss": 0.2043, "step": 271925 }, { "epoch": 2.67, "grad_norm": 10.646799087524414, "learning_rate": 3.7000506419613336e-06, "loss": 0.1221, "step": 271950 }, { "epoch": 2.67, "grad_norm": 4.4325175285339355, "learning_rate": 3.6999265195070857e-06, "loss": 0.2542, "step": 271975 }, { "epoch": 2.67, "grad_norm": 16.92889404296875, "learning_rate": 3.699802397052837e-06, "loss": 0.1298, "step": 272000 }, { "epoch": 2.67, "grad_norm": 3.2548956871032715, "learning_rate": 3.699678274598588e-06, "loss": 0.2013, "step": 272025 }, { "epoch": 2.67, "grad_norm": 6.970791339874268, "learning_rate": 3.6995541521443397e-06, "loss": 0.1193, "step": 272050 }, { "epoch": 2.68, "grad_norm": 2.7089319229125977, "learning_rate": 3.6994300296900914e-06, "loss": 0.2109, "step": 272075 }, { "epoch": 2.68, "grad_norm": 13.675244331359863, "learning_rate": 3.699305907235843e-06, "loss": 0.1407, "step": 272100 }, { "epoch": 2.68, "grad_norm": 4.266735076904297, "learning_rate": 3.6991817847815942e-06, "loss": 0.1671, "step": 272125 }, { "epoch": 2.68, "grad_norm": 13.872719764709473, "learning_rate": 3.6990576623273463e-06, "loss": 0.1421, "step": 272150 }, { "epoch": 2.68, "grad_norm": 6.019747734069824, "learning_rate": 3.6989335398730975e-06, "loss": 0.1997, "step": 272175 }, { "epoch": 2.68, "grad_norm": 11.462742805480957, "learning_rate": 3.6988094174188487e-06, "loss": 0.0917, "step": 272200 }, { "epoch": 2.68, "grad_norm": 6.250259876251221, "learning_rate": 3.6986852949646008e-06, "loss": 0.2092, "step": 272225 }, { "epoch": 2.68, "grad_norm": 10.985250473022461, "learning_rate": 3.698561172510352e-06, "loss": 0.1405, "step": 272250 }, { "epoch": 2.68, "grad_norm": 1.0487549304962158, "learning_rate": 3.6984370500561036e-06, "loss": 0.1966, "step": 272275 }, { "epoch": 2.68, "grad_norm": 16.612396240234375, "learning_rate": 3.6983129276018552e-06, "loss": 0.1108, "step": 272300 }, { "epoch": 2.68, "grad_norm": 3.5810375213623047, "learning_rate": 3.698188805147607e-06, "loss": 0.2021, "step": 272325 }, { "epoch": 2.68, "grad_norm": 17.27700424194336, "learning_rate": 3.698064682693358e-06, "loss": 0.1262, "step": 272350 }, { "epoch": 2.68, "grad_norm": 2.238398313522339, "learning_rate": 3.69794056023911e-06, "loss": 0.2459, "step": 272375 }, { "epoch": 2.68, "grad_norm": 10.902647018432617, "learning_rate": 3.6978164377848614e-06, "loss": 0.1332, "step": 272400 }, { "epoch": 2.68, "grad_norm": 2.132673740386963, "learning_rate": 3.6976923153306126e-06, "loss": 0.2135, "step": 272425 }, { "epoch": 2.68, "grad_norm": 10.123231887817383, "learning_rate": 3.6975681928763646e-06, "loss": 0.1246, "step": 272450 }, { "epoch": 2.68, "grad_norm": 0.059809815138578415, "learning_rate": 3.697444070422116e-06, "loss": 0.1845, "step": 272475 }, { "epoch": 2.68, "grad_norm": 10.079150199890137, "learning_rate": 3.6973199479678675e-06, "loss": 0.112, "step": 272500 }, { "epoch": 2.68, "grad_norm": 7.17829704284668, "learning_rate": 3.697195825513619e-06, "loss": 0.1908, "step": 272525 }, { "epoch": 2.68, "grad_norm": 10.619507789611816, "learning_rate": 3.6970717030593708e-06, "loss": 0.1799, "step": 272550 }, { "epoch": 2.68, "grad_norm": 4.273934364318848, "learning_rate": 3.696947580605122e-06, "loss": 0.2171, "step": 272575 }, { "epoch": 2.68, "grad_norm": 13.439780235290527, "learning_rate": 3.696823458150874e-06, "loss": 0.1127, "step": 272600 }, { "epoch": 2.68, "grad_norm": 5.716407299041748, "learning_rate": 3.6966993356966252e-06, "loss": 0.2267, "step": 272625 }, { "epoch": 2.68, "grad_norm": 7.998359203338623, "learning_rate": 3.6965752132423764e-06, "loss": 0.1347, "step": 272650 }, { "epoch": 2.68, "grad_norm": 3.9573605060577393, "learning_rate": 3.6964510907881285e-06, "loss": 0.1957, "step": 272675 }, { "epoch": 2.68, "grad_norm": 9.732440948486328, "learning_rate": 3.6963269683338797e-06, "loss": 0.133, "step": 272700 }, { "epoch": 2.68, "grad_norm": 1.9735310077667236, "learning_rate": 3.6962028458796313e-06, "loss": 0.2197, "step": 272725 }, { "epoch": 2.68, "grad_norm": 10.78640079498291, "learning_rate": 3.696078723425383e-06, "loss": 0.1354, "step": 272750 }, { "epoch": 2.68, "grad_norm": 7.097686767578125, "learning_rate": 3.6959546009711346e-06, "loss": 0.209, "step": 272775 }, { "epoch": 2.68, "grad_norm": 10.709075927734375, "learning_rate": 3.695830478516886e-06, "loss": 0.1321, "step": 272800 }, { "epoch": 2.68, "grad_norm": 1.8588160276412964, "learning_rate": 3.695706356062638e-06, "loss": 0.2473, "step": 272825 }, { "epoch": 2.68, "grad_norm": 7.162574291229248, "learning_rate": 3.695582233608389e-06, "loss": 0.1247, "step": 272850 }, { "epoch": 2.68, "grad_norm": 1.7117350101470947, "learning_rate": 3.6954581111541403e-06, "loss": 0.1972, "step": 272875 }, { "epoch": 2.68, "grad_norm": 9.846585273742676, "learning_rate": 3.695333988699892e-06, "loss": 0.1261, "step": 272900 }, { "epoch": 2.68, "grad_norm": 4.586402893066406, "learning_rate": 3.6952098662456436e-06, "loss": 0.2353, "step": 272925 }, { "epoch": 2.68, "grad_norm": 9.359145164489746, "learning_rate": 3.6950857437913952e-06, "loss": 0.0949, "step": 272950 }, { "epoch": 2.68, "grad_norm": 4.437508583068848, "learning_rate": 3.6949616213371464e-06, "loss": 0.2486, "step": 272975 }, { "epoch": 2.68, "grad_norm": 10.907161712646484, "learning_rate": 3.6948374988828985e-06, "loss": 0.1339, "step": 273000 }, { "epoch": 2.68, "grad_norm": 4.098989009857178, "learning_rate": 3.6947183413268195e-06, "loss": 0.2056, "step": 273025 }, { "epoch": 2.68, "grad_norm": 10.03680419921875, "learning_rate": 3.6945942188725716e-06, "loss": 0.1517, "step": 273050 }, { "epoch": 2.68, "grad_norm": 7.951204776763916, "learning_rate": 3.6944700964183228e-06, "loss": 0.2427, "step": 273075 }, { "epoch": 2.69, "grad_norm": 12.419595718383789, "learning_rate": 3.694345973964074e-06, "loss": 0.1098, "step": 273100 }, { "epoch": 2.69, "grad_norm": 3.3436596393585205, "learning_rate": 3.694221851509826e-06, "loss": 0.1667, "step": 273125 }, { "epoch": 2.69, "grad_norm": 13.342666625976562, "learning_rate": 3.6940977290555773e-06, "loss": 0.1068, "step": 273150 }, { "epoch": 2.69, "grad_norm": 3.981020927429199, "learning_rate": 3.693973606601329e-06, "loss": 0.2417, "step": 273175 }, { "epoch": 2.69, "grad_norm": 11.664223670959473, "learning_rate": 3.6938494841470805e-06, "loss": 0.1698, "step": 273200 }, { "epoch": 2.69, "grad_norm": 4.7818379402160645, "learning_rate": 3.693725361692832e-06, "loss": 0.2158, "step": 273225 }, { "epoch": 2.69, "grad_norm": 7.226295471191406, "learning_rate": 3.6936012392385834e-06, "loss": 0.1136, "step": 273250 }, { "epoch": 2.69, "grad_norm": 2.905287265777588, "learning_rate": 3.6934771167843354e-06, "loss": 0.2177, "step": 273275 }, { "epoch": 2.69, "grad_norm": 14.159616470336914, "learning_rate": 3.6933529943300867e-06, "loss": 0.2042, "step": 273300 }, { "epoch": 2.69, "grad_norm": 4.9975128173828125, "learning_rate": 3.693228871875838e-06, "loss": 0.2023, "step": 273325 }, { "epoch": 2.69, "grad_norm": 5.689822673797607, "learning_rate": 3.69310474942159e-06, "loss": 0.1128, "step": 273350 }, { "epoch": 2.69, "grad_norm": 0.1552303582429886, "learning_rate": 3.692980626967341e-06, "loss": 0.1867, "step": 273375 }, { "epoch": 2.69, "grad_norm": 12.18762493133545, "learning_rate": 3.6928565045130928e-06, "loss": 0.1761, "step": 273400 }, { "epoch": 2.69, "grad_norm": 7.488840579986572, "learning_rate": 3.6927323820588444e-06, "loss": 0.2179, "step": 273425 }, { "epoch": 2.69, "grad_norm": 15.814765930175781, "learning_rate": 3.692608259604596e-06, "loss": 0.1345, "step": 273450 }, { "epoch": 2.69, "grad_norm": 5.5568695068359375, "learning_rate": 3.6924841371503472e-06, "loss": 0.2215, "step": 273475 }, { "epoch": 2.69, "grad_norm": 8.650568008422852, "learning_rate": 3.6923600146960985e-06, "loss": 0.1397, "step": 273500 }, { "epoch": 2.69, "grad_norm": 1.8376933336257935, "learning_rate": 3.6922358922418505e-06, "loss": 0.2293, "step": 273525 }, { "epoch": 2.69, "grad_norm": 7.969248294830322, "learning_rate": 3.6921117697876017e-06, "loss": 0.0829, "step": 273550 }, { "epoch": 2.69, "grad_norm": 0.5956903696060181, "learning_rate": 3.6919876473333534e-06, "loss": 0.2444, "step": 273575 }, { "epoch": 2.69, "grad_norm": 8.424725532531738, "learning_rate": 3.691863524879105e-06, "loss": 0.0963, "step": 273600 }, { "epoch": 2.69, "grad_norm": 3.9350459575653076, "learning_rate": 3.6917394024248566e-06, "loss": 0.2514, "step": 273625 }, { "epoch": 2.69, "grad_norm": 13.57016372680664, "learning_rate": 3.691615279970608e-06, "loss": 0.1315, "step": 273650 }, { "epoch": 2.69, "grad_norm": 3.1767232418060303, "learning_rate": 3.69149115751636e-06, "loss": 0.2585, "step": 273675 }, { "epoch": 2.69, "grad_norm": 12.862263679504395, "learning_rate": 3.691367035062111e-06, "loss": 0.1386, "step": 273700 }, { "epoch": 2.69, "grad_norm": 7.622387886047363, "learning_rate": 3.6912429126078623e-06, "loss": 0.2311, "step": 273725 }, { "epoch": 2.69, "grad_norm": 17.47822380065918, "learning_rate": 3.6911187901536144e-06, "loss": 0.1256, "step": 273750 }, { "epoch": 2.69, "grad_norm": 3.6055526733398438, "learning_rate": 3.6909946676993656e-06, "loss": 0.216, "step": 273775 }, { "epoch": 2.69, "grad_norm": 11.656436920166016, "learning_rate": 3.6908705452451172e-06, "loss": 0.144, "step": 273800 }, { "epoch": 2.69, "grad_norm": 5.085608005523682, "learning_rate": 3.690746422790869e-06, "loss": 0.2132, "step": 273825 }, { "epoch": 2.69, "grad_norm": 12.367162704467773, "learning_rate": 3.6906223003366205e-06, "loss": 0.1178, "step": 273850 }, { "epoch": 2.69, "grad_norm": 3.938021421432495, "learning_rate": 3.6904981778823717e-06, "loss": 0.2211, "step": 273875 }, { "epoch": 2.69, "grad_norm": 14.370814323425293, "learning_rate": 3.6903740554281238e-06, "loss": 0.1339, "step": 273900 }, { "epoch": 2.69, "grad_norm": 0.13307444751262665, "learning_rate": 3.690249932973875e-06, "loss": 0.1977, "step": 273925 }, { "epoch": 2.69, "grad_norm": 17.41277503967285, "learning_rate": 3.690125810519626e-06, "loss": 0.1345, "step": 273950 }, { "epoch": 2.69, "grad_norm": 2.4171042442321777, "learning_rate": 3.6900016880653783e-06, "loss": 0.21, "step": 273975 }, { "epoch": 2.69, "grad_norm": 4.5374579429626465, "learning_rate": 3.6898775656111295e-06, "loss": 0.1119, "step": 274000 }, { "epoch": 2.69, "grad_norm": 3.290412664413452, "learning_rate": 3.689753443156881e-06, "loss": 0.2523, "step": 274025 }, { "epoch": 2.69, "grad_norm": 11.13371467590332, "learning_rate": 3.6896293207026327e-06, "loss": 0.1463, "step": 274050 }, { "epoch": 2.69, "grad_norm": 6.745367527008057, "learning_rate": 3.6895051982483844e-06, "loss": 0.2308, "step": 274075 }, { "epoch": 2.69, "grad_norm": 14.990610122680664, "learning_rate": 3.6893810757941356e-06, "loss": 0.1399, "step": 274100 }, { "epoch": 2.7, "grad_norm": 3.63069224357605, "learning_rate": 3.6892569533398876e-06, "loss": 0.2001, "step": 274125 }, { "epoch": 2.7, "grad_norm": 11.56670093536377, "learning_rate": 3.689132830885639e-06, "loss": 0.1279, "step": 274150 }, { "epoch": 2.7, "grad_norm": 2.768143892288208, "learning_rate": 3.68900870843139e-06, "loss": 0.2215, "step": 274175 }, { "epoch": 2.7, "grad_norm": 5.4364333152771, "learning_rate": 3.688884585977142e-06, "loss": 0.1226, "step": 274200 }, { "epoch": 2.7, "grad_norm": 9.821746826171875, "learning_rate": 3.6887604635228933e-06, "loss": 0.1923, "step": 274225 }, { "epoch": 2.7, "grad_norm": 12.695527076721191, "learning_rate": 3.688636341068645e-06, "loss": 0.1501, "step": 274250 }, { "epoch": 2.7, "grad_norm": 4.762502193450928, "learning_rate": 3.6885122186143966e-06, "loss": 0.2133, "step": 274275 }, { "epoch": 2.7, "grad_norm": 12.41254711151123, "learning_rate": 3.6883880961601482e-06, "loss": 0.1022, "step": 274300 }, { "epoch": 2.7, "grad_norm": 9.733417510986328, "learning_rate": 3.6882639737058995e-06, "loss": 0.1987, "step": 274325 }, { "epoch": 2.7, "grad_norm": 14.974756240844727, "learning_rate": 3.6881398512516507e-06, "loss": 0.1255, "step": 274350 }, { "epoch": 2.7, "grad_norm": 3.541966438293457, "learning_rate": 3.6880157287974027e-06, "loss": 0.2293, "step": 274375 }, { "epoch": 2.7, "grad_norm": 11.956031799316406, "learning_rate": 3.687891606343154e-06, "loss": 0.1147, "step": 274400 }, { "epoch": 2.7, "grad_norm": 8.887520790100098, "learning_rate": 3.6877674838889056e-06, "loss": 0.2133, "step": 274425 }, { "epoch": 2.7, "grad_norm": 9.233466148376465, "learning_rate": 3.687643361434657e-06, "loss": 0.1234, "step": 274450 }, { "epoch": 2.7, "grad_norm": 4.845803260803223, "learning_rate": 3.687519238980409e-06, "loss": 0.2441, "step": 274475 }, { "epoch": 2.7, "grad_norm": 12.175435066223145, "learning_rate": 3.68739511652616e-06, "loss": 0.1319, "step": 274500 }, { "epoch": 2.7, "grad_norm": 7.243373870849609, "learning_rate": 3.687270994071912e-06, "loss": 0.2143, "step": 274525 }, { "epoch": 2.7, "grad_norm": 15.25744342803955, "learning_rate": 3.6871468716176633e-06, "loss": 0.1234, "step": 274550 }, { "epoch": 2.7, "grad_norm": 0.46260032057762146, "learning_rate": 3.6870227491634145e-06, "loss": 0.1944, "step": 274575 }, { "epoch": 2.7, "grad_norm": 11.512055397033691, "learning_rate": 3.6868986267091666e-06, "loss": 0.1187, "step": 274600 }, { "epoch": 2.7, "grad_norm": 2.420053482055664, "learning_rate": 3.686774504254918e-06, "loss": 0.2243, "step": 274625 }, { "epoch": 2.7, "grad_norm": 10.038840293884277, "learning_rate": 3.6866503818006694e-06, "loss": 0.1309, "step": 274650 }, { "epoch": 2.7, "grad_norm": 3.9071688652038574, "learning_rate": 3.686526259346421e-06, "loss": 0.2194, "step": 274675 }, { "epoch": 2.7, "grad_norm": 16.210451126098633, "learning_rate": 3.6864021368921727e-06, "loss": 0.1136, "step": 274700 }, { "epoch": 2.7, "grad_norm": 2.7108025550842285, "learning_rate": 3.686278014437924e-06, "loss": 0.247, "step": 274725 }, { "epoch": 2.7, "grad_norm": 7.89874267578125, "learning_rate": 3.686153891983676e-06, "loss": 0.1071, "step": 274750 }, { "epoch": 2.7, "grad_norm": 38.92472839355469, "learning_rate": 3.686029769529427e-06, "loss": 0.2318, "step": 274775 }, { "epoch": 2.7, "grad_norm": 5.058573246002197, "learning_rate": 3.685905647075179e-06, "loss": 0.1222, "step": 274800 }, { "epoch": 2.7, "grad_norm": 5.663005828857422, "learning_rate": 3.6857815246209305e-06, "loss": 0.2317, "step": 274825 }, { "epoch": 2.7, "grad_norm": 12.371112823486328, "learning_rate": 3.685657402166682e-06, "loss": 0.17, "step": 274850 }, { "epoch": 2.7, "grad_norm": 7.119688510894775, "learning_rate": 3.6855332797124333e-06, "loss": 0.1906, "step": 274875 }, { "epoch": 2.7, "grad_norm": 6.287571430206299, "learning_rate": 3.6854091572581854e-06, "loss": 0.1293, "step": 274900 }, { "epoch": 2.7, "grad_norm": 9.304455757141113, "learning_rate": 3.6852850348039366e-06, "loss": 0.1638, "step": 274925 }, { "epoch": 2.7, "grad_norm": 13.539443016052246, "learning_rate": 3.6851609123496878e-06, "loss": 0.1683, "step": 274950 }, { "epoch": 2.7, "grad_norm": 4.4739155769348145, "learning_rate": 3.68503678989544e-06, "loss": 0.1904, "step": 274975 }, { "epoch": 2.7, "grad_norm": 10.124593734741211, "learning_rate": 3.684912667441191e-06, "loss": 0.1614, "step": 275000 }, { "epoch": 2.7, "grad_norm": 6.2078399658203125, "learning_rate": 3.6847885449869427e-06, "loss": 0.2454, "step": 275025 }, { "epoch": 2.7, "grad_norm": 13.638301849365234, "learning_rate": 3.6846644225326943e-06, "loss": 0.1441, "step": 275050 }, { "epoch": 2.7, "grad_norm": 2.927046775817871, "learning_rate": 3.684540300078446e-06, "loss": 0.197, "step": 275075 }, { "epoch": 2.7, "grad_norm": 9.707730293273926, "learning_rate": 3.684416177624197e-06, "loss": 0.1112, "step": 275100 }, { "epoch": 2.71, "grad_norm": 4.373705863952637, "learning_rate": 3.6842920551699492e-06, "loss": 0.2888, "step": 275125 }, { "epoch": 2.71, "grad_norm": 20.492645263671875, "learning_rate": 3.6841679327157004e-06, "loss": 0.1506, "step": 275150 }, { "epoch": 2.71, "grad_norm": 3.133789539337158, "learning_rate": 3.6840487751596215e-06, "loss": 0.2189, "step": 275175 }, { "epoch": 2.71, "grad_norm": 9.62082290649414, "learning_rate": 3.6839246527053735e-06, "loss": 0.1459, "step": 275200 }, { "epoch": 2.71, "grad_norm": 5.545595169067383, "learning_rate": 3.6838005302511247e-06, "loss": 0.2061, "step": 275225 }, { "epoch": 2.71, "grad_norm": 10.003279685974121, "learning_rate": 3.6836764077968764e-06, "loss": 0.1399, "step": 275250 }, { "epoch": 2.71, "grad_norm": 4.415332794189453, "learning_rate": 3.683552285342628e-06, "loss": 0.2155, "step": 275275 }, { "epoch": 2.71, "grad_norm": 29.59264373779297, "learning_rate": 3.6834281628883796e-06, "loss": 0.1954, "step": 275300 }, { "epoch": 2.71, "grad_norm": 3.506279706954956, "learning_rate": 3.683304040434131e-06, "loss": 0.2464, "step": 275325 }, { "epoch": 2.71, "grad_norm": 13.154886245727539, "learning_rate": 3.683179917979883e-06, "loss": 0.1589, "step": 275350 }, { "epoch": 2.71, "grad_norm": 2.625450372695923, "learning_rate": 3.683055795525634e-06, "loss": 0.2735, "step": 275375 }, { "epoch": 2.71, "grad_norm": 6.809314250946045, "learning_rate": 3.6829316730713853e-06, "loss": 0.0977, "step": 275400 }, { "epoch": 2.71, "grad_norm": 4.726353645324707, "learning_rate": 3.6828075506171374e-06, "loss": 0.2059, "step": 275425 }, { "epoch": 2.71, "grad_norm": 10.269765853881836, "learning_rate": 3.6826834281628886e-06, "loss": 0.1151, "step": 275450 }, { "epoch": 2.71, "grad_norm": 8.176592826843262, "learning_rate": 3.6825593057086402e-06, "loss": 0.2523, "step": 275475 }, { "epoch": 2.71, "grad_norm": 15.046248435974121, "learning_rate": 3.682435183254392e-06, "loss": 0.1255, "step": 275500 }, { "epoch": 2.71, "grad_norm": 2.491044521331787, "learning_rate": 3.6823110608001435e-06, "loss": 0.2071, "step": 275525 }, { "epoch": 2.71, "grad_norm": 10.87311840057373, "learning_rate": 3.6821869383458947e-06, "loss": 0.1138, "step": 275550 }, { "epoch": 2.71, "grad_norm": 6.260429382324219, "learning_rate": 3.6820628158916468e-06, "loss": 0.2323, "step": 275575 }, { "epoch": 2.71, "grad_norm": 8.87090015411377, "learning_rate": 3.681938693437398e-06, "loss": 0.1561, "step": 275600 }, { "epoch": 2.71, "grad_norm": 4.143669128417969, "learning_rate": 3.681814570983149e-06, "loss": 0.2158, "step": 275625 }, { "epoch": 2.71, "grad_norm": 7.127479553222656, "learning_rate": 3.6816904485289013e-06, "loss": 0.1104, "step": 275650 }, { "epoch": 2.71, "grad_norm": 6.136851787567139, "learning_rate": 3.6815663260746525e-06, "loss": 0.1816, "step": 275675 }, { "epoch": 2.71, "grad_norm": 9.270650863647461, "learning_rate": 3.681442203620404e-06, "loss": 0.1522, "step": 275700 }, { "epoch": 2.71, "grad_norm": 3.3010153770446777, "learning_rate": 3.6813180811661557e-06, "loss": 0.2403, "step": 275725 }, { "epoch": 2.71, "grad_norm": 14.39452838897705, "learning_rate": 3.6811939587119074e-06, "loss": 0.149, "step": 275750 }, { "epoch": 2.71, "grad_norm": 6.830204963684082, "learning_rate": 3.6810698362576586e-06, "loss": 0.2069, "step": 275775 }, { "epoch": 2.71, "grad_norm": 9.829949378967285, "learning_rate": 3.68094571380341e-06, "loss": 0.122, "step": 275800 }, { "epoch": 2.71, "grad_norm": 3.807683229446411, "learning_rate": 3.680821591349162e-06, "loss": 0.1748, "step": 275825 }, { "epoch": 2.71, "grad_norm": 12.70596981048584, "learning_rate": 3.680697468894913e-06, "loss": 0.1444, "step": 275850 }, { "epoch": 2.71, "grad_norm": 3.4389145374298096, "learning_rate": 3.6805733464406647e-06, "loss": 0.2464, "step": 275875 }, { "epoch": 2.71, "grad_norm": 7.261146068572998, "learning_rate": 3.6804492239864163e-06, "loss": 0.1308, "step": 275900 }, { "epoch": 2.71, "grad_norm": 5.854070663452148, "learning_rate": 3.680325101532168e-06, "loss": 0.243, "step": 275925 }, { "epoch": 2.71, "grad_norm": 5.5608110427856445, "learning_rate": 3.680200979077919e-06, "loss": 0.1057, "step": 275950 }, { "epoch": 2.71, "grad_norm": 2.1580116748809814, "learning_rate": 3.6800768566236712e-06, "loss": 0.2159, "step": 275975 }, { "epoch": 2.71, "grad_norm": 7.622768402099609, "learning_rate": 3.6799527341694225e-06, "loss": 0.1388, "step": 276000 }, { "epoch": 2.71, "grad_norm": 5.412698745727539, "learning_rate": 3.6798286117151737e-06, "loss": 0.2092, "step": 276025 }, { "epoch": 2.71, "grad_norm": 13.75460147857666, "learning_rate": 3.6797044892609257e-06, "loss": 0.1242, "step": 276050 }, { "epoch": 2.71, "grad_norm": 4.86389684677124, "learning_rate": 3.679580366806677e-06, "loss": 0.2112, "step": 276075 }, { "epoch": 2.71, "grad_norm": 11.444056510925293, "learning_rate": 3.6794562443524286e-06, "loss": 0.1177, "step": 276100 }, { "epoch": 2.71, "grad_norm": 0.7776277661323547, "learning_rate": 3.67933212189818e-06, "loss": 0.2012, "step": 276125 }, { "epoch": 2.72, "grad_norm": 19.555156707763672, "learning_rate": 3.679207999443932e-06, "loss": 0.1853, "step": 276150 }, { "epoch": 2.72, "grad_norm": 5.507565498352051, "learning_rate": 3.679083876989683e-06, "loss": 0.221, "step": 276175 }, { "epoch": 2.72, "grad_norm": 13.34722900390625, "learning_rate": 3.678959754535435e-06, "loss": 0.1597, "step": 276200 }, { "epoch": 2.72, "grad_norm": 3.835749387741089, "learning_rate": 3.6788356320811863e-06, "loss": 0.2334, "step": 276225 }, { "epoch": 2.72, "grad_norm": 15.423036575317383, "learning_rate": 3.6787115096269375e-06, "loss": 0.147, "step": 276250 }, { "epoch": 2.72, "grad_norm": 6.2578840255737305, "learning_rate": 3.6785873871726896e-06, "loss": 0.2145, "step": 276275 }, { "epoch": 2.72, "grad_norm": 9.922979354858398, "learning_rate": 3.678463264718441e-06, "loss": 0.1347, "step": 276300 }, { "epoch": 2.72, "grad_norm": 5.939695358276367, "learning_rate": 3.6783391422641924e-06, "loss": 0.2382, "step": 276325 }, { "epoch": 2.72, "grad_norm": 12.8628568649292, "learning_rate": 3.678215019809944e-06, "loss": 0.1306, "step": 276350 }, { "epoch": 2.72, "grad_norm": 7.986391544342041, "learning_rate": 3.6780908973556957e-06, "loss": 0.2005, "step": 276375 }, { "epoch": 2.72, "grad_norm": 14.532833099365234, "learning_rate": 3.677966774901447e-06, "loss": 0.1452, "step": 276400 }, { "epoch": 2.72, "grad_norm": 5.229794979095459, "learning_rate": 3.677842652447199e-06, "loss": 0.2421, "step": 276425 }, { "epoch": 2.72, "grad_norm": 22.693208694458008, "learning_rate": 3.67771852999295e-06, "loss": 0.1409, "step": 276450 }, { "epoch": 2.72, "grad_norm": 16.301164627075195, "learning_rate": 3.6775944075387014e-06, "loss": 0.2379, "step": 276475 }, { "epoch": 2.72, "grad_norm": 15.52479362487793, "learning_rate": 3.6774702850844535e-06, "loss": 0.1626, "step": 276500 }, { "epoch": 2.72, "grad_norm": 5.2674665451049805, "learning_rate": 3.6773461626302047e-06, "loss": 0.1884, "step": 276525 }, { "epoch": 2.72, "grad_norm": 9.363718032836914, "learning_rate": 3.6772220401759563e-06, "loss": 0.1383, "step": 276550 }, { "epoch": 2.72, "grad_norm": 3.9148943424224854, "learning_rate": 3.677097917721708e-06, "loss": 0.2321, "step": 276575 }, { "epoch": 2.72, "grad_norm": 9.27212142944336, "learning_rate": 3.6769737952674596e-06, "loss": 0.1274, "step": 276600 }, { "epoch": 2.72, "grad_norm": 5.460045337677002, "learning_rate": 3.676849672813211e-06, "loss": 0.2322, "step": 276625 }, { "epoch": 2.72, "grad_norm": 4.376732349395752, "learning_rate": 3.676725550358962e-06, "loss": 0.0922, "step": 276650 }, { "epoch": 2.72, "grad_norm": 6.602365970611572, "learning_rate": 3.676601427904714e-06, "loss": 0.24, "step": 276675 }, { "epoch": 2.72, "grad_norm": 14.118924140930176, "learning_rate": 3.6764773054504653e-06, "loss": 0.152, "step": 276700 }, { "epoch": 2.72, "grad_norm": 8.91157341003418, "learning_rate": 3.676353182996217e-06, "loss": 0.2628, "step": 276725 }, { "epoch": 2.72, "grad_norm": 10.633910179138184, "learning_rate": 3.6762290605419685e-06, "loss": 0.1393, "step": 276750 }, { "epoch": 2.72, "grad_norm": 4.678252220153809, "learning_rate": 3.67610493808772e-06, "loss": 0.1905, "step": 276775 }, { "epoch": 2.72, "grad_norm": 13.25390911102295, "learning_rate": 3.6759808156334714e-06, "loss": 0.1386, "step": 276800 }, { "epoch": 2.72, "grad_norm": 4.118924617767334, "learning_rate": 3.6758566931792234e-06, "loss": 0.2172, "step": 276825 }, { "epoch": 2.72, "grad_norm": 13.150505065917969, "learning_rate": 3.6757325707249747e-06, "loss": 0.1183, "step": 276850 }, { "epoch": 2.72, "grad_norm": 4.575206279754639, "learning_rate": 3.675608448270726e-06, "loss": 0.2506, "step": 276875 }, { "epoch": 2.72, "grad_norm": 10.804388046264648, "learning_rate": 3.675484325816478e-06, "loss": 0.1076, "step": 276900 }, { "epoch": 2.72, "grad_norm": 7.804728031158447, "learning_rate": 3.675360203362229e-06, "loss": 0.2159, "step": 276925 }, { "epoch": 2.72, "grad_norm": 15.88430118560791, "learning_rate": 3.6752360809079808e-06, "loss": 0.1507, "step": 276950 }, { "epoch": 2.72, "grad_norm": 3.5538933277130127, "learning_rate": 3.6751119584537324e-06, "loss": 0.1984, "step": 276975 }, { "epoch": 2.72, "grad_norm": 11.178086280822754, "learning_rate": 3.674987835999484e-06, "loss": 0.11, "step": 277000 }, { "epoch": 2.72, "grad_norm": 3.4576029777526855, "learning_rate": 3.6748637135452353e-06, "loss": 0.1972, "step": 277025 }, { "epoch": 2.72, "grad_norm": 8.697070121765137, "learning_rate": 3.6747395910909873e-06, "loss": 0.1123, "step": 277050 }, { "epoch": 2.72, "grad_norm": 7.223643779754639, "learning_rate": 3.6746154686367385e-06, "loss": 0.2119, "step": 277075 }, { "epoch": 2.72, "grad_norm": 9.597511291503906, "learning_rate": 3.6744913461824897e-06, "loss": 0.1283, "step": 277100 }, { "epoch": 2.72, "grad_norm": 5.098857879638672, "learning_rate": 3.674367223728242e-06, "loss": 0.2472, "step": 277125 }, { "epoch": 2.72, "grad_norm": 14.1552095413208, "learning_rate": 3.674243101273993e-06, "loss": 0.1234, "step": 277150 }, { "epoch": 2.73, "grad_norm": 1.8570016622543335, "learning_rate": 3.674123943717915e-06, "loss": 0.2003, "step": 277175 }, { "epoch": 2.73, "grad_norm": 8.44602108001709, "learning_rate": 3.673999821263666e-06, "loss": 0.1587, "step": 277200 }, { "epoch": 2.73, "grad_norm": 1.746260404586792, "learning_rate": 3.6738756988094177e-06, "loss": 0.1944, "step": 277225 }, { "epoch": 2.73, "grad_norm": 10.402971267700195, "learning_rate": 3.673751576355169e-06, "loss": 0.1411, "step": 277250 }, { "epoch": 2.73, "grad_norm": 6.144651889801025, "learning_rate": 3.673627453900921e-06, "loss": 0.2176, "step": 277275 }, { "epoch": 2.73, "grad_norm": 9.513668060302734, "learning_rate": 3.673503331446672e-06, "loss": 0.1222, "step": 277300 }, { "epoch": 2.73, "grad_norm": 6.803610324859619, "learning_rate": 3.6733792089924234e-06, "loss": 0.1936, "step": 277325 }, { "epoch": 2.73, "grad_norm": 14.659972190856934, "learning_rate": 3.6732550865381755e-06, "loss": 0.123, "step": 277350 }, { "epoch": 2.73, "grad_norm": 6.624017238616943, "learning_rate": 3.6731309640839267e-06, "loss": 0.2248, "step": 277375 }, { "epoch": 2.73, "grad_norm": 9.049031257629395, "learning_rate": 3.6730068416296783e-06, "loss": 0.106, "step": 277400 }, { "epoch": 2.73, "grad_norm": 5.895029544830322, "learning_rate": 3.67288271917543e-06, "loss": 0.2363, "step": 277425 }, { "epoch": 2.73, "grad_norm": 13.295744895935059, "learning_rate": 3.6727585967211816e-06, "loss": 0.1292, "step": 277450 }, { "epoch": 2.73, "grad_norm": 3.0329761505126953, "learning_rate": 3.672634474266933e-06, "loss": 0.2345, "step": 277475 }, { "epoch": 2.73, "grad_norm": 10.516045570373535, "learning_rate": 3.672510351812685e-06, "loss": 0.1208, "step": 277500 }, { "epoch": 2.73, "grad_norm": 5.082508087158203, "learning_rate": 3.672386229358436e-06, "loss": 0.2, "step": 277525 }, { "epoch": 2.73, "grad_norm": 9.060900688171387, "learning_rate": 3.6722621069041873e-06, "loss": 0.1422, "step": 277550 }, { "epoch": 2.73, "grad_norm": 8.605733871459961, "learning_rate": 3.6721379844499393e-06, "loss": 0.2324, "step": 277575 }, { "epoch": 2.73, "grad_norm": 5.959202766418457, "learning_rate": 3.6720138619956906e-06, "loss": 0.1488, "step": 277600 }, { "epoch": 2.73, "grad_norm": 11.31299114227295, "learning_rate": 3.671889739541442e-06, "loss": 0.2616, "step": 277625 }, { "epoch": 2.73, "grad_norm": 14.761855125427246, "learning_rate": 3.671765617087194e-06, "loss": 0.1461, "step": 277650 }, { "epoch": 2.73, "grad_norm": 6.9127516746521, "learning_rate": 3.6716414946329455e-06, "loss": 0.1978, "step": 277675 }, { "epoch": 2.73, "grad_norm": 12.220741271972656, "learning_rate": 3.6715173721786967e-06, "loss": 0.126, "step": 277700 }, { "epoch": 2.73, "grad_norm": 6.204025745391846, "learning_rate": 3.6713932497244487e-06, "loss": 0.2563, "step": 277725 }, { "epoch": 2.73, "grad_norm": 11.741707801818848, "learning_rate": 3.6712691272702e-06, "loss": 0.1428, "step": 277750 }, { "epoch": 2.73, "grad_norm": 6.370670318603516, "learning_rate": 3.6711450048159516e-06, "loss": 0.2379, "step": 277775 }, { "epoch": 2.73, "grad_norm": 14.566912651062012, "learning_rate": 3.6710208823617032e-06, "loss": 0.1288, "step": 277800 }, { "epoch": 2.73, "grad_norm": 4.820865154266357, "learning_rate": 3.670896759907455e-06, "loss": 0.2271, "step": 277825 }, { "epoch": 2.73, "grad_norm": 12.651788711547852, "learning_rate": 3.670772637453206e-06, "loss": 0.1511, "step": 277850 }, { "epoch": 2.73, "grad_norm": 4.110888957977295, "learning_rate": 3.670648514998958e-06, "loss": 0.237, "step": 277875 }, { "epoch": 2.73, "grad_norm": 14.176527976989746, "learning_rate": 3.6705243925447093e-06, "loss": 0.1412, "step": 277900 }, { "epoch": 2.73, "grad_norm": 3.9586877822875977, "learning_rate": 3.6704002700904605e-06, "loss": 0.2166, "step": 277925 }, { "epoch": 2.73, "grad_norm": 8.124967575073242, "learning_rate": 3.6702761476362126e-06, "loss": 0.121, "step": 277950 }, { "epoch": 2.73, "grad_norm": 3.034282684326172, "learning_rate": 3.670152025181964e-06, "loss": 0.2003, "step": 277975 }, { "epoch": 2.73, "grad_norm": 13.679404258728027, "learning_rate": 3.6700279027277154e-06, "loss": 0.1392, "step": 278000 }, { "epoch": 2.73, "grad_norm": 5.146483421325684, "learning_rate": 3.669903780273467e-06, "loss": 0.2099, "step": 278025 }, { "epoch": 2.73, "grad_norm": 14.011789321899414, "learning_rate": 3.6697796578192187e-06, "loss": 0.1248, "step": 278050 }, { "epoch": 2.73, "grad_norm": 4.6525139808654785, "learning_rate": 3.66965553536497e-06, "loss": 0.2392, "step": 278075 }, { "epoch": 2.73, "grad_norm": 9.932416915893555, "learning_rate": 3.669531412910721e-06, "loss": 0.1202, "step": 278100 }, { "epoch": 2.73, "grad_norm": 3.273721933364868, "learning_rate": 3.669407290456473e-06, "loss": 0.1974, "step": 278125 }, { "epoch": 2.73, "grad_norm": 15.247907638549805, "learning_rate": 3.6692831680022244e-06, "loss": 0.1382, "step": 278150 }, { "epoch": 2.74, "grad_norm": 4.3753814697265625, "learning_rate": 3.669159045547976e-06, "loss": 0.2134, "step": 278175 }, { "epoch": 2.74, "grad_norm": 10.36819076538086, "learning_rate": 3.6690349230937277e-06, "loss": 0.1064, "step": 278200 }, { "epoch": 2.74, "grad_norm": 10.663818359375, "learning_rate": 3.6689108006394793e-06, "loss": 0.2287, "step": 278225 }, { "epoch": 2.74, "grad_norm": 10.002961158752441, "learning_rate": 3.6687866781852305e-06, "loss": 0.1224, "step": 278250 }, { "epoch": 2.74, "grad_norm": 4.323837757110596, "learning_rate": 3.6686625557309826e-06, "loss": 0.2316, "step": 278275 }, { "epoch": 2.74, "grad_norm": 9.244914054870605, "learning_rate": 3.668538433276734e-06, "loss": 0.1225, "step": 278300 }, { "epoch": 2.74, "grad_norm": 6.813148021697998, "learning_rate": 3.668414310822485e-06, "loss": 0.2184, "step": 278325 }, { "epoch": 2.74, "grad_norm": 15.265390396118164, "learning_rate": 3.668290188368237e-06, "loss": 0.1821, "step": 278350 }, { "epoch": 2.74, "grad_norm": 3.294539451599121, "learning_rate": 3.6681660659139883e-06, "loss": 0.2071, "step": 278375 }, { "epoch": 2.74, "grad_norm": 13.423832893371582, "learning_rate": 3.66804194345974e-06, "loss": 0.1071, "step": 278400 }, { "epoch": 2.74, "grad_norm": 3.5410690307617188, "learning_rate": 3.6679178210054915e-06, "loss": 0.2071, "step": 278425 }, { "epoch": 2.74, "grad_norm": 7.797341346740723, "learning_rate": 3.667793698551243e-06, "loss": 0.13, "step": 278450 }, { "epoch": 2.74, "grad_norm": 3.8789615631103516, "learning_rate": 3.6676695760969944e-06, "loss": 0.2556, "step": 278475 }, { "epoch": 2.74, "grad_norm": 14.753374099731445, "learning_rate": 3.6675454536427465e-06, "loss": 0.1534, "step": 278500 }, { "epoch": 2.74, "grad_norm": 2.348883867263794, "learning_rate": 3.6674213311884977e-06, "loss": 0.1809, "step": 278525 }, { "epoch": 2.74, "grad_norm": 8.85682487487793, "learning_rate": 3.667297208734249e-06, "loss": 0.1336, "step": 278550 }, { "epoch": 2.74, "grad_norm": 1.1916131973266602, "learning_rate": 3.667173086280001e-06, "loss": 0.1944, "step": 278575 }, { "epoch": 2.74, "grad_norm": 12.914254188537598, "learning_rate": 3.667048963825752e-06, "loss": 0.1514, "step": 278600 }, { "epoch": 2.74, "grad_norm": 4.874932289123535, "learning_rate": 3.6669248413715038e-06, "loss": 0.2178, "step": 278625 }, { "epoch": 2.74, "grad_norm": 12.989602088928223, "learning_rate": 3.6668007189172554e-06, "loss": 0.1364, "step": 278650 }, { "epoch": 2.74, "grad_norm": 3.4532084465026855, "learning_rate": 3.666676596463007e-06, "loss": 0.2387, "step": 278675 }, { "epoch": 2.74, "grad_norm": 14.755705833435059, "learning_rate": 3.6665524740087583e-06, "loss": 0.1414, "step": 278700 }, { "epoch": 2.74, "grad_norm": 5.329434871673584, "learning_rate": 3.6664283515545103e-06, "loss": 0.1825, "step": 278725 }, { "epoch": 2.74, "grad_norm": 11.876904487609863, "learning_rate": 3.6663042291002615e-06, "loss": 0.127, "step": 278750 }, { "epoch": 2.74, "grad_norm": 5.478477478027344, "learning_rate": 3.6661801066460127e-06, "loss": 0.1942, "step": 278775 }, { "epoch": 2.74, "grad_norm": 14.533363342285156, "learning_rate": 3.666055984191765e-06, "loss": 0.1208, "step": 278800 }, { "epoch": 2.74, "grad_norm": 4.514535903930664, "learning_rate": 3.665931861737516e-06, "loss": 0.238, "step": 278825 }, { "epoch": 2.74, "grad_norm": 8.217733383178711, "learning_rate": 3.6658077392832676e-06, "loss": 0.1049, "step": 278850 }, { "epoch": 2.74, "grad_norm": 2.5230376720428467, "learning_rate": 3.6656836168290193e-06, "loss": 0.2785, "step": 278875 }, { "epoch": 2.74, "grad_norm": 8.865294456481934, "learning_rate": 3.665559494374771e-06, "loss": 0.1275, "step": 278900 }, { "epoch": 2.74, "grad_norm": 2.4595983028411865, "learning_rate": 3.665435371920522e-06, "loss": 0.1881, "step": 278925 }, { "epoch": 2.74, "grad_norm": 4.937795639038086, "learning_rate": 3.6653112494662733e-06, "loss": 0.1139, "step": 278950 }, { "epoch": 2.74, "grad_norm": 4.6469807624816895, "learning_rate": 3.6651871270120254e-06, "loss": 0.2102, "step": 278975 }, { "epoch": 2.74, "grad_norm": 11.350931167602539, "learning_rate": 3.6650630045577766e-06, "loss": 0.1724, "step": 279000 }, { "epoch": 2.74, "grad_norm": 6.355751991271973, "learning_rate": 3.6649388821035282e-06, "loss": 0.1933, "step": 279025 }, { "epoch": 2.74, "grad_norm": 11.025232315063477, "learning_rate": 3.66481475964928e-06, "loss": 0.1297, "step": 279050 }, { "epoch": 2.74, "grad_norm": 6.773374080657959, "learning_rate": 3.6646906371950315e-06, "loss": 0.2049, "step": 279075 }, { "epoch": 2.74, "grad_norm": 7.940882682800293, "learning_rate": 3.6645665147407827e-06, "loss": 0.1539, "step": 279100 }, { "epoch": 2.74, "grad_norm": 2.6477408409118652, "learning_rate": 3.6644423922865348e-06, "loss": 0.2238, "step": 279125 }, { "epoch": 2.74, "grad_norm": 17.921932220458984, "learning_rate": 3.664318269832286e-06, "loss": 0.1393, "step": 279150 }, { "epoch": 2.74, "grad_norm": 3.804824113845825, "learning_rate": 3.664194147378037e-06, "loss": 0.1759, "step": 279175 }, { "epoch": 2.75, "grad_norm": 12.392290115356445, "learning_rate": 3.6640700249237893e-06, "loss": 0.131, "step": 279200 }, { "epoch": 2.75, "grad_norm": 5.9213032722473145, "learning_rate": 3.6639508673677103e-06, "loss": 0.2298, "step": 279225 }, { "epoch": 2.75, "grad_norm": 16.358478546142578, "learning_rate": 3.6638267449134624e-06, "loss": 0.1237, "step": 279250 }, { "epoch": 2.75, "grad_norm": 1.7328972816467285, "learning_rate": 3.6637026224592136e-06, "loss": 0.2103, "step": 279275 }, { "epoch": 2.75, "grad_norm": 18.386260986328125, "learning_rate": 3.663578500004965e-06, "loss": 0.1679, "step": 279300 }, { "epoch": 2.75, "grad_norm": 3.342925548553467, "learning_rate": 3.663454377550717e-06, "loss": 0.1914, "step": 279325 }, { "epoch": 2.75, "grad_norm": 12.952763557434082, "learning_rate": 3.6633302550964685e-06, "loss": 0.1187, "step": 279350 }, { "epoch": 2.75, "grad_norm": 2.0360472202301025, "learning_rate": 3.6632061326422197e-06, "loss": 0.1994, "step": 279375 }, { "epoch": 2.75, "grad_norm": 12.065855026245117, "learning_rate": 3.6630820101879717e-06, "loss": 0.1197, "step": 279400 }, { "epoch": 2.75, "grad_norm": 5.517395496368408, "learning_rate": 3.662957887733723e-06, "loss": 0.2369, "step": 279425 }, { "epoch": 2.75, "grad_norm": 12.025093078613281, "learning_rate": 3.662833765279474e-06, "loss": 0.1378, "step": 279450 }, { "epoch": 2.75, "grad_norm": 4.5131354331970215, "learning_rate": 3.6627096428252262e-06, "loss": 0.2186, "step": 279475 }, { "epoch": 2.75, "grad_norm": 11.760538101196289, "learning_rate": 3.6625855203709774e-06, "loss": 0.1163, "step": 279500 }, { "epoch": 2.75, "grad_norm": 7.006706714630127, "learning_rate": 3.662461397916729e-06, "loss": 0.1752, "step": 279525 }, { "epoch": 2.75, "grad_norm": 9.097673416137695, "learning_rate": 3.6623372754624803e-06, "loss": 0.1313, "step": 279550 }, { "epoch": 2.75, "grad_norm": 3.5367300510406494, "learning_rate": 3.6622131530082323e-06, "loss": 0.1711, "step": 279575 }, { "epoch": 2.75, "grad_norm": 13.82163143157959, "learning_rate": 3.6620890305539835e-06, "loss": 0.1429, "step": 279600 }, { "epoch": 2.75, "grad_norm": 3.8794922828674316, "learning_rate": 3.6619649080997348e-06, "loss": 0.2058, "step": 279625 }, { "epoch": 2.75, "grad_norm": 24.25577735900879, "learning_rate": 3.661840785645487e-06, "loss": 0.1305, "step": 279650 }, { "epoch": 2.75, "grad_norm": 3.3851165771484375, "learning_rate": 3.661716663191238e-06, "loss": 0.2265, "step": 279675 }, { "epoch": 2.75, "grad_norm": 13.074548721313477, "learning_rate": 3.6615925407369897e-06, "loss": 0.1432, "step": 279700 }, { "epoch": 2.75, "grad_norm": 4.924089431762695, "learning_rate": 3.6614684182827413e-06, "loss": 0.2377, "step": 279725 }, { "epoch": 2.75, "grad_norm": 6.957344055175781, "learning_rate": 3.661344295828493e-06, "loss": 0.1335, "step": 279750 }, { "epoch": 2.75, "grad_norm": 4.973004341125488, "learning_rate": 3.661220173374244e-06, "loss": 0.208, "step": 279775 }, { "epoch": 2.75, "grad_norm": 17.37660789489746, "learning_rate": 3.661096050919996e-06, "loss": 0.1133, "step": 279800 }, { "epoch": 2.75, "grad_norm": 1.5175528526306152, "learning_rate": 3.6609719284657474e-06, "loss": 0.2009, "step": 279825 }, { "epoch": 2.75, "grad_norm": 8.96983814239502, "learning_rate": 3.6608478060114986e-06, "loss": 0.1322, "step": 279850 }, { "epoch": 2.75, "grad_norm": 5.784505844116211, "learning_rate": 3.6607236835572507e-06, "loss": 0.2394, "step": 279875 }, { "epoch": 2.75, "grad_norm": 15.752903938293457, "learning_rate": 3.660599561103002e-06, "loss": 0.1313, "step": 279900 }, { "epoch": 2.75, "grad_norm": 4.707602500915527, "learning_rate": 3.6604754386487535e-06, "loss": 0.2066, "step": 279925 }, { "epoch": 2.75, "grad_norm": 6.832334041595459, "learning_rate": 3.660351316194505e-06, "loss": 0.1088, "step": 279950 }, { "epoch": 2.75, "grad_norm": 2.7863075733184814, "learning_rate": 3.660227193740257e-06, "loss": 0.2142, "step": 279975 }, { "epoch": 2.75, "grad_norm": 11.241717338562012, "learning_rate": 3.660103071286008e-06, "loss": 0.1383, "step": 280000 }, { "epoch": 2.75, "eval_loss": 0.5914446115493774, "eval_runtime": 6043.9859, "eval_samples_per_second": 1.566, "eval_steps_per_second": 0.196, "eval_wer": 0.12125796690916114, "step": 280000 }, { "epoch": 2.75, "grad_norm": 1.3952761888504028, "learning_rate": 3.65997894883176e-06, "loss": 0.2256, "step": 280025 }, { "epoch": 2.75, "grad_norm": 10.772749900817871, "learning_rate": 3.6598548263775113e-06, "loss": 0.1338, "step": 280050 }, { "epoch": 2.75, "grad_norm": 5.5389204025268555, "learning_rate": 3.6597307039232625e-06, "loss": 0.2215, "step": 280075 }, { "epoch": 2.75, "grad_norm": 14.221858024597168, "learning_rate": 3.6596065814690146e-06, "loss": 0.101, "step": 280100 }, { "epoch": 2.75, "grad_norm": 4.92519998550415, "learning_rate": 3.6594824590147658e-06, "loss": 0.1785, "step": 280125 }, { "epoch": 2.75, "grad_norm": 8.418288230895996, "learning_rate": 3.6593583365605174e-06, "loss": 0.1401, "step": 280150 }, { "epoch": 2.75, "grad_norm": 3.9093191623687744, "learning_rate": 3.659234214106269e-06, "loss": 0.2326, "step": 280175 }, { "epoch": 2.75, "grad_norm": 14.920296669006348, "learning_rate": 3.6591100916520207e-06, "loss": 0.1199, "step": 280200 }, { "epoch": 2.76, "grad_norm": 9.74343490600586, "learning_rate": 3.658985969197772e-06, "loss": 0.2143, "step": 280225 }, { "epoch": 2.76, "grad_norm": 13.59263801574707, "learning_rate": 3.658861846743524e-06, "loss": 0.1033, "step": 280250 }, { "epoch": 2.76, "grad_norm": 4.284630298614502, "learning_rate": 3.658737724289275e-06, "loss": 0.1971, "step": 280275 }, { "epoch": 2.76, "grad_norm": 10.050153732299805, "learning_rate": 3.6586136018350264e-06, "loss": 0.1621, "step": 280300 }, { "epoch": 2.76, "grad_norm": 5.7413458824157715, "learning_rate": 3.6584894793807784e-06, "loss": 0.2142, "step": 280325 }, { "epoch": 2.76, "grad_norm": 8.464579582214355, "learning_rate": 3.6583653569265296e-06, "loss": 0.172, "step": 280350 }, { "epoch": 2.76, "grad_norm": 3.303565263748169, "learning_rate": 3.6582412344722813e-06, "loss": 0.2104, "step": 280375 }, { "epoch": 2.76, "grad_norm": 12.771162986755371, "learning_rate": 3.6581171120180325e-06, "loss": 0.0915, "step": 280400 }, { "epoch": 2.76, "grad_norm": 3.8470427989959717, "learning_rate": 3.6579929895637845e-06, "loss": 0.2122, "step": 280425 }, { "epoch": 2.76, "grad_norm": 8.532930374145508, "learning_rate": 3.6578688671095357e-06, "loss": 0.14, "step": 280450 }, { "epoch": 2.76, "grad_norm": 2.8496954441070557, "learning_rate": 3.657744744655287e-06, "loss": 0.2172, "step": 280475 }, { "epoch": 2.76, "grad_norm": 6.472612380981445, "learning_rate": 3.657620622201039e-06, "loss": 0.1074, "step": 280500 }, { "epoch": 2.76, "grad_norm": 7.421526908874512, "learning_rate": 3.6574964997467902e-06, "loss": 0.2127, "step": 280525 }, { "epoch": 2.76, "grad_norm": 5.397661209106445, "learning_rate": 3.657372377292542e-06, "loss": 0.1413, "step": 280550 }, { "epoch": 2.76, "grad_norm": 6.260892391204834, "learning_rate": 3.6572482548382935e-06, "loss": 0.2181, "step": 280575 }, { "epoch": 2.76, "grad_norm": 16.535051345825195, "learning_rate": 3.657124132384045e-06, "loss": 0.1526, "step": 280600 }, { "epoch": 2.76, "grad_norm": 2.298795461654663, "learning_rate": 3.6570000099297963e-06, "loss": 0.2334, "step": 280625 }, { "epoch": 2.76, "grad_norm": 13.043130874633789, "learning_rate": 3.6568758874755484e-06, "loss": 0.1618, "step": 280650 }, { "epoch": 2.76, "grad_norm": 6.547994136810303, "learning_rate": 3.6567517650212996e-06, "loss": 0.2364, "step": 280675 }, { "epoch": 2.76, "grad_norm": 8.230029106140137, "learning_rate": 3.6566276425670513e-06, "loss": 0.1506, "step": 280700 }, { "epoch": 2.76, "grad_norm": 9.678531646728516, "learning_rate": 3.656503520112803e-06, "loss": 0.1834, "step": 280725 }, { "epoch": 2.76, "grad_norm": 11.147993087768555, "learning_rate": 3.6563793976585545e-06, "loss": 0.1029, "step": 280750 }, { "epoch": 2.76, "grad_norm": 2.817396640777588, "learning_rate": 3.6562552752043057e-06, "loss": 0.2108, "step": 280775 }, { "epoch": 2.76, "grad_norm": 10.345524787902832, "learning_rate": 3.656131152750058e-06, "loss": 0.1554, "step": 280800 }, { "epoch": 2.76, "grad_norm": 5.951668739318848, "learning_rate": 3.656007030295809e-06, "loss": 0.2519, "step": 280825 }, { "epoch": 2.76, "grad_norm": 17.759668350219727, "learning_rate": 3.6558829078415602e-06, "loss": 0.1339, "step": 280850 }, { "epoch": 2.76, "grad_norm": 6.031090259552002, "learning_rate": 3.6557587853873123e-06, "loss": 0.2253, "step": 280875 }, { "epoch": 2.76, "grad_norm": 7.744526386260986, "learning_rate": 3.6556346629330635e-06, "loss": 0.0793, "step": 280900 }, { "epoch": 2.76, "grad_norm": 5.095221996307373, "learning_rate": 3.655510540478815e-06, "loss": 0.2265, "step": 280925 }, { "epoch": 2.76, "grad_norm": 11.900420188903809, "learning_rate": 3.6553864180245668e-06, "loss": 0.1087, "step": 280950 }, { "epoch": 2.76, "grad_norm": 4.601036071777344, "learning_rate": 3.6552622955703184e-06, "loss": 0.2091, "step": 280975 }, { "epoch": 2.76, "grad_norm": 11.986382484436035, "learning_rate": 3.6551381731160696e-06, "loss": 0.1196, "step": 281000 }, { "epoch": 2.76, "grad_norm": 3.901420831680298, "learning_rate": 3.6550140506618217e-06, "loss": 0.1538, "step": 281025 }, { "epoch": 2.76, "grad_norm": 8.405177116394043, "learning_rate": 3.654889928207573e-06, "loss": 0.1441, "step": 281050 }, { "epoch": 2.76, "grad_norm": 5.062704086303711, "learning_rate": 3.654765805753324e-06, "loss": 0.2379, "step": 281075 }, { "epoch": 2.76, "grad_norm": 16.205585479736328, "learning_rate": 3.654641683299076e-06, "loss": 0.1249, "step": 281100 }, { "epoch": 2.76, "grad_norm": 4.93729829788208, "learning_rate": 3.6545175608448274e-06, "loss": 0.2007, "step": 281125 }, { "epoch": 2.76, "grad_norm": 11.593887329101562, "learning_rate": 3.654393438390579e-06, "loss": 0.1331, "step": 281150 }, { "epoch": 2.76, "grad_norm": 4.076699733734131, "learning_rate": 3.6542693159363306e-06, "loss": 0.1725, "step": 281175 }, { "epoch": 2.76, "grad_norm": 10.251103401184082, "learning_rate": 3.6541451934820823e-06, "loss": 0.096, "step": 281200 }, { "epoch": 2.77, "grad_norm": 2.9095137119293213, "learning_rate": 3.6540210710278335e-06, "loss": 0.2118, "step": 281225 }, { "epoch": 2.77, "grad_norm": 10.174578666687012, "learning_rate": 3.6538969485735847e-06, "loss": 0.1107, "step": 281250 }, { "epoch": 2.77, "grad_norm": 7.278975486755371, "learning_rate": 3.6537728261193367e-06, "loss": 0.2156, "step": 281275 }, { "epoch": 2.77, "grad_norm": 11.18313217163086, "learning_rate": 3.653648703665088e-06, "loss": 0.1187, "step": 281300 }, { "epoch": 2.77, "grad_norm": 5.139817237854004, "learning_rate": 3.6535245812108396e-06, "loss": 0.235, "step": 281325 }, { "epoch": 2.77, "grad_norm": 13.719741821289062, "learning_rate": 3.6534004587565912e-06, "loss": 0.1203, "step": 281350 }, { "epoch": 2.77, "grad_norm": 5.749675273895264, "learning_rate": 3.653276336302343e-06, "loss": 0.2194, "step": 281375 }, { "epoch": 2.77, "grad_norm": 11.96860122680664, "learning_rate": 3.653152213848094e-06, "loss": 0.0871, "step": 281400 }, { "epoch": 2.77, "grad_norm": 7.210005760192871, "learning_rate": 3.653028091393846e-06, "loss": 0.215, "step": 281425 }, { "epoch": 2.77, "grad_norm": 8.955204010009766, "learning_rate": 3.6529039689395973e-06, "loss": 0.145, "step": 281450 }, { "epoch": 2.77, "grad_norm": 0.529490053653717, "learning_rate": 3.6527798464853485e-06, "loss": 0.2161, "step": 281475 }, { "epoch": 2.77, "grad_norm": 14.76755142211914, "learning_rate": 3.6526557240311006e-06, "loss": 0.1634, "step": 281500 }, { "epoch": 2.77, "grad_norm": 6.266073226928711, "learning_rate": 3.652531601576852e-06, "loss": 0.2116, "step": 281525 }, { "epoch": 2.77, "grad_norm": 12.888535499572754, "learning_rate": 3.6524074791226035e-06, "loss": 0.1415, "step": 281550 }, { "epoch": 2.77, "grad_norm": 6.614340305328369, "learning_rate": 3.652283356668355e-06, "loss": 0.2418, "step": 281575 }, { "epoch": 2.77, "grad_norm": 10.203564643859863, "learning_rate": 3.6521592342141067e-06, "loss": 0.141, "step": 281600 }, { "epoch": 2.77, "grad_norm": 5.210264205932617, "learning_rate": 3.652035111759858e-06, "loss": 0.2329, "step": 281625 }, { "epoch": 2.77, "grad_norm": 11.392343521118164, "learning_rate": 3.65191098930561e-06, "loss": 0.14, "step": 281650 }, { "epoch": 2.77, "grad_norm": 0.060020849108695984, "learning_rate": 3.651786866851361e-06, "loss": 0.2282, "step": 281675 }, { "epoch": 2.77, "grad_norm": 12.602075576782227, "learning_rate": 3.6516627443971124e-06, "loss": 0.1447, "step": 281700 }, { "epoch": 2.77, "grad_norm": 4.767915725708008, "learning_rate": 3.6515386219428645e-06, "loss": 0.2402, "step": 281725 }, { "epoch": 2.77, "grad_norm": 10.002038955688477, "learning_rate": 3.6514144994886157e-06, "loss": 0.1388, "step": 281750 }, { "epoch": 2.77, "grad_norm": 7.128610134124756, "learning_rate": 3.6512903770343673e-06, "loss": 0.2186, "step": 281775 }, { "epoch": 2.77, "grad_norm": 19.379358291625977, "learning_rate": 3.651166254580119e-06, "loss": 0.1205, "step": 281800 }, { "epoch": 2.77, "grad_norm": 5.38571834564209, "learning_rate": 3.6510421321258706e-06, "loss": 0.2056, "step": 281825 }, { "epoch": 2.77, "grad_norm": 12.599306106567383, "learning_rate": 3.650918009671622e-06, "loss": 0.1075, "step": 281850 }, { "epoch": 2.77, "grad_norm": 8.28485107421875, "learning_rate": 3.650793887217374e-06, "loss": 0.2143, "step": 281875 }, { "epoch": 2.77, "grad_norm": 12.15394115447998, "learning_rate": 3.650669764763125e-06, "loss": 0.1435, "step": 281900 }, { "epoch": 2.77, "grad_norm": 3.0393593311309814, "learning_rate": 3.6505456423088763e-06, "loss": 0.263, "step": 281925 }, { "epoch": 2.77, "grad_norm": 7.9898152351379395, "learning_rate": 3.6504215198546283e-06, "loss": 0.1516, "step": 281950 }, { "epoch": 2.77, "grad_norm": 4.690572261810303, "learning_rate": 3.6502973974003796e-06, "loss": 0.2204, "step": 281975 }, { "epoch": 2.77, "grad_norm": 16.140657424926758, "learning_rate": 3.650173274946131e-06, "loss": 0.1089, "step": 282000 }, { "epoch": 2.77, "grad_norm": 4.508689880371094, "learning_rate": 3.650049152491883e-06, "loss": 0.2149, "step": 282025 }, { "epoch": 2.77, "grad_norm": 5.574522018432617, "learning_rate": 3.6499250300376345e-06, "loss": 0.12, "step": 282050 }, { "epoch": 2.77, "grad_norm": 5.780768871307373, "learning_rate": 3.6498009075833857e-06, "loss": 0.2078, "step": 282075 }, { "epoch": 2.77, "grad_norm": 20.285629272460938, "learning_rate": 3.649676785129137e-06, "loss": 0.1083, "step": 282100 }, { "epoch": 2.77, "grad_norm": 2.503438949584961, "learning_rate": 3.6495576275730588e-06, "loss": 0.2974, "step": 282125 }, { "epoch": 2.77, "grad_norm": 12.259966850280762, "learning_rate": 3.64943350511881e-06, "loss": 0.1221, "step": 282150 }, { "epoch": 2.77, "grad_norm": 4.867554187774658, "learning_rate": 3.649309382664562e-06, "loss": 0.1761, "step": 282175 }, { "epoch": 2.77, "grad_norm": 11.311474800109863, "learning_rate": 3.6491852602103132e-06, "loss": 0.118, "step": 282200 }, { "epoch": 2.77, "grad_norm": 5.9902167320251465, "learning_rate": 3.649061137756065e-06, "loss": 0.2303, "step": 282225 }, { "epoch": 2.78, "grad_norm": 20.064485549926758, "learning_rate": 3.6489370153018165e-06, "loss": 0.1528, "step": 282250 }, { "epoch": 2.78, "grad_norm": 3.5616774559020996, "learning_rate": 3.648812892847568e-06, "loss": 0.2077, "step": 282275 }, { "epoch": 2.78, "grad_norm": 10.01075553894043, "learning_rate": 3.6486887703933194e-06, "loss": 0.1299, "step": 282300 }, { "epoch": 2.78, "grad_norm": 7.401010036468506, "learning_rate": 3.6485646479390714e-06, "loss": 0.1734, "step": 282325 }, { "epoch": 2.78, "grad_norm": 13.681921005249023, "learning_rate": 3.6484405254848226e-06, "loss": 0.1466, "step": 282350 }, { "epoch": 2.78, "grad_norm": 4.676408767700195, "learning_rate": 3.648316403030574e-06, "loss": 0.2461, "step": 282375 }, { "epoch": 2.78, "grad_norm": 14.079795837402344, "learning_rate": 3.648192280576326e-06, "loss": 0.1232, "step": 282400 }, { "epoch": 2.78, "grad_norm": 6.930002212524414, "learning_rate": 3.648068158122077e-06, "loss": 0.2323, "step": 282425 }, { "epoch": 2.78, "grad_norm": 8.912615776062012, "learning_rate": 3.6479440356678287e-06, "loss": 0.1093, "step": 282450 }, { "epoch": 2.78, "grad_norm": 2.005478858947754, "learning_rate": 3.6478199132135804e-06, "loss": 0.2232, "step": 282475 }, { "epoch": 2.78, "grad_norm": 11.618247985839844, "learning_rate": 3.647695790759332e-06, "loss": 0.1197, "step": 282500 }, { "epoch": 2.78, "grad_norm": 2.6555347442626953, "learning_rate": 3.6475716683050832e-06, "loss": 0.2119, "step": 282525 }, { "epoch": 2.78, "grad_norm": 11.037918090820312, "learning_rate": 3.6474475458508353e-06, "loss": 0.1202, "step": 282550 }, { "epoch": 2.78, "grad_norm": 2.2086262702941895, "learning_rate": 3.6473234233965865e-06, "loss": 0.2269, "step": 282575 }, { "epoch": 2.78, "grad_norm": 10.113533020019531, "learning_rate": 3.6471993009423377e-06, "loss": 0.1361, "step": 282600 }, { "epoch": 2.78, "grad_norm": 4.601705074310303, "learning_rate": 3.6470751784880898e-06, "loss": 0.1678, "step": 282625 }, { "epoch": 2.78, "grad_norm": 10.642990112304688, "learning_rate": 3.646951056033841e-06, "loss": 0.1221, "step": 282650 }, { "epoch": 2.78, "grad_norm": 5.272817134857178, "learning_rate": 3.6468269335795926e-06, "loss": 0.1912, "step": 282675 }, { "epoch": 2.78, "grad_norm": 11.774554252624512, "learning_rate": 3.646702811125344e-06, "loss": 0.1661, "step": 282700 }, { "epoch": 2.78, "grad_norm": 6.473571300506592, "learning_rate": 3.646578688671096e-06, "loss": 0.2802, "step": 282725 }, { "epoch": 2.78, "grad_norm": 11.375261306762695, "learning_rate": 3.646454566216847e-06, "loss": 0.1171, "step": 282750 }, { "epoch": 2.78, "grad_norm": 5.1331400871276855, "learning_rate": 3.6463304437625983e-06, "loss": 0.2207, "step": 282775 }, { "epoch": 2.78, "grad_norm": 8.068185806274414, "learning_rate": 3.6462063213083504e-06, "loss": 0.1067, "step": 282800 }, { "epoch": 2.78, "grad_norm": 2.764345645904541, "learning_rate": 3.6460821988541016e-06, "loss": 0.2076, "step": 282825 }, { "epoch": 2.78, "grad_norm": 14.643577575683594, "learning_rate": 3.645958076399853e-06, "loss": 0.1175, "step": 282850 }, { "epoch": 2.78, "grad_norm": 6.757000923156738, "learning_rate": 3.645833953945605e-06, "loss": 0.2009, "step": 282875 }, { "epoch": 2.78, "grad_norm": 13.068033218383789, "learning_rate": 3.6457098314913565e-06, "loss": 0.1068, "step": 282900 }, { "epoch": 2.78, "grad_norm": 3.2447845935821533, "learning_rate": 3.6455857090371077e-06, "loss": 0.241, "step": 282925 }, { "epoch": 2.78, "grad_norm": 6.838975429534912, "learning_rate": 3.6454615865828597e-06, "loss": 0.1178, "step": 282950 }, { "epoch": 2.78, "grad_norm": 5.184457302093506, "learning_rate": 3.645337464128611e-06, "loss": 0.201, "step": 282975 }, { "epoch": 2.78, "grad_norm": 16.351055145263672, "learning_rate": 3.645213341674362e-06, "loss": 0.1213, "step": 283000 }, { "epoch": 2.78, "grad_norm": 3.976724863052368, "learning_rate": 3.6450892192201142e-06, "loss": 0.2315, "step": 283025 }, { "epoch": 2.78, "grad_norm": 10.410261154174805, "learning_rate": 3.6449650967658654e-06, "loss": 0.1383, "step": 283050 }, { "epoch": 2.78, "grad_norm": 3.337554931640625, "learning_rate": 3.644840974311617e-06, "loss": 0.2197, "step": 283075 }, { "epoch": 2.78, "grad_norm": 13.849100112915039, "learning_rate": 3.6447168518573687e-06, "loss": 0.1446, "step": 283100 }, { "epoch": 2.78, "grad_norm": 5.350471496582031, "learning_rate": 3.6445927294031203e-06, "loss": 0.1755, "step": 283125 }, { "epoch": 2.78, "grad_norm": 9.41474723815918, "learning_rate": 3.6444686069488716e-06, "loss": 0.1141, "step": 283150 }, { "epoch": 2.78, "grad_norm": 4.380053520202637, "learning_rate": 3.6443444844946236e-06, "loss": 0.2117, "step": 283175 }, { "epoch": 2.78, "grad_norm": 14.468592643737793, "learning_rate": 3.644220362040375e-06, "loss": 0.1633, "step": 283200 }, { "epoch": 2.78, "grad_norm": 5.2440080642700195, "learning_rate": 3.644096239586126e-06, "loss": 0.2109, "step": 283225 }, { "epoch": 2.78, "grad_norm": 9.952942848205566, "learning_rate": 3.643972117131878e-06, "loss": 0.1344, "step": 283250 }, { "epoch": 2.79, "grad_norm": 5.283945083618164, "learning_rate": 3.6438479946776293e-06, "loss": 0.2083, "step": 283275 }, { "epoch": 2.79, "grad_norm": 5.2525224685668945, "learning_rate": 3.643723872223381e-06, "loss": 0.0992, "step": 283300 }, { "epoch": 2.79, "grad_norm": 6.826289176940918, "learning_rate": 3.6435997497691326e-06, "loss": 0.2256, "step": 283325 }, { "epoch": 2.79, "grad_norm": 11.154938697814941, "learning_rate": 3.643475627314884e-06, "loss": 0.1201, "step": 283350 }, { "epoch": 2.79, "grad_norm": 2.3860960006713867, "learning_rate": 3.6433515048606354e-06, "loss": 0.21, "step": 283375 }, { "epoch": 2.79, "grad_norm": 8.808172225952148, "learning_rate": 3.6432273824063875e-06, "loss": 0.1137, "step": 283400 }, { "epoch": 2.79, "grad_norm": 1.7683520317077637, "learning_rate": 3.6431032599521387e-06, "loss": 0.2328, "step": 283425 }, { "epoch": 2.79, "grad_norm": 17.756637573242188, "learning_rate": 3.6429791374978903e-06, "loss": 0.1499, "step": 283450 }, { "epoch": 2.79, "grad_norm": 9.72623062133789, "learning_rate": 3.642855015043642e-06, "loss": 0.249, "step": 283475 }, { "epoch": 2.79, "grad_norm": 8.323747634887695, "learning_rate": 3.6427308925893936e-06, "loss": 0.133, "step": 283500 }, { "epoch": 2.79, "grad_norm": 7.811105251312256, "learning_rate": 3.642606770135145e-06, "loss": 0.2457, "step": 283525 }, { "epoch": 2.79, "grad_norm": 9.715909004211426, "learning_rate": 3.642482647680896e-06, "loss": 0.1504, "step": 283550 }, { "epoch": 2.79, "grad_norm": 5.845432758331299, "learning_rate": 3.642358525226648e-06, "loss": 0.2883, "step": 283575 }, { "epoch": 2.79, "grad_norm": 8.606589317321777, "learning_rate": 3.6422344027723993e-06, "loss": 0.1288, "step": 283600 }, { "epoch": 2.79, "grad_norm": 3.952439546585083, "learning_rate": 3.642110280318151e-06, "loss": 0.231, "step": 283625 }, { "epoch": 2.79, "grad_norm": 11.8035249710083, "learning_rate": 3.6419861578639026e-06, "loss": 0.1026, "step": 283650 }, { "epoch": 2.79, "grad_norm": 4.337858200073242, "learning_rate": 3.641862035409654e-06, "loss": 0.2199, "step": 283675 }, { "epoch": 2.79, "grad_norm": 9.499526023864746, "learning_rate": 3.6417379129554054e-06, "loss": 0.1519, "step": 283700 }, { "epoch": 2.79, "grad_norm": 6.0677409172058105, "learning_rate": 3.6416137905011575e-06, "loss": 0.1962, "step": 283725 }, { "epoch": 2.79, "grad_norm": 13.778653144836426, "learning_rate": 3.6414896680469087e-06, "loss": 0.166, "step": 283750 }, { "epoch": 2.79, "grad_norm": 5.6634979248046875, "learning_rate": 3.64136554559266e-06, "loss": 0.1915, "step": 283775 }, { "epoch": 2.79, "grad_norm": 9.651495933532715, "learning_rate": 3.641241423138412e-06, "loss": 0.1184, "step": 283800 }, { "epoch": 2.79, "grad_norm": 2.0527572631835938, "learning_rate": 3.641117300684163e-06, "loss": 0.232, "step": 283825 }, { "epoch": 2.79, "grad_norm": 8.113773345947266, "learning_rate": 3.640993178229915e-06, "loss": 0.1219, "step": 283850 }, { "epoch": 2.79, "grad_norm": 8.152593612670898, "learning_rate": 3.6408690557756664e-06, "loss": 0.2708, "step": 283875 }, { "epoch": 2.79, "grad_norm": 8.36991024017334, "learning_rate": 3.640744933321418e-06, "loss": 0.1205, "step": 283900 }, { "epoch": 2.79, "grad_norm": 5.96282958984375, "learning_rate": 3.6406208108671693e-06, "loss": 0.1652, "step": 283925 }, { "epoch": 2.79, "grad_norm": 8.517400741577148, "learning_rate": 3.6404966884129213e-06, "loss": 0.1442, "step": 283950 }, { "epoch": 2.79, "grad_norm": 2.5985357761383057, "learning_rate": 3.6403725659586725e-06, "loss": 0.2139, "step": 283975 }, { "epoch": 2.79, "grad_norm": 15.268941879272461, "learning_rate": 3.6402484435044238e-06, "loss": 0.1391, "step": 284000 }, { "epoch": 2.79, "grad_norm": 5.686523914337158, "learning_rate": 3.640124321050176e-06, "loss": 0.2245, "step": 284025 }, { "epoch": 2.79, "grad_norm": 9.35644245147705, "learning_rate": 3.640000198595927e-06, "loss": 0.0939, "step": 284050 }, { "epoch": 2.79, "grad_norm": 4.379866123199463, "learning_rate": 3.6398760761416787e-06, "loss": 0.2647, "step": 284075 }, { "epoch": 2.79, "grad_norm": 10.020744323730469, "learning_rate": 3.6397519536874303e-06, "loss": 0.1252, "step": 284100 }, { "epoch": 2.79, "grad_norm": 2.9045276641845703, "learning_rate": 3.639627831233182e-06, "loss": 0.2332, "step": 284125 }, { "epoch": 2.79, "grad_norm": 10.12477970123291, "learning_rate": 3.639503708778933e-06, "loss": 0.1645, "step": 284150 }, { "epoch": 2.79, "grad_norm": 2.520125150680542, "learning_rate": 3.639379586324685e-06, "loss": 0.2197, "step": 284175 }, { "epoch": 2.79, "grad_norm": 7.6929240226745605, "learning_rate": 3.6392554638704364e-06, "loss": 0.0917, "step": 284200 }, { "epoch": 2.79, "grad_norm": 3.7098617553710938, "learning_rate": 3.6391313414161876e-06, "loss": 0.2131, "step": 284225 }, { "epoch": 2.79, "grad_norm": 8.761673927307129, "learning_rate": 3.6390072189619397e-06, "loss": 0.1266, "step": 284250 }, { "epoch": 2.8, "grad_norm": 2.815437078475952, "learning_rate": 3.6388880614058607e-06, "loss": 0.2181, "step": 284275 }, { "epoch": 2.8, "grad_norm": 8.173386573791504, "learning_rate": 3.6387639389516123e-06, "loss": 0.1194, "step": 284300 }, { "epoch": 2.8, "grad_norm": 1.919928789138794, "learning_rate": 3.638639816497364e-06, "loss": 0.18, "step": 284325 }, { "epoch": 2.8, "grad_norm": 15.368486404418945, "learning_rate": 3.6385156940431156e-06, "loss": 0.1794, "step": 284350 }, { "epoch": 2.8, "grad_norm": 6.589108467102051, "learning_rate": 3.638391571588867e-06, "loss": 0.2362, "step": 284375 }, { "epoch": 2.8, "grad_norm": 13.050298690795898, "learning_rate": 3.638267449134619e-06, "loss": 0.123, "step": 284400 }, { "epoch": 2.8, "grad_norm": 3.544402599334717, "learning_rate": 3.63814332668037e-06, "loss": 0.2072, "step": 284425 }, { "epoch": 2.8, "grad_norm": 5.242176055908203, "learning_rate": 3.6380192042261213e-06, "loss": 0.1281, "step": 284450 }, { "epoch": 2.8, "grad_norm": 8.651176452636719, "learning_rate": 3.6378950817718734e-06, "loss": 0.209, "step": 284475 }, { "epoch": 2.8, "grad_norm": 10.856661796569824, "learning_rate": 3.6377709593176246e-06, "loss": 0.1207, "step": 284500 }, { "epoch": 2.8, "grad_norm": 0.4514458477497101, "learning_rate": 3.6376468368633762e-06, "loss": 0.1947, "step": 284525 }, { "epoch": 2.8, "grad_norm": 12.347933769226074, "learning_rate": 3.637522714409128e-06, "loss": 0.1072, "step": 284550 }, { "epoch": 2.8, "grad_norm": 7.559095859527588, "learning_rate": 3.6373985919548795e-06, "loss": 0.2347, "step": 284575 }, { "epoch": 2.8, "grad_norm": 11.168743133544922, "learning_rate": 3.6372744695006307e-06, "loss": 0.111, "step": 284600 }, { "epoch": 2.8, "grad_norm": 4.694808483123779, "learning_rate": 3.6371503470463828e-06, "loss": 0.1845, "step": 284625 }, { "epoch": 2.8, "grad_norm": 12.235424041748047, "learning_rate": 3.637026224592134e-06, "loss": 0.1536, "step": 284650 }, { "epoch": 2.8, "grad_norm": 6.70759391784668, "learning_rate": 3.636902102137885e-06, "loss": 0.2034, "step": 284675 }, { "epoch": 2.8, "grad_norm": 9.87175178527832, "learning_rate": 3.6367779796836372e-06, "loss": 0.1569, "step": 284700 }, { "epoch": 2.8, "grad_norm": 4.189244747161865, "learning_rate": 3.6366538572293884e-06, "loss": 0.186, "step": 284725 }, { "epoch": 2.8, "grad_norm": 13.312176704406738, "learning_rate": 3.63652973477514e-06, "loss": 0.1228, "step": 284750 }, { "epoch": 2.8, "grad_norm": 72.41368865966797, "learning_rate": 3.6364056123208917e-06, "loss": 0.2206, "step": 284775 }, { "epoch": 2.8, "grad_norm": 16.744077682495117, "learning_rate": 3.6362814898666434e-06, "loss": 0.1799, "step": 284800 }, { "epoch": 2.8, "grad_norm": 5.268740653991699, "learning_rate": 3.6361573674123946e-06, "loss": 0.2128, "step": 284825 }, { "epoch": 2.8, "grad_norm": 14.081637382507324, "learning_rate": 3.6360332449581466e-06, "loss": 0.1311, "step": 284850 }, { "epoch": 2.8, "grad_norm": 5.569352626800537, "learning_rate": 3.635909122503898e-06, "loss": 0.1615, "step": 284875 }, { "epoch": 2.8, "grad_norm": 9.436817169189453, "learning_rate": 3.635785000049649e-06, "loss": 0.156, "step": 284900 }, { "epoch": 2.8, "grad_norm": 4.940009117126465, "learning_rate": 3.6356608775954007e-06, "loss": 0.2447, "step": 284925 }, { "epoch": 2.8, "grad_norm": 10.458898544311523, "learning_rate": 3.6355367551411523e-06, "loss": 0.121, "step": 284950 }, { "epoch": 2.8, "grad_norm": 3.604156494140625, "learning_rate": 3.635412632686904e-06, "loss": 0.2375, "step": 284975 }, { "epoch": 2.8, "grad_norm": 13.637158393859863, "learning_rate": 3.635288510232655e-06, "loss": 0.1469, "step": 285000 }, { "epoch": 2.8, "grad_norm": 2.705906867980957, "learning_rate": 3.6351643877784072e-06, "loss": 0.2264, "step": 285025 }, { "epoch": 2.8, "grad_norm": 15.751299858093262, "learning_rate": 3.6350402653241584e-06, "loss": 0.1415, "step": 285050 }, { "epoch": 2.8, "grad_norm": 5.06045389175415, "learning_rate": 3.6349161428699096e-06, "loss": 0.1819, "step": 285075 }, { "epoch": 2.8, "grad_norm": 15.310534477233887, "learning_rate": 3.6347920204156617e-06, "loss": 0.1397, "step": 285100 }, { "epoch": 2.8, "grad_norm": 4.053846836090088, "learning_rate": 3.634667897961413e-06, "loss": 0.207, "step": 285125 }, { "epoch": 2.8, "grad_norm": 15.226154327392578, "learning_rate": 3.6345437755071645e-06, "loss": 0.1481, "step": 285150 }, { "epoch": 2.8, "grad_norm": 2.6656577587127686, "learning_rate": 3.634419653052916e-06, "loss": 0.2264, "step": 285175 }, { "epoch": 2.8, "grad_norm": 11.633223533630371, "learning_rate": 3.634295530598668e-06, "loss": 0.1151, "step": 285200 }, { "epoch": 2.8, "grad_norm": 10.69573974609375, "learning_rate": 3.634171408144419e-06, "loss": 0.2204, "step": 285225 }, { "epoch": 2.8, "grad_norm": 7.262001991271973, "learning_rate": 3.634047285690171e-06, "loss": 0.1001, "step": 285250 }, { "epoch": 2.8, "grad_norm": 5.134212970733643, "learning_rate": 3.6339231632359223e-06, "loss": 0.1932, "step": 285275 }, { "epoch": 2.81, "grad_norm": 11.365349769592285, "learning_rate": 3.6337990407816735e-06, "loss": 0.1365, "step": 285300 }, { "epoch": 2.81, "grad_norm": 5.302720546722412, "learning_rate": 3.6336749183274256e-06, "loss": 0.2164, "step": 285325 }, { "epoch": 2.81, "grad_norm": 10.579732894897461, "learning_rate": 3.6335507958731768e-06, "loss": 0.1664, "step": 285350 }, { "epoch": 2.81, "grad_norm": 2.9426608085632324, "learning_rate": 3.6334266734189284e-06, "loss": 0.2321, "step": 285375 }, { "epoch": 2.81, "grad_norm": 15.659832954406738, "learning_rate": 3.63330255096468e-06, "loss": 0.133, "step": 285400 }, { "epoch": 2.81, "grad_norm": 5.1434245109558105, "learning_rate": 3.6331784285104317e-06, "loss": 0.2222, "step": 285425 }, { "epoch": 2.81, "grad_norm": 9.559778213500977, "learning_rate": 3.633054306056183e-06, "loss": 0.1372, "step": 285450 }, { "epoch": 2.81, "grad_norm": 2.539532423019409, "learning_rate": 3.632930183601935e-06, "loss": 0.2461, "step": 285475 }, { "epoch": 2.81, "grad_norm": 11.419366836547852, "learning_rate": 3.632806061147686e-06, "loss": 0.1356, "step": 285500 }, { "epoch": 2.81, "grad_norm": 4.991747856140137, "learning_rate": 3.632686903591607e-06, "loss": 0.1842, "step": 285525 }, { "epoch": 2.81, "grad_norm": 12.961212158203125, "learning_rate": 3.6325627811373593e-06, "loss": 0.1166, "step": 285550 }, { "epoch": 2.81, "grad_norm": 5.166221618652344, "learning_rate": 3.6324386586831105e-06, "loss": 0.2379, "step": 285575 }, { "epoch": 2.81, "grad_norm": 17.90041732788086, "learning_rate": 3.632314536228862e-06, "loss": 0.134, "step": 285600 }, { "epoch": 2.81, "grad_norm": 0.32812821865081787, "learning_rate": 3.6321904137746137e-06, "loss": 0.1815, "step": 285625 }, { "epoch": 2.81, "grad_norm": 17.840158462524414, "learning_rate": 3.6320662913203654e-06, "loss": 0.1729, "step": 285650 }, { "epoch": 2.81, "grad_norm": 3.9268786907196045, "learning_rate": 3.6319421688661166e-06, "loss": 0.2182, "step": 285675 }, { "epoch": 2.81, "grad_norm": 13.784686088562012, "learning_rate": 3.6318180464118686e-06, "loss": 0.1552, "step": 285700 }, { "epoch": 2.81, "grad_norm": 6.94962215423584, "learning_rate": 3.63169392395762e-06, "loss": 0.2218, "step": 285725 }, { "epoch": 2.81, "grad_norm": 36.683048248291016, "learning_rate": 3.631569801503371e-06, "loss": 0.1678, "step": 285750 }, { "epoch": 2.81, "grad_norm": 6.544336318969727, "learning_rate": 3.631445679049123e-06, "loss": 0.2454, "step": 285775 }, { "epoch": 2.81, "grad_norm": 13.194809913635254, "learning_rate": 3.6313215565948743e-06, "loss": 0.1285, "step": 285800 }, { "epoch": 2.81, "grad_norm": 1.9260772466659546, "learning_rate": 3.631197434140626e-06, "loss": 0.2082, "step": 285825 }, { "epoch": 2.81, "grad_norm": 10.954002380371094, "learning_rate": 3.6310733116863776e-06, "loss": 0.1282, "step": 285850 }, { "epoch": 2.81, "grad_norm": 8.09177303314209, "learning_rate": 3.6309491892321292e-06, "loss": 0.2754, "step": 285875 }, { "epoch": 2.81, "grad_norm": 6.676877498626709, "learning_rate": 3.6308250667778804e-06, "loss": 0.1394, "step": 285900 }, { "epoch": 2.81, "grad_norm": 7.5056257247924805, "learning_rate": 3.6307009443236325e-06, "loss": 0.2038, "step": 285925 }, { "epoch": 2.81, "grad_norm": 11.898747444152832, "learning_rate": 3.6305768218693837e-06, "loss": 0.1302, "step": 285950 }, { "epoch": 2.81, "grad_norm": 6.125164985656738, "learning_rate": 3.630452699415135e-06, "loss": 0.2257, "step": 285975 }, { "epoch": 2.81, "grad_norm": 18.130325317382812, "learning_rate": 3.630328576960887e-06, "loss": 0.1122, "step": 286000 }, { "epoch": 2.81, "grad_norm": 7.8840789794921875, "learning_rate": 3.630204454506638e-06, "loss": 0.2399, "step": 286025 }, { "epoch": 2.81, "grad_norm": 13.256669998168945, "learning_rate": 3.63008033205239e-06, "loss": 0.1587, "step": 286050 }, { "epoch": 2.81, "grad_norm": 5.991014003753662, "learning_rate": 3.6299562095981415e-06, "loss": 0.221, "step": 286075 }, { "epoch": 2.81, "grad_norm": 7.861156940460205, "learning_rate": 3.629832087143893e-06, "loss": 0.1089, "step": 286100 }, { "epoch": 2.81, "grad_norm": 6.344436168670654, "learning_rate": 3.6297079646896443e-06, "loss": 0.2599, "step": 286125 }, { "epoch": 2.81, "grad_norm": 16.20574951171875, "learning_rate": 3.6295838422353964e-06, "loss": 0.0992, "step": 286150 }, { "epoch": 2.81, "grad_norm": 4.309108257293701, "learning_rate": 3.6294597197811476e-06, "loss": 0.1974, "step": 286175 }, { "epoch": 2.81, "grad_norm": 9.202783584594727, "learning_rate": 3.629335597326899e-06, "loss": 0.1585, "step": 286200 }, { "epoch": 2.81, "grad_norm": 2.9695045948028564, "learning_rate": 3.629211474872651e-06, "loss": 0.2261, "step": 286225 }, { "epoch": 2.81, "grad_norm": 11.913796424865723, "learning_rate": 3.629087352418402e-06, "loss": 0.1244, "step": 286250 }, { "epoch": 2.81, "grad_norm": 5.623747825622559, "learning_rate": 3.6289632299641537e-06, "loss": 0.2045, "step": 286275 }, { "epoch": 2.81, "grad_norm": 13.579384803771973, "learning_rate": 3.6288391075099053e-06, "loss": 0.0909, "step": 286300 }, { "epoch": 2.82, "grad_norm": 2.943784236907959, "learning_rate": 3.628714985055657e-06, "loss": 0.2751, "step": 286325 }, { "epoch": 2.82, "grad_norm": 10.070684432983398, "learning_rate": 3.628590862601408e-06, "loss": 0.1148, "step": 286350 }, { "epoch": 2.82, "grad_norm": 3.652256488800049, "learning_rate": 3.62846674014716e-06, "loss": 0.2032, "step": 286375 }, { "epoch": 2.82, "grad_norm": 10.705464363098145, "learning_rate": 3.6283426176929115e-06, "loss": 0.1361, "step": 286400 }, { "epoch": 2.82, "grad_norm": 8.353647232055664, "learning_rate": 3.6282184952386627e-06, "loss": 0.2624, "step": 286425 }, { "epoch": 2.82, "grad_norm": 12.0042142868042, "learning_rate": 3.6280943727844143e-06, "loss": 0.1364, "step": 286450 }, { "epoch": 2.82, "grad_norm": 8.035869598388672, "learning_rate": 3.627970250330166e-06, "loss": 0.251, "step": 286475 }, { "epoch": 2.82, "grad_norm": 12.860942840576172, "learning_rate": 3.6278461278759176e-06, "loss": 0.1201, "step": 286500 }, { "epoch": 2.82, "grad_norm": 4.324123382568359, "learning_rate": 3.6277220054216688e-06, "loss": 0.206, "step": 286525 }, { "epoch": 2.82, "grad_norm": 7.643028259277344, "learning_rate": 3.627597882967421e-06, "loss": 0.1161, "step": 286550 }, { "epoch": 2.82, "grad_norm": 5.9643449783325195, "learning_rate": 3.627473760513172e-06, "loss": 0.1816, "step": 286575 }, { "epoch": 2.82, "grad_norm": 13.62214183807373, "learning_rate": 3.6273496380589237e-06, "loss": 0.1471, "step": 286600 }, { "epoch": 2.82, "grad_norm": 4.645349025726318, "learning_rate": 3.6272255156046753e-06, "loss": 0.2292, "step": 286625 }, { "epoch": 2.82, "grad_norm": 9.954902648925781, "learning_rate": 3.627101393150427e-06, "loss": 0.1301, "step": 286650 }, { "epoch": 2.82, "grad_norm": 4.895573139190674, "learning_rate": 3.626977270696178e-06, "loss": 0.1999, "step": 286675 }, { "epoch": 2.82, "grad_norm": 10.516822814941406, "learning_rate": 3.6268531482419302e-06, "loss": 0.1332, "step": 286700 }, { "epoch": 2.82, "grad_norm": 7.639601230621338, "learning_rate": 3.6267290257876814e-06, "loss": 0.1887, "step": 286725 }, { "epoch": 2.82, "grad_norm": 11.766219139099121, "learning_rate": 3.6266049033334326e-06, "loss": 0.1547, "step": 286750 }, { "epoch": 2.82, "grad_norm": 2.993595600128174, "learning_rate": 3.6264807808791847e-06, "loss": 0.2421, "step": 286775 }, { "epoch": 2.82, "grad_norm": 10.219619750976562, "learning_rate": 3.626356658424936e-06, "loss": 0.1298, "step": 286800 }, { "epoch": 2.82, "grad_norm": 0.24938100576400757, "learning_rate": 3.6262325359706876e-06, "loss": 0.2079, "step": 286825 }, { "epoch": 2.82, "grad_norm": 6.229327201843262, "learning_rate": 3.626108413516439e-06, "loss": 0.1339, "step": 286850 }, { "epoch": 2.82, "grad_norm": 3.651614189147949, "learning_rate": 3.625984291062191e-06, "loss": 0.215, "step": 286875 }, { "epoch": 2.82, "grad_norm": 7.3309125900268555, "learning_rate": 3.625860168607942e-06, "loss": 0.1045, "step": 286900 }, { "epoch": 2.82, "grad_norm": 1.6979492902755737, "learning_rate": 3.625736046153694e-06, "loss": 0.2024, "step": 286925 }, { "epoch": 2.82, "grad_norm": 9.776058197021484, "learning_rate": 3.6256119236994453e-06, "loss": 0.1017, "step": 286950 }, { "epoch": 2.82, "grad_norm": 8.614356994628906, "learning_rate": 3.6254878012451965e-06, "loss": 0.2574, "step": 286975 }, { "epoch": 2.82, "grad_norm": 8.57882022857666, "learning_rate": 3.6253636787909486e-06, "loss": 0.1154, "step": 287000 }, { "epoch": 2.82, "grad_norm": 3.599534511566162, "learning_rate": 3.6252395563366998e-06, "loss": 0.2172, "step": 287025 }, { "epoch": 2.82, "grad_norm": 8.666579246520996, "learning_rate": 3.6251154338824514e-06, "loss": 0.1193, "step": 287050 }, { "epoch": 2.82, "grad_norm": 5.161220550537109, "learning_rate": 3.624991311428203e-06, "loss": 0.2515, "step": 287075 }, { "epoch": 2.82, "grad_norm": 10.252264022827148, "learning_rate": 3.6248671889739547e-06, "loss": 0.111, "step": 287100 }, { "epoch": 2.82, "grad_norm": 4.267103672027588, "learning_rate": 3.624743066519706e-06, "loss": 0.1969, "step": 287125 }, { "epoch": 2.82, "grad_norm": 31.941043853759766, "learning_rate": 3.624618944065458e-06, "loss": 0.1357, "step": 287150 }, { "epoch": 2.82, "grad_norm": 7.8925862312316895, "learning_rate": 3.624494821611209e-06, "loss": 0.2707, "step": 287175 }, { "epoch": 2.82, "grad_norm": 14.867817878723145, "learning_rate": 3.6243706991569604e-06, "loss": 0.1328, "step": 287200 }, { "epoch": 2.82, "grad_norm": 4.732635021209717, "learning_rate": 3.624246576702712e-06, "loss": 0.2597, "step": 287225 }, { "epoch": 2.82, "grad_norm": 12.391532897949219, "learning_rate": 3.6241224542484637e-06, "loss": 0.1217, "step": 287250 }, { "epoch": 2.82, "grad_norm": 2.5074853897094727, "learning_rate": 3.6239983317942153e-06, "loss": 0.1861, "step": 287275 }, { "epoch": 2.82, "grad_norm": 7.220678329467773, "learning_rate": 3.6238742093399665e-06, "loss": 0.1507, "step": 287300 }, { "epoch": 2.83, "grad_norm": 8.349897384643555, "learning_rate": 3.6237500868857186e-06, "loss": 0.2483, "step": 287325 }, { "epoch": 2.83, "grad_norm": 11.624712944030762, "learning_rate": 3.6236259644314698e-06, "loss": 0.1318, "step": 287350 }, { "epoch": 2.83, "grad_norm": 5.164602279663086, "learning_rate": 3.623501841977221e-06, "loss": 0.1744, "step": 287375 }, { "epoch": 2.83, "grad_norm": 7.906404495239258, "learning_rate": 3.623377719522973e-06, "loss": 0.0784, "step": 287400 }, { "epoch": 2.83, "grad_norm": 4.171570777893066, "learning_rate": 3.6232535970687243e-06, "loss": 0.2141, "step": 287425 }, { "epoch": 2.83, "grad_norm": 8.218633651733398, "learning_rate": 3.623129474614476e-06, "loss": 0.1355, "step": 287450 }, { "epoch": 2.83, "grad_norm": 17.848485946655273, "learning_rate": 3.6230053521602275e-06, "loss": 0.2524, "step": 287475 }, { "epoch": 2.83, "grad_norm": 7.4622039794921875, "learning_rate": 3.622881229705979e-06, "loss": 0.105, "step": 287500 }, { "epoch": 2.83, "grad_norm": 2.633849620819092, "learning_rate": 3.6227571072517304e-06, "loss": 0.1954, "step": 287525 }, { "epoch": 2.83, "grad_norm": 6.82990026473999, "learning_rate": 3.6226329847974824e-06, "loss": 0.1307, "step": 287550 }, { "epoch": 2.83, "grad_norm": 7.104459285736084, "learning_rate": 3.6225088623432336e-06, "loss": 0.1916, "step": 287575 }, { "epoch": 2.83, "grad_norm": 11.7179594039917, "learning_rate": 3.622384739888985e-06, "loss": 0.1236, "step": 287600 }, { "epoch": 2.83, "grad_norm": 3.8512399196624756, "learning_rate": 3.622260617434737e-06, "loss": 0.2282, "step": 287625 }, { "epoch": 2.83, "grad_norm": 9.478657722473145, "learning_rate": 3.622136494980488e-06, "loss": 0.1361, "step": 287650 }, { "epoch": 2.83, "grad_norm": 2.8103435039520264, "learning_rate": 3.6220123725262398e-06, "loss": 0.2321, "step": 287675 }, { "epoch": 2.83, "grad_norm": 12.817917823791504, "learning_rate": 3.6218882500719914e-06, "loss": 0.1072, "step": 287700 }, { "epoch": 2.83, "grad_norm": 1.5215688943862915, "learning_rate": 3.621764127617743e-06, "loss": 0.2354, "step": 287725 }, { "epoch": 2.83, "grad_norm": 19.019834518432617, "learning_rate": 3.6216400051634942e-06, "loss": 0.1193, "step": 287750 }, { "epoch": 2.83, "grad_norm": 4.716217517852783, "learning_rate": 3.6215158827092463e-06, "loss": 0.1895, "step": 287775 }, { "epoch": 2.83, "grad_norm": 14.2461576461792, "learning_rate": 3.6213917602549975e-06, "loss": 0.1234, "step": 287800 }, { "epoch": 2.83, "grad_norm": 1.3325269222259521, "learning_rate": 3.6212676378007487e-06, "loss": 0.2203, "step": 287825 }, { "epoch": 2.83, "grad_norm": 11.331852912902832, "learning_rate": 3.6211435153465008e-06, "loss": 0.1189, "step": 287850 }, { "epoch": 2.83, "grad_norm": 5.410025596618652, "learning_rate": 3.621019392892252e-06, "loss": 0.2377, "step": 287875 }, { "epoch": 2.83, "grad_norm": 16.437572479248047, "learning_rate": 3.6208952704380036e-06, "loss": 0.0982, "step": 287900 }, { "epoch": 2.83, "grad_norm": 1.8742643594741821, "learning_rate": 3.6207711479837553e-06, "loss": 0.2088, "step": 287925 }, { "epoch": 2.83, "grad_norm": 11.43671989440918, "learning_rate": 3.620647025529507e-06, "loss": 0.1056, "step": 287950 }, { "epoch": 2.83, "grad_norm": 5.499362468719482, "learning_rate": 3.620522903075258e-06, "loss": 0.2373, "step": 287975 }, { "epoch": 2.83, "grad_norm": 3.388784885406494, "learning_rate": 3.62039878062101e-06, "loss": 0.0975, "step": 288000 }, { "epoch": 2.83, "grad_norm": 2.4156575202941895, "learning_rate": 3.6202746581667614e-06, "loss": 0.1799, "step": 288025 }, { "epoch": 2.83, "grad_norm": 11.947430610656738, "learning_rate": 3.6201505357125126e-06, "loss": 0.1125, "step": 288050 }, { "epoch": 2.83, "grad_norm": 3.49969482421875, "learning_rate": 3.6200264132582642e-06, "loss": 0.2285, "step": 288075 }, { "epoch": 2.83, "grad_norm": 18.010465621948242, "learning_rate": 3.619902290804016e-06, "loss": 0.1233, "step": 288100 }, { "epoch": 2.83, "grad_norm": 1.5836974382400513, "learning_rate": 3.6197781683497675e-06, "loss": 0.2351, "step": 288125 }, { "epoch": 2.83, "grad_norm": 8.077611923217773, "learning_rate": 3.6196540458955187e-06, "loss": 0.1236, "step": 288150 }, { "epoch": 2.83, "grad_norm": 5.923636436462402, "learning_rate": 3.6195299234412708e-06, "loss": 0.2357, "step": 288175 }, { "epoch": 2.83, "grad_norm": 20.197856903076172, "learning_rate": 3.619405800987022e-06, "loss": 0.1325, "step": 288200 }, { "epoch": 2.83, "grad_norm": 7.04625129699707, "learning_rate": 3.619281678532773e-06, "loss": 0.192, "step": 288225 }, { "epoch": 2.83, "grad_norm": 3.5195565223693848, "learning_rate": 3.6191575560785252e-06, "loss": 0.1153, "step": 288250 }, { "epoch": 2.83, "grad_norm": 2.8423118591308594, "learning_rate": 3.6190334336242765e-06, "loss": 0.2247, "step": 288275 }, { "epoch": 2.83, "grad_norm": 4.247724533081055, "learning_rate": 3.618909311170028e-06, "loss": 0.1267, "step": 288300 }, { "epoch": 2.83, "grad_norm": 3.9646923542022705, "learning_rate": 3.6187851887157797e-06, "loss": 0.1935, "step": 288325 }, { "epoch": 2.84, "grad_norm": 7.3415350914001465, "learning_rate": 3.6186610662615314e-06, "loss": 0.1217, "step": 288350 }, { "epoch": 2.84, "grad_norm": 2.6614246368408203, "learning_rate": 3.6185369438072826e-06, "loss": 0.2053, "step": 288375 }, { "epoch": 2.84, "grad_norm": 8.312679290771484, "learning_rate": 3.6184128213530346e-06, "loss": 0.1169, "step": 288400 }, { "epoch": 2.84, "grad_norm": 4.210657119750977, "learning_rate": 3.618288698898786e-06, "loss": 0.2175, "step": 288425 }, { "epoch": 2.84, "grad_norm": 13.899917602539062, "learning_rate": 3.618164576444537e-06, "loss": 0.1275, "step": 288450 }, { "epoch": 2.84, "grad_norm": 4.489987850189209, "learning_rate": 3.618040453990289e-06, "loss": 0.2063, "step": 288475 }, { "epoch": 2.84, "grad_norm": 4.802126407623291, "learning_rate": 3.6179163315360403e-06, "loss": 0.1349, "step": 288500 }, { "epoch": 2.84, "grad_norm": 5.011629581451416, "learning_rate": 3.617792209081792e-06, "loss": 0.2536, "step": 288525 }, { "epoch": 2.84, "grad_norm": 6.201585292816162, "learning_rate": 3.6176680866275436e-06, "loss": 0.1214, "step": 288550 }, { "epoch": 2.84, "grad_norm": 5.504988670349121, "learning_rate": 3.6175439641732952e-06, "loss": 0.2182, "step": 288575 }, { "epoch": 2.84, "grad_norm": 11.817023277282715, "learning_rate": 3.6174198417190464e-06, "loss": 0.1478, "step": 288600 }, { "epoch": 2.84, "grad_norm": 4.41533899307251, "learning_rate": 3.6172957192647985e-06, "loss": 0.2758, "step": 288625 }, { "epoch": 2.84, "grad_norm": 11.099417686462402, "learning_rate": 3.6171715968105497e-06, "loss": 0.1503, "step": 288650 }, { "epoch": 2.84, "grad_norm": 6.010300159454346, "learning_rate": 3.617047474356301e-06, "loss": 0.2198, "step": 288675 }, { "epoch": 2.84, "grad_norm": 12.129218101501465, "learning_rate": 3.616923351902053e-06, "loss": 0.1371, "step": 288700 }, { "epoch": 2.84, "grad_norm": 4.132564544677734, "learning_rate": 3.616799229447804e-06, "loss": 0.1976, "step": 288725 }, { "epoch": 2.84, "grad_norm": 12.945209503173828, "learning_rate": 3.616675106993556e-06, "loss": 0.1091, "step": 288750 }, { "epoch": 2.84, "grad_norm": 6.793957233428955, "learning_rate": 3.6165509845393075e-06, "loss": 0.1917, "step": 288775 }, { "epoch": 2.84, "grad_norm": 13.560667037963867, "learning_rate": 3.616426862085059e-06, "loss": 0.1377, "step": 288800 }, { "epoch": 2.84, "grad_norm": 7.624295234680176, "learning_rate": 3.6163027396308103e-06, "loss": 0.2085, "step": 288825 }, { "epoch": 2.84, "grad_norm": 9.048306465148926, "learning_rate": 3.6161786171765624e-06, "loss": 0.1042, "step": 288850 }, { "epoch": 2.84, "grad_norm": 2.8369128704071045, "learning_rate": 3.6160544947223136e-06, "loss": 0.2343, "step": 288875 }, { "epoch": 2.84, "grad_norm": 11.099796295166016, "learning_rate": 3.6159303722680648e-06, "loss": 0.1014, "step": 288900 }, { "epoch": 2.84, "grad_norm": 4.0324225425720215, "learning_rate": 3.6158062498138164e-06, "loss": 0.1835, "step": 288925 }, { "epoch": 2.84, "grad_norm": 7.664016246795654, "learning_rate": 3.615682127359568e-06, "loss": 0.1431, "step": 288950 }, { "epoch": 2.84, "grad_norm": 5.243117809295654, "learning_rate": 3.6155580049053197e-06, "loss": 0.2464, "step": 288975 }, { "epoch": 2.84, "grad_norm": 9.307934761047363, "learning_rate": 3.615433882451071e-06, "loss": 0.1254, "step": 289000 }, { "epoch": 2.84, "grad_norm": 6.066481590270996, "learning_rate": 3.615309759996823e-06, "loss": 0.2269, "step": 289025 }, { "epoch": 2.84, "grad_norm": 11.494053840637207, "learning_rate": 3.615185637542574e-06, "loss": 0.1317, "step": 289050 }, { "epoch": 2.84, "grad_norm": 6.453550815582275, "learning_rate": 3.6150615150883254e-06, "loss": 0.1573, "step": 289075 }, { "epoch": 2.84, "grad_norm": 9.02432632446289, "learning_rate": 3.6149373926340774e-06, "loss": 0.166, "step": 289100 }, { "epoch": 2.84, "grad_norm": 3.0236623287200928, "learning_rate": 3.6148132701798287e-06, "loss": 0.2084, "step": 289125 }, { "epoch": 2.84, "grad_norm": 21.756271362304688, "learning_rate": 3.6146891477255803e-06, "loss": 0.1423, "step": 289150 }, { "epoch": 2.84, "grad_norm": 6.348914623260498, "learning_rate": 3.614565025271332e-06, "loss": 0.2443, "step": 289175 }, { "epoch": 2.84, "grad_norm": 8.872563362121582, "learning_rate": 3.6144409028170836e-06, "loss": 0.1327, "step": 289200 }, { "epoch": 2.84, "grad_norm": 1.8613011837005615, "learning_rate": 3.614321745261005e-06, "loss": 0.2104, "step": 289225 }, { "epoch": 2.84, "grad_norm": 7.250047206878662, "learning_rate": 3.6141976228067566e-06, "loss": 0.0983, "step": 289250 }, { "epoch": 2.84, "grad_norm": 6.6710286140441895, "learning_rate": 3.614073500352508e-06, "loss": 0.1995, "step": 289275 }, { "epoch": 2.84, "grad_norm": 17.849958419799805, "learning_rate": 3.61394937789826e-06, "loss": 0.1773, "step": 289300 }, { "epoch": 2.84, "grad_norm": 5.928658962249756, "learning_rate": 3.613825255444011e-06, "loss": 0.2119, "step": 289325 }, { "epoch": 2.84, "grad_norm": 7.194325923919678, "learning_rate": 3.6137011329897628e-06, "loss": 0.1241, "step": 289350 }, { "epoch": 2.85, "grad_norm": 2.5463576316833496, "learning_rate": 3.6135770105355144e-06, "loss": 0.2193, "step": 289375 }, { "epoch": 2.85, "grad_norm": 11.704577445983887, "learning_rate": 3.613452888081266e-06, "loss": 0.1335, "step": 289400 }, { "epoch": 2.85, "grad_norm": 5.421964168548584, "learning_rate": 3.6133287656270172e-06, "loss": 0.1681, "step": 289425 }, { "epoch": 2.85, "grad_norm": 11.001925468444824, "learning_rate": 3.6132046431727693e-06, "loss": 0.1333, "step": 289450 }, { "epoch": 2.85, "grad_norm": 5.591315269470215, "learning_rate": 3.6130805207185205e-06, "loss": 0.2365, "step": 289475 }, { "epoch": 2.85, "grad_norm": 9.810614585876465, "learning_rate": 3.6129563982642717e-06, "loss": 0.1095, "step": 289500 }, { "epoch": 2.85, "grad_norm": 3.648618698120117, "learning_rate": 3.6128322758100234e-06, "loss": 0.2499, "step": 289525 }, { "epoch": 2.85, "grad_norm": 12.307090759277344, "learning_rate": 3.612708153355775e-06, "loss": 0.1336, "step": 289550 }, { "epoch": 2.85, "grad_norm": 2.3242688179016113, "learning_rate": 3.6125840309015266e-06, "loss": 0.2497, "step": 289575 }, { "epoch": 2.85, "grad_norm": 13.57144832611084, "learning_rate": 3.612459908447278e-06, "loss": 0.1413, "step": 289600 }, { "epoch": 2.85, "grad_norm": 15.705476760864258, "learning_rate": 3.61233578599303e-06, "loss": 0.2484, "step": 289625 }, { "epoch": 2.85, "grad_norm": 14.557129859924316, "learning_rate": 3.612211663538781e-06, "loss": 0.1559, "step": 289650 }, { "epoch": 2.85, "grad_norm": 4.844060897827148, "learning_rate": 3.6120875410845323e-06, "loss": 0.1787, "step": 289675 }, { "epoch": 2.85, "grad_norm": 9.212325096130371, "learning_rate": 3.6119634186302844e-06, "loss": 0.1631, "step": 289700 }, { "epoch": 2.85, "grad_norm": 5.119649887084961, "learning_rate": 3.6118392961760356e-06, "loss": 0.2366, "step": 289725 }, { "epoch": 2.85, "grad_norm": 14.080361366271973, "learning_rate": 3.6117151737217872e-06, "loss": 0.1019, "step": 289750 }, { "epoch": 2.85, "grad_norm": 2.107576847076416, "learning_rate": 3.611591051267539e-06, "loss": 0.2035, "step": 289775 }, { "epoch": 2.85, "grad_norm": 13.450063705444336, "learning_rate": 3.6114669288132905e-06, "loss": 0.1108, "step": 289800 }, { "epoch": 2.85, "grad_norm": 4.89363956451416, "learning_rate": 3.6113428063590417e-06, "loss": 0.2153, "step": 289825 }, { "epoch": 2.85, "grad_norm": 10.555927276611328, "learning_rate": 3.6112186839047938e-06, "loss": 0.1031, "step": 289850 }, { "epoch": 2.85, "grad_norm": 11.387234687805176, "learning_rate": 3.611094561450545e-06, "loss": 0.2065, "step": 289875 }, { "epoch": 2.85, "grad_norm": 11.174098014831543, "learning_rate": 3.610970438996296e-06, "loss": 0.1502, "step": 289900 }, { "epoch": 2.85, "grad_norm": 1.7424994707107544, "learning_rate": 3.6108463165420482e-06, "loss": 0.2118, "step": 289925 }, { "epoch": 2.85, "grad_norm": 10.59371280670166, "learning_rate": 3.6107221940877995e-06, "loss": 0.1531, "step": 289950 }, { "epoch": 2.85, "grad_norm": 3.2146360874176025, "learning_rate": 3.610598071633551e-06, "loss": 0.2479, "step": 289975 }, { "epoch": 2.85, "grad_norm": 11.549264907836914, "learning_rate": 3.6104739491793027e-06, "loss": 0.1134, "step": 290000 }, { "epoch": 2.85, "grad_norm": 5.097687244415283, "learning_rate": 3.6103498267250544e-06, "loss": 0.2275, "step": 290025 }, { "epoch": 2.85, "grad_norm": 11.069452285766602, "learning_rate": 3.6102257042708056e-06, "loss": 0.1271, "step": 290050 }, { "epoch": 2.85, "grad_norm": 1.109602451324463, "learning_rate": 3.6101015818165576e-06, "loss": 0.2072, "step": 290075 }, { "epoch": 2.85, "grad_norm": 16.64283561706543, "learning_rate": 3.609977459362309e-06, "loss": 0.1308, "step": 290100 }, { "epoch": 2.85, "grad_norm": 3.6530022621154785, "learning_rate": 3.60985333690806e-06, "loss": 0.1886, "step": 290125 }, { "epoch": 2.85, "grad_norm": 16.21420669555664, "learning_rate": 3.609729214453812e-06, "loss": 0.1256, "step": 290150 }, { "epoch": 2.85, "grad_norm": 0.8301347494125366, "learning_rate": 3.6096050919995633e-06, "loss": 0.2165, "step": 290175 }, { "epoch": 2.85, "grad_norm": 11.712188720703125, "learning_rate": 3.609480969545315e-06, "loss": 0.1287, "step": 290200 }, { "epoch": 2.85, "grad_norm": 6.080759048461914, "learning_rate": 3.6093568470910666e-06, "loss": 0.222, "step": 290225 }, { "epoch": 2.85, "grad_norm": 8.059551239013672, "learning_rate": 3.6092327246368182e-06, "loss": 0.1083, "step": 290250 }, { "epoch": 2.85, "grad_norm": 2.7291359901428223, "learning_rate": 3.6091086021825694e-06, "loss": 0.1621, "step": 290275 }, { "epoch": 2.85, "grad_norm": 5.1754984855651855, "learning_rate": 3.6089844797283215e-06, "loss": 0.1166, "step": 290300 }, { "epoch": 2.85, "grad_norm": 7.993831157684326, "learning_rate": 3.6088603572740727e-06, "loss": 0.2313, "step": 290325 }, { "epoch": 2.85, "grad_norm": 8.186898231506348, "learning_rate": 3.608736234819824e-06, "loss": 0.1227, "step": 290350 }, { "epoch": 2.86, "grad_norm": 4.355130672454834, "learning_rate": 3.6086121123655756e-06, "loss": 0.1847, "step": 290375 }, { "epoch": 2.86, "grad_norm": 7.9089179039001465, "learning_rate": 3.608487989911327e-06, "loss": 0.1444, "step": 290400 }, { "epoch": 2.86, "grad_norm": 9.826467514038086, "learning_rate": 3.608363867457079e-06, "loss": 0.2474, "step": 290425 }, { "epoch": 2.86, "grad_norm": 11.129597663879395, "learning_rate": 3.60823974500283e-06, "loss": 0.1029, "step": 290450 }, { "epoch": 2.86, "grad_norm": 2.793034076690674, "learning_rate": 3.608115622548582e-06, "loss": 0.2136, "step": 290475 }, { "epoch": 2.86, "grad_norm": 9.348326683044434, "learning_rate": 3.6079915000943333e-06, "loss": 0.129, "step": 290500 }, { "epoch": 2.86, "grad_norm": 0.749882161617279, "learning_rate": 3.6078673776400845e-06, "loss": 0.2135, "step": 290525 }, { "epoch": 2.86, "grad_norm": 7.589458465576172, "learning_rate": 3.6077432551858366e-06, "loss": 0.1365, "step": 290550 }, { "epoch": 2.86, "grad_norm": 5.487229347229004, "learning_rate": 3.607619132731588e-06, "loss": 0.2337, "step": 290575 }, { "epoch": 2.86, "grad_norm": 8.986303329467773, "learning_rate": 3.6074950102773394e-06, "loss": 0.1036, "step": 290600 }, { "epoch": 2.86, "grad_norm": 0.6824688911437988, "learning_rate": 3.607370887823091e-06, "loss": 0.2199, "step": 290625 }, { "epoch": 2.86, "grad_norm": 11.777019500732422, "learning_rate": 3.6072467653688427e-06, "loss": 0.1561, "step": 290650 }, { "epoch": 2.86, "grad_norm": 3.2981035709381104, "learning_rate": 3.607122642914594e-06, "loss": 0.2291, "step": 290675 }, { "epoch": 2.86, "grad_norm": 9.618616104125977, "learning_rate": 3.606998520460346e-06, "loss": 0.144, "step": 290700 }, { "epoch": 2.86, "grad_norm": 4.556008815765381, "learning_rate": 3.606874398006097e-06, "loss": 0.2608, "step": 290725 }, { "epoch": 2.86, "grad_norm": 7.319541931152344, "learning_rate": 3.6067502755518484e-06, "loss": 0.1155, "step": 290750 }, { "epoch": 2.86, "grad_norm": 9.347058296203613, "learning_rate": 3.6066261530976004e-06, "loss": 0.254, "step": 290775 }, { "epoch": 2.86, "grad_norm": 15.014399528503418, "learning_rate": 3.6065020306433517e-06, "loss": 0.1715, "step": 290800 }, { "epoch": 2.86, "grad_norm": 5.477941513061523, "learning_rate": 3.6063779081891033e-06, "loss": 0.2493, "step": 290825 }, { "epoch": 2.86, "grad_norm": 9.946210861206055, "learning_rate": 3.606253785734855e-06, "loss": 0.0919, "step": 290850 }, { "epoch": 2.86, "grad_norm": 6.0839667320251465, "learning_rate": 3.6061296632806066e-06, "loss": 0.2119, "step": 290875 }, { "epoch": 2.86, "grad_norm": 7.9984893798828125, "learning_rate": 3.6060055408263578e-06, "loss": 0.1387, "step": 290900 }, { "epoch": 2.86, "grad_norm": 1.0294674634933472, "learning_rate": 3.60588141837211e-06, "loss": 0.2064, "step": 290925 }, { "epoch": 2.86, "grad_norm": 15.389212608337402, "learning_rate": 3.605757295917861e-06, "loss": 0.1409, "step": 290950 }, { "epoch": 2.86, "grad_norm": 3.2734551429748535, "learning_rate": 3.6056331734636123e-06, "loss": 0.2413, "step": 290975 }, { "epoch": 2.86, "grad_norm": 16.319265365600586, "learning_rate": 3.6055090510093643e-06, "loss": 0.1458, "step": 291000 }, { "epoch": 2.86, "grad_norm": 4.663798809051514, "learning_rate": 3.6053849285551155e-06, "loss": 0.1853, "step": 291025 }, { "epoch": 2.86, "grad_norm": 9.936532974243164, "learning_rate": 3.605260806100867e-06, "loss": 0.1548, "step": 291050 }, { "epoch": 2.86, "grad_norm": 5.34428596496582, "learning_rate": 3.605136683646619e-06, "loss": 0.2154, "step": 291075 }, { "epoch": 2.86, "grad_norm": 11.506084442138672, "learning_rate": 3.6050125611923704e-06, "loss": 0.1872, "step": 291100 }, { "epoch": 2.86, "grad_norm": 3.947526693344116, "learning_rate": 3.6048884387381216e-06, "loss": 0.206, "step": 291125 }, { "epoch": 2.86, "grad_norm": 8.278449058532715, "learning_rate": 3.6047643162838737e-06, "loss": 0.1218, "step": 291150 }, { "epoch": 2.86, "grad_norm": 7.141385078430176, "learning_rate": 3.604640193829625e-06, "loss": 0.2555, "step": 291175 }, { "epoch": 2.86, "grad_norm": 8.49079704284668, "learning_rate": 3.604516071375376e-06, "loss": 0.1352, "step": 291200 }, { "epoch": 2.86, "grad_norm": 7.999627113342285, "learning_rate": 3.6043919489211278e-06, "loss": 0.1874, "step": 291225 }, { "epoch": 2.86, "grad_norm": 11.752049446105957, "learning_rate": 3.6042678264668794e-06, "loss": 0.1479, "step": 291250 }, { "epoch": 2.86, "grad_norm": 6.070516109466553, "learning_rate": 3.604143704012631e-06, "loss": 0.1824, "step": 291275 }, { "epoch": 2.86, "grad_norm": 13.483259201049805, "learning_rate": 3.6040195815583822e-06, "loss": 0.1417, "step": 291300 }, { "epoch": 2.86, "grad_norm": 7.765868663787842, "learning_rate": 3.6038954591041343e-06, "loss": 0.2133, "step": 291325 }, { "epoch": 2.86, "grad_norm": 7.6441450119018555, "learning_rate": 3.6037713366498855e-06, "loss": 0.1471, "step": 291350 }, { "epoch": 2.86, "grad_norm": 5.882936477661133, "learning_rate": 3.6036472141956367e-06, "loss": 0.2011, "step": 291375 }, { "epoch": 2.87, "grad_norm": 14.186738967895508, "learning_rate": 3.6035230917413888e-06, "loss": 0.1298, "step": 291400 }, { "epoch": 2.87, "grad_norm": 3.1536524295806885, "learning_rate": 3.60339896928714e-06, "loss": 0.2104, "step": 291425 }, { "epoch": 2.87, "grad_norm": 9.36993408203125, "learning_rate": 3.6032748468328916e-06, "loss": 0.1532, "step": 291450 }, { "epoch": 2.87, "grad_norm": 7.132637977600098, "learning_rate": 3.6031507243786433e-06, "loss": 0.2296, "step": 291475 }, { "epoch": 2.87, "grad_norm": 8.970880508422852, "learning_rate": 3.603026601924395e-06, "loss": 0.1493, "step": 291500 }, { "epoch": 2.87, "grad_norm": 3.794027328491211, "learning_rate": 3.602902479470146e-06, "loss": 0.2174, "step": 291525 }, { "epoch": 2.87, "grad_norm": 9.383960723876953, "learning_rate": 3.602778357015898e-06, "loss": 0.1193, "step": 291550 }, { "epoch": 2.87, "grad_norm": 4.533374786376953, "learning_rate": 3.6026542345616494e-06, "loss": 0.2008, "step": 291575 }, { "epoch": 2.87, "grad_norm": 13.929715156555176, "learning_rate": 3.6025301121074006e-06, "loss": 0.1087, "step": 291600 }, { "epoch": 2.87, "grad_norm": 3.7816810607910156, "learning_rate": 3.6024059896531526e-06, "loss": 0.2058, "step": 291625 }, { "epoch": 2.87, "grad_norm": 16.950672149658203, "learning_rate": 3.602281867198904e-06, "loss": 0.1535, "step": 291650 }, { "epoch": 2.87, "grad_norm": 1.4514811038970947, "learning_rate": 3.6021577447446555e-06, "loss": 0.2039, "step": 291675 }, { "epoch": 2.87, "grad_norm": 14.592690467834473, "learning_rate": 3.602033622290407e-06, "loss": 0.1178, "step": 291700 }, { "epoch": 2.87, "grad_norm": 4.768588066101074, "learning_rate": 3.6019094998361588e-06, "loss": 0.1885, "step": 291725 }, { "epoch": 2.87, "grad_norm": 14.846309661865234, "learning_rate": 3.60178537738191e-06, "loss": 0.1713, "step": 291750 }, { "epoch": 2.87, "grad_norm": 2.891416072845459, "learning_rate": 3.601661254927662e-06, "loss": 0.2398, "step": 291775 }, { "epoch": 2.87, "grad_norm": 4.960709095001221, "learning_rate": 3.6015371324734132e-06, "loss": 0.0992, "step": 291800 }, { "epoch": 2.87, "grad_norm": 6.600253582000732, "learning_rate": 3.6014130100191645e-06, "loss": 0.2066, "step": 291825 }, { "epoch": 2.87, "grad_norm": 13.568591117858887, "learning_rate": 3.6012888875649165e-06, "loss": 0.1197, "step": 291850 }, { "epoch": 2.87, "grad_norm": 3.9338395595550537, "learning_rate": 3.6011647651106677e-06, "loss": 0.1833, "step": 291875 }, { "epoch": 2.87, "grad_norm": 8.7866792678833, "learning_rate": 3.6010406426564194e-06, "loss": 0.1597, "step": 291900 }, { "epoch": 2.87, "grad_norm": 3.7693183422088623, "learning_rate": 3.600916520202171e-06, "loss": 0.2344, "step": 291925 }, { "epoch": 2.87, "grad_norm": 8.062003135681152, "learning_rate": 3.6007923977479226e-06, "loss": 0.1209, "step": 291950 }, { "epoch": 2.87, "grad_norm": 3.3418397903442383, "learning_rate": 3.600668275293674e-06, "loss": 0.226, "step": 291975 }, { "epoch": 2.87, "grad_norm": 13.027206420898438, "learning_rate": 3.600544152839426e-06, "loss": 0.1145, "step": 292000 }, { "epoch": 2.87, "grad_norm": 0.4519762098789215, "learning_rate": 3.600420030385177e-06, "loss": 0.1985, "step": 292025 }, { "epoch": 2.87, "grad_norm": 11.349051475524902, "learning_rate": 3.6002959079309283e-06, "loss": 0.1153, "step": 292050 }, { "epoch": 2.87, "grad_norm": 1.545423984527588, "learning_rate": 3.60017178547668e-06, "loss": 0.2174, "step": 292075 }, { "epoch": 2.87, "grad_norm": 6.8588433265686035, "learning_rate": 3.6000476630224316e-06, "loss": 0.1262, "step": 292100 }, { "epoch": 2.87, "grad_norm": 2.840773582458496, "learning_rate": 3.5999235405681832e-06, "loss": 0.1809, "step": 292125 }, { "epoch": 2.87, "grad_norm": 18.85576057434082, "learning_rate": 3.5997994181139344e-06, "loss": 0.1473, "step": 292150 }, { "epoch": 2.87, "grad_norm": 5.461143970489502, "learning_rate": 3.5996752956596865e-06, "loss": 0.1863, "step": 292175 }, { "epoch": 2.87, "grad_norm": 10.737250328063965, "learning_rate": 3.5995511732054377e-06, "loss": 0.1213, "step": 292200 }, { "epoch": 2.87, "grad_norm": 3.7891924381256104, "learning_rate": 3.5994270507511893e-06, "loss": 0.2376, "step": 292225 }, { "epoch": 2.87, "grad_norm": 13.614801406860352, "learning_rate": 3.599302928296941e-06, "loss": 0.1384, "step": 292250 }, { "epoch": 2.87, "grad_norm": 2.6299118995666504, "learning_rate": 3.5991788058426926e-06, "loss": 0.2208, "step": 292275 }, { "epoch": 2.87, "grad_norm": 10.681861877441406, "learning_rate": 3.599054683388444e-06, "loss": 0.1324, "step": 292300 }, { "epoch": 2.87, "grad_norm": 3.70394229888916, "learning_rate": 3.598930560934196e-06, "loss": 0.1757, "step": 292325 }, { "epoch": 2.87, "grad_norm": 10.390795707702637, "learning_rate": 3.598806438479947e-06, "loss": 0.1237, "step": 292350 }, { "epoch": 2.87, "grad_norm": 3.6719493865966797, "learning_rate": 3.5986823160256983e-06, "loss": 0.1955, "step": 292375 }, { "epoch": 2.87, "grad_norm": 9.779565811157227, "learning_rate": 3.5985581935714504e-06, "loss": 0.0979, "step": 292400 }, { "epoch": 2.88, "grad_norm": 5.196412563323975, "learning_rate": 3.5984340711172016e-06, "loss": 0.1982, "step": 292425 }, { "epoch": 2.88, "grad_norm": 10.558429718017578, "learning_rate": 3.5983099486629532e-06, "loss": 0.125, "step": 292450 }, { "epoch": 2.88, "grad_norm": 2.739316463470459, "learning_rate": 3.598185826208705e-06, "loss": 0.2305, "step": 292475 }, { "epoch": 2.88, "grad_norm": 11.335089683532715, "learning_rate": 3.5980617037544565e-06, "loss": 0.1078, "step": 292500 }, { "epoch": 2.88, "grad_norm": 7.0147552490234375, "learning_rate": 3.5979375813002077e-06, "loss": 0.2815, "step": 292525 }, { "epoch": 2.88, "grad_norm": 12.210226058959961, "learning_rate": 3.5978134588459598e-06, "loss": 0.1075, "step": 292550 }, { "epoch": 2.88, "grad_norm": 3.365553140640259, "learning_rate": 3.597689336391711e-06, "loss": 0.2224, "step": 292575 }, { "epoch": 2.88, "grad_norm": 13.651025772094727, "learning_rate": 3.597565213937462e-06, "loss": 0.1536, "step": 292600 }, { "epoch": 2.88, "grad_norm": 2.565018892288208, "learning_rate": 3.5974410914832142e-06, "loss": 0.2411, "step": 292625 }, { "epoch": 2.88, "grad_norm": 13.489592552185059, "learning_rate": 3.5973169690289654e-06, "loss": 0.1129, "step": 292650 }, { "epoch": 2.88, "grad_norm": 3.8214261531829834, "learning_rate": 3.597192846574717e-06, "loss": 0.2238, "step": 292675 }, { "epoch": 2.88, "grad_norm": 11.496271133422852, "learning_rate": 3.5970687241204687e-06, "loss": 0.1423, "step": 292700 }, { "epoch": 2.88, "grad_norm": 1.457173228263855, "learning_rate": 3.5969446016662204e-06, "loss": 0.2167, "step": 292725 }, { "epoch": 2.88, "grad_norm": 7.057626724243164, "learning_rate": 3.5968204792119716e-06, "loss": 0.1181, "step": 292750 }, { "epoch": 2.88, "grad_norm": 5.536186218261719, "learning_rate": 3.5966963567577236e-06, "loss": 0.1971, "step": 292775 }, { "epoch": 2.88, "grad_norm": 5.404308319091797, "learning_rate": 3.596572234303475e-06, "loss": 0.105, "step": 292800 }, { "epoch": 2.88, "grad_norm": 6.962239742279053, "learning_rate": 3.596448111849226e-06, "loss": 0.2026, "step": 292825 }, { "epoch": 2.88, "grad_norm": 9.981135368347168, "learning_rate": 3.596323989394978e-06, "loss": 0.1408, "step": 292850 }, { "epoch": 2.88, "grad_norm": 4.385182857513428, "learning_rate": 3.5961998669407293e-06, "loss": 0.1887, "step": 292875 }, { "epoch": 2.88, "grad_norm": 10.633809089660645, "learning_rate": 3.596075744486481e-06, "loss": 0.1113, "step": 292900 }, { "epoch": 2.88, "grad_norm": 21.571901321411133, "learning_rate": 3.595951622032232e-06, "loss": 0.2768, "step": 292925 }, { "epoch": 2.88, "grad_norm": 14.076458930969238, "learning_rate": 3.5958274995779842e-06, "loss": 0.1266, "step": 292950 }, { "epoch": 2.88, "grad_norm": 4.692000389099121, "learning_rate": 3.5957033771237354e-06, "loss": 0.2085, "step": 292975 }, { "epoch": 2.88, "grad_norm": 18.159589767456055, "learning_rate": 3.5955792546694866e-06, "loss": 0.1262, "step": 293000 }, { "epoch": 2.88, "grad_norm": 4.949026584625244, "learning_rate": 3.5954551322152387e-06, "loss": 0.1828, "step": 293025 }, { "epoch": 2.88, "grad_norm": 17.54684066772461, "learning_rate": 3.59533100976099e-06, "loss": 0.1492, "step": 293050 }, { "epoch": 2.88, "grad_norm": 6.043826103210449, "learning_rate": 3.5952068873067415e-06, "loss": 0.2254, "step": 293075 }, { "epoch": 2.88, "grad_norm": 9.33897876739502, "learning_rate": 3.595082764852493e-06, "loss": 0.1373, "step": 293100 }, { "epoch": 2.88, "grad_norm": 3.7964699268341064, "learning_rate": 3.594958642398245e-06, "loss": 0.2514, "step": 293125 }, { "epoch": 2.88, "grad_norm": 14.730362892150879, "learning_rate": 3.594834519943996e-06, "loss": 0.1327, "step": 293150 }, { "epoch": 2.88, "grad_norm": 4.669290065765381, "learning_rate": 3.594710397489748e-06, "loss": 0.2271, "step": 293175 }, { "epoch": 2.88, "grad_norm": 7.945171356201172, "learning_rate": 3.5945862750354993e-06, "loss": 0.0973, "step": 293200 }, { "epoch": 2.88, "grad_norm": 8.971599578857422, "learning_rate": 3.5944621525812505e-06, "loss": 0.2357, "step": 293225 }, { "epoch": 2.88, "grad_norm": 22.230533599853516, "learning_rate": 3.5943380301270026e-06, "loss": 0.1353, "step": 293250 }, { "epoch": 2.88, "grad_norm": 1.570678949356079, "learning_rate": 3.5942139076727538e-06, "loss": 0.1889, "step": 293275 }, { "epoch": 2.88, "grad_norm": 16.9199275970459, "learning_rate": 3.5940897852185054e-06, "loss": 0.1469, "step": 293300 }, { "epoch": 2.88, "grad_norm": 5.932873725891113, "learning_rate": 3.593965662764257e-06, "loss": 0.226, "step": 293325 }, { "epoch": 2.88, "grad_norm": 11.803126335144043, "learning_rate": 3.5938415403100087e-06, "loss": 0.1172, "step": 293350 }, { "epoch": 2.88, "grad_norm": 7.152740001678467, "learning_rate": 3.59372238275393e-06, "loss": 0.2201, "step": 293375 }, { "epoch": 2.88, "grad_norm": 13.353249549865723, "learning_rate": 3.5935982602996818e-06, "loss": 0.1229, "step": 293400 }, { "epoch": 2.89, "grad_norm": 4.624063014984131, "learning_rate": 3.593474137845433e-06, "loss": 0.1938, "step": 293425 }, { "epoch": 2.89, "grad_norm": 14.918014526367188, "learning_rate": 3.593350015391185e-06, "loss": 0.1251, "step": 293450 }, { "epoch": 2.89, "grad_norm": 4.656639575958252, "learning_rate": 3.5932258929369363e-06, "loss": 0.2173, "step": 293475 }, { "epoch": 2.89, "grad_norm": 9.150635719299316, "learning_rate": 3.5931017704826875e-06, "loss": 0.1233, "step": 293500 }, { "epoch": 2.89, "grad_norm": 0.11452677100896835, "learning_rate": 3.592977648028439e-06, "loss": 0.2095, "step": 293525 }, { "epoch": 2.89, "grad_norm": 10.093259811401367, "learning_rate": 3.5928535255741907e-06, "loss": 0.1248, "step": 293550 }, { "epoch": 2.89, "grad_norm": 6.880904674530029, "learning_rate": 3.5927294031199424e-06, "loss": 0.2273, "step": 293575 }, { "epoch": 2.89, "grad_norm": 9.995026588439941, "learning_rate": 3.5926052806656936e-06, "loss": 0.1415, "step": 293600 }, { "epoch": 2.89, "grad_norm": 4.421883583068848, "learning_rate": 3.5924811582114456e-06, "loss": 0.229, "step": 293625 }, { "epoch": 2.89, "grad_norm": 9.465027809143066, "learning_rate": 3.592357035757197e-06, "loss": 0.1277, "step": 293650 }, { "epoch": 2.89, "grad_norm": 1.9968582391738892, "learning_rate": 3.592232913302948e-06, "loss": 0.2165, "step": 293675 }, { "epoch": 2.89, "grad_norm": 7.93750524520874, "learning_rate": 3.5921087908487e-06, "loss": 0.1359, "step": 293700 }, { "epoch": 2.89, "grad_norm": 8.13579273223877, "learning_rate": 3.5919846683944513e-06, "loss": 0.2212, "step": 293725 }, { "epoch": 2.89, "grad_norm": 13.093798637390137, "learning_rate": 3.591860545940203e-06, "loss": 0.1568, "step": 293750 }, { "epoch": 2.89, "grad_norm": 3.0540146827697754, "learning_rate": 3.5917364234859546e-06, "loss": 0.1832, "step": 293775 }, { "epoch": 2.89, "grad_norm": 8.011446952819824, "learning_rate": 3.5916123010317062e-06, "loss": 0.1603, "step": 293800 }, { "epoch": 2.89, "grad_norm": 4.692367076873779, "learning_rate": 3.5914881785774574e-06, "loss": 0.1987, "step": 293825 }, { "epoch": 2.89, "grad_norm": 18.16659164428711, "learning_rate": 3.5913640561232095e-06, "loss": 0.121, "step": 293850 }, { "epoch": 2.89, "grad_norm": 3.8832595348358154, "learning_rate": 3.5912399336689607e-06, "loss": 0.1856, "step": 293875 }, { "epoch": 2.89, "grad_norm": 11.89468002319336, "learning_rate": 3.591115811214712e-06, "loss": 0.1142, "step": 293900 }, { "epoch": 2.89, "grad_norm": 4.807695388793945, "learning_rate": 3.590991688760464e-06, "loss": 0.2331, "step": 293925 }, { "epoch": 2.89, "grad_norm": 14.342996597290039, "learning_rate": 3.590867566306215e-06, "loss": 0.1485, "step": 293950 }, { "epoch": 2.89, "grad_norm": 1.4516443014144897, "learning_rate": 3.590743443851967e-06, "loss": 0.2106, "step": 293975 }, { "epoch": 2.89, "grad_norm": 16.319114685058594, "learning_rate": 3.5906193213977185e-06, "loss": 0.1086, "step": 294000 }, { "epoch": 2.89, "grad_norm": 5.265130996704102, "learning_rate": 3.59049519894347e-06, "loss": 0.24, "step": 294025 }, { "epoch": 2.89, "grad_norm": 8.736527442932129, "learning_rate": 3.5903710764892213e-06, "loss": 0.1034, "step": 294050 }, { "epoch": 2.89, "grad_norm": 5.934265613555908, "learning_rate": 3.5902469540349734e-06, "loss": 0.196, "step": 294075 }, { "epoch": 2.89, "grad_norm": 11.83897590637207, "learning_rate": 3.5901228315807246e-06, "loss": 0.1382, "step": 294100 }, { "epoch": 2.89, "grad_norm": 3.992810010910034, "learning_rate": 3.589998709126476e-06, "loss": 0.2362, "step": 294125 }, { "epoch": 2.89, "grad_norm": 13.621664047241211, "learning_rate": 3.589874586672228e-06, "loss": 0.1383, "step": 294150 }, { "epoch": 2.89, "grad_norm": 7.565913200378418, "learning_rate": 3.589750464217979e-06, "loss": 0.162, "step": 294175 }, { "epoch": 2.89, "grad_norm": 13.22974967956543, "learning_rate": 3.5896263417637307e-06, "loss": 0.1524, "step": 294200 }, { "epoch": 2.89, "grad_norm": 5.16132926940918, "learning_rate": 3.5895022193094823e-06, "loss": 0.2321, "step": 294225 }, { "epoch": 2.89, "grad_norm": 7.810280799865723, "learning_rate": 3.589378096855234e-06, "loss": 0.1191, "step": 294250 }, { "epoch": 2.89, "grad_norm": 8.454130172729492, "learning_rate": 3.589253974400985e-06, "loss": 0.194, "step": 294275 }, { "epoch": 2.89, "grad_norm": 14.41861343383789, "learning_rate": 3.5891298519467372e-06, "loss": 0.1607, "step": 294300 }, { "epoch": 2.89, "grad_norm": 2.222928047180176, "learning_rate": 3.5890057294924885e-06, "loss": 0.2873, "step": 294325 }, { "epoch": 2.89, "grad_norm": 9.257866859436035, "learning_rate": 3.5888816070382397e-06, "loss": 0.131, "step": 294350 }, { "epoch": 2.89, "grad_norm": 9.035439491271973, "learning_rate": 3.5887574845839913e-06, "loss": 0.2185, "step": 294375 }, { "epoch": 2.89, "grad_norm": 7.513270854949951, "learning_rate": 3.588633362129743e-06, "loss": 0.1579, "step": 294400 }, { "epoch": 2.89, "grad_norm": 4.344086647033691, "learning_rate": 3.5885092396754946e-06, "loss": 0.2074, "step": 294425 }, { "epoch": 2.9, "grad_norm": 8.438087463378906, "learning_rate": 3.5883851172212458e-06, "loss": 0.1294, "step": 294450 }, { "epoch": 2.9, "grad_norm": 3.286947250366211, "learning_rate": 3.588260994766998e-06, "loss": 0.2351, "step": 294475 }, { "epoch": 2.9, "grad_norm": 10.052300453186035, "learning_rate": 3.588136872312749e-06, "loss": 0.1494, "step": 294500 }, { "epoch": 2.9, "grad_norm": 1.9610586166381836, "learning_rate": 3.5880127498585003e-06, "loss": 0.2728, "step": 294525 }, { "epoch": 2.9, "grad_norm": 16.87757682800293, "learning_rate": 3.5878886274042523e-06, "loss": 0.1507, "step": 294550 }, { "epoch": 2.9, "grad_norm": 5.851096153259277, "learning_rate": 3.5877645049500035e-06, "loss": 0.2005, "step": 294575 }, { "epoch": 2.9, "grad_norm": 14.745073318481445, "learning_rate": 3.587640382495755e-06, "loss": 0.0892, "step": 294600 }, { "epoch": 2.9, "grad_norm": 4.656767845153809, "learning_rate": 3.587516260041507e-06, "loss": 0.2134, "step": 294625 }, { "epoch": 2.9, "grad_norm": 13.4016695022583, "learning_rate": 3.5873921375872584e-06, "loss": 0.1613, "step": 294650 }, { "epoch": 2.9, "grad_norm": 2.689875841140747, "learning_rate": 3.5872680151330096e-06, "loss": 0.1902, "step": 294675 }, { "epoch": 2.9, "grad_norm": 8.361530303955078, "learning_rate": 3.5871438926787617e-06, "loss": 0.1255, "step": 294700 }, { "epoch": 2.9, "grad_norm": 7.2229204177856445, "learning_rate": 3.587019770224513e-06, "loss": 0.2119, "step": 294725 }, { "epoch": 2.9, "grad_norm": 12.006449699401855, "learning_rate": 3.586895647770264e-06, "loss": 0.1162, "step": 294750 }, { "epoch": 2.9, "grad_norm": 5.140273094177246, "learning_rate": 3.586771525316016e-06, "loss": 0.2573, "step": 294775 }, { "epoch": 2.9, "grad_norm": 13.272305488586426, "learning_rate": 3.5866474028617674e-06, "loss": 0.1458, "step": 294800 }, { "epoch": 2.9, "grad_norm": 5.152512073516846, "learning_rate": 3.586523280407519e-06, "loss": 0.291, "step": 294825 }, { "epoch": 2.9, "grad_norm": 16.30690574645996, "learning_rate": 3.5863991579532707e-06, "loss": 0.1617, "step": 294850 }, { "epoch": 2.9, "grad_norm": 5.250563144683838, "learning_rate": 3.5862750354990223e-06, "loss": 0.2409, "step": 294875 }, { "epoch": 2.9, "grad_norm": 9.453328132629395, "learning_rate": 3.5861509130447735e-06, "loss": 0.1218, "step": 294900 }, { "epoch": 2.9, "grad_norm": 5.0639519691467285, "learning_rate": 3.5860267905905256e-06, "loss": 0.2483, "step": 294925 }, { "epoch": 2.9, "grad_norm": 22.907920837402344, "learning_rate": 3.5859026681362768e-06, "loss": 0.183, "step": 294950 }, { "epoch": 2.9, "grad_norm": 5.281909942626953, "learning_rate": 3.5857785456820284e-06, "loss": 0.2344, "step": 294975 }, { "epoch": 2.9, "grad_norm": 7.64931058883667, "learning_rate": 3.58565442322778e-06, "loss": 0.1217, "step": 295000 }, { "epoch": 2.9, "grad_norm": 6.610376834869385, "learning_rate": 3.5855303007735317e-06, "loss": 0.2516, "step": 295025 }, { "epoch": 2.9, "grad_norm": 13.414438247680664, "learning_rate": 3.585406178319283e-06, "loss": 0.1385, "step": 295050 }, { "epoch": 2.9, "grad_norm": 5.641408920288086, "learning_rate": 3.5852820558650345e-06, "loss": 0.2211, "step": 295075 }, { "epoch": 2.9, "grad_norm": 7.331193923950195, "learning_rate": 3.585157933410786e-06, "loss": 0.1427, "step": 295100 }, { "epoch": 2.9, "grad_norm": 1.6983145475387573, "learning_rate": 3.5850338109565374e-06, "loss": 0.1993, "step": 295125 }, { "epoch": 2.9, "grad_norm": 8.726546287536621, "learning_rate": 3.5849096885022894e-06, "loss": 0.137, "step": 295150 }, { "epoch": 2.9, "grad_norm": 7.256397247314453, "learning_rate": 3.5847855660480407e-06, "loss": 0.2542, "step": 295175 }, { "epoch": 2.9, "grad_norm": 9.552606582641602, "learning_rate": 3.5846614435937923e-06, "loss": 0.1285, "step": 295200 }, { "epoch": 2.9, "grad_norm": 3.5720396041870117, "learning_rate": 3.5845373211395435e-06, "loss": 0.1943, "step": 295225 }, { "epoch": 2.9, "grad_norm": 9.351703643798828, "learning_rate": 3.5844131986852956e-06, "loss": 0.1592, "step": 295250 }, { "epoch": 2.9, "grad_norm": 82.19940948486328, "learning_rate": 3.5842890762310468e-06, "loss": 0.2371, "step": 295275 }, { "epoch": 2.9, "grad_norm": 15.177570343017578, "learning_rate": 3.584164953776798e-06, "loss": 0.1209, "step": 295300 }, { "epoch": 2.9, "grad_norm": 2.140026807785034, "learning_rate": 3.58404083132255e-06, "loss": 0.2011, "step": 295325 }, { "epoch": 2.9, "grad_norm": 10.915820121765137, "learning_rate": 3.5839167088683013e-06, "loss": 0.112, "step": 295350 }, { "epoch": 2.9, "grad_norm": 4.479066848754883, "learning_rate": 3.583792586414053e-06, "loss": 0.1867, "step": 295375 }, { "epoch": 2.9, "grad_norm": 13.76817798614502, "learning_rate": 3.5836684639598045e-06, "loss": 0.1283, "step": 295400 }, { "epoch": 2.9, "grad_norm": 5.382487773895264, "learning_rate": 3.583544341505556e-06, "loss": 0.1656, "step": 295425 }, { "epoch": 2.9, "grad_norm": 5.998510837554932, "learning_rate": 3.5834202190513074e-06, "loss": 0.1186, "step": 295450 }, { "epoch": 2.91, "grad_norm": 5.151294708251953, "learning_rate": 3.5832960965970594e-06, "loss": 0.2118, "step": 295475 }, { "epoch": 2.91, "grad_norm": 9.607588768005371, "learning_rate": 3.5831719741428106e-06, "loss": 0.1353, "step": 295500 }, { "epoch": 2.91, "grad_norm": 3.5573699474334717, "learning_rate": 3.583047851688562e-06, "loss": 0.2268, "step": 295525 }, { "epoch": 2.91, "grad_norm": 12.765803337097168, "learning_rate": 3.582923729234314e-06, "loss": 0.1371, "step": 295550 }, { "epoch": 2.91, "grad_norm": 2.561178207397461, "learning_rate": 3.582799606780065e-06, "loss": 0.1835, "step": 295575 }, { "epoch": 2.91, "grad_norm": 15.433141708374023, "learning_rate": 3.5826754843258168e-06, "loss": 0.1618, "step": 295600 }, { "epoch": 2.91, "grad_norm": 3.553819417953491, "learning_rate": 3.5825513618715684e-06, "loss": 0.2039, "step": 295625 }, { "epoch": 2.91, "grad_norm": 16.83868980407715, "learning_rate": 3.58242723941732e-06, "loss": 0.1511, "step": 295650 }, { "epoch": 2.91, "grad_norm": 5.792967319488525, "learning_rate": 3.5823080818612415e-06, "loss": 0.2251, "step": 295675 }, { "epoch": 2.91, "grad_norm": 7.547850608825684, "learning_rate": 3.582183959406993e-06, "loss": 0.1189, "step": 295700 }, { "epoch": 2.91, "grad_norm": 7.418353080749512, "learning_rate": 3.5820598369527443e-06, "loss": 0.27, "step": 295725 }, { "epoch": 2.91, "grad_norm": 19.463449478149414, "learning_rate": 3.5819357144984964e-06, "loss": 0.1426, "step": 295750 }, { "epoch": 2.91, "grad_norm": 8.947883605957031, "learning_rate": 3.5818115920442476e-06, "loss": 0.2096, "step": 295775 }, { "epoch": 2.91, "grad_norm": 11.23104190826416, "learning_rate": 3.581687469589999e-06, "loss": 0.1047, "step": 295800 }, { "epoch": 2.91, "grad_norm": 5.469025611877441, "learning_rate": 3.5815633471357504e-06, "loss": 0.2438, "step": 295825 }, { "epoch": 2.91, "grad_norm": 8.410758018493652, "learning_rate": 3.581439224681502e-06, "loss": 0.1279, "step": 295850 }, { "epoch": 2.91, "grad_norm": 10.101603507995605, "learning_rate": 3.5813151022272537e-06, "loss": 0.2544, "step": 295875 }, { "epoch": 2.91, "grad_norm": 7.83056116104126, "learning_rate": 3.581190979773005e-06, "loss": 0.1163, "step": 295900 }, { "epoch": 2.91, "grad_norm": 3.9644482135772705, "learning_rate": 3.581066857318757e-06, "loss": 0.2219, "step": 295925 }, { "epoch": 2.91, "grad_norm": 15.151657104492188, "learning_rate": 3.580942734864508e-06, "loss": 0.147, "step": 295950 }, { "epoch": 2.91, "grad_norm": 9.958739280700684, "learning_rate": 3.5808186124102594e-06, "loss": 0.2361, "step": 295975 }, { "epoch": 2.91, "grad_norm": 14.51413631439209, "learning_rate": 3.5806994548541813e-06, "loss": 0.1616, "step": 296000 }, { "epoch": 2.91, "grad_norm": 3.0897159576416016, "learning_rate": 3.5805753323999325e-06, "loss": 0.1992, "step": 296025 }, { "epoch": 2.91, "grad_norm": 12.201966285705566, "learning_rate": 3.5804512099456845e-06, "loss": 0.1, "step": 296050 }, { "epoch": 2.91, "grad_norm": 7.112059593200684, "learning_rate": 3.5803270874914358e-06, "loss": 0.2038, "step": 296075 }, { "epoch": 2.91, "grad_norm": 7.532193660736084, "learning_rate": 3.5802029650371874e-06, "loss": 0.0972, "step": 296100 }, { "epoch": 2.91, "grad_norm": 2.667475700378418, "learning_rate": 3.580078842582939e-06, "loss": 0.2536, "step": 296125 }, { "epoch": 2.91, "grad_norm": 7.054937839508057, "learning_rate": 3.5799547201286907e-06, "loss": 0.1475, "step": 296150 }, { "epoch": 2.91, "grad_norm": 4.010137557983398, "learning_rate": 3.579830597674442e-06, "loss": 0.2271, "step": 296175 }, { "epoch": 2.91, "grad_norm": 7.967220306396484, "learning_rate": 3.579706475220194e-06, "loss": 0.1517, "step": 296200 }, { "epoch": 2.91, "grad_norm": 2.9415194988250732, "learning_rate": 3.579582352765945e-06, "loss": 0.2092, "step": 296225 }, { "epoch": 2.91, "grad_norm": 27.53351593017578, "learning_rate": 3.5794582303116964e-06, "loss": 0.1397, "step": 296250 }, { "epoch": 2.91, "grad_norm": 0.6733613014221191, "learning_rate": 3.5793341078574484e-06, "loss": 0.193, "step": 296275 }, { "epoch": 2.91, "grad_norm": 11.710921287536621, "learning_rate": 3.5792099854031996e-06, "loss": 0.1344, "step": 296300 }, { "epoch": 2.91, "grad_norm": 3.4698896408081055, "learning_rate": 3.5790858629489513e-06, "loss": 0.2103, "step": 296325 }, { "epoch": 2.91, "grad_norm": 13.860591888427734, "learning_rate": 3.578961740494703e-06, "loss": 0.1592, "step": 296350 }, { "epoch": 2.91, "grad_norm": 6.950976371765137, "learning_rate": 3.5788376180404545e-06, "loss": 0.21, "step": 296375 }, { "epoch": 2.91, "grad_norm": 11.771740913391113, "learning_rate": 3.5787134955862057e-06, "loss": 0.1446, "step": 296400 }, { "epoch": 2.91, "grad_norm": 6.459238052368164, "learning_rate": 3.578589373131957e-06, "loss": 0.2251, "step": 296425 }, { "epoch": 2.91, "grad_norm": 14.265172958374023, "learning_rate": 3.578465250677709e-06, "loss": 0.1439, "step": 296450 }, { "epoch": 2.91, "grad_norm": 3.391082525253296, "learning_rate": 3.5783411282234602e-06, "loss": 0.255, "step": 296475 }, { "epoch": 2.92, "grad_norm": 9.008694648742676, "learning_rate": 3.578217005769212e-06, "loss": 0.1392, "step": 296500 }, { "epoch": 2.92, "grad_norm": 3.320965528488159, "learning_rate": 3.5780928833149635e-06, "loss": 0.2042, "step": 296525 }, { "epoch": 2.92, "grad_norm": 14.949715614318848, "learning_rate": 3.577968760860715e-06, "loss": 0.1358, "step": 296550 }, { "epoch": 2.92, "grad_norm": 5.922667980194092, "learning_rate": 3.5778446384064663e-06, "loss": 0.2253, "step": 296575 }, { "epoch": 2.92, "grad_norm": 9.706759452819824, "learning_rate": 3.5777205159522184e-06, "loss": 0.1145, "step": 296600 }, { "epoch": 2.92, "grad_norm": 3.5778229236602783, "learning_rate": 3.5775963934979696e-06, "loss": 0.1554, "step": 296625 }, { "epoch": 2.92, "grad_norm": 8.025506019592285, "learning_rate": 3.577472271043721e-06, "loss": 0.1022, "step": 296650 }, { "epoch": 2.92, "grad_norm": 4.148568153381348, "learning_rate": 3.577348148589473e-06, "loss": 0.2213, "step": 296675 }, { "epoch": 2.92, "grad_norm": 9.448431968688965, "learning_rate": 3.577224026135224e-06, "loss": 0.104, "step": 296700 }, { "epoch": 2.92, "grad_norm": 4.9987945556640625, "learning_rate": 3.5770999036809757e-06, "loss": 0.1879, "step": 296725 }, { "epoch": 2.92, "grad_norm": 6.5750651359558105, "learning_rate": 3.5769757812267274e-06, "loss": 0.1141, "step": 296750 }, { "epoch": 2.92, "grad_norm": 2.334740161895752, "learning_rate": 3.576851658772479e-06, "loss": 0.1813, "step": 296775 }, { "epoch": 2.92, "grad_norm": 21.777910232543945, "learning_rate": 3.57672753631823e-06, "loss": 0.1072, "step": 296800 }, { "epoch": 2.92, "grad_norm": 11.263596534729004, "learning_rate": 3.5766034138639823e-06, "loss": 0.1739, "step": 296825 }, { "epoch": 2.92, "grad_norm": 6.5709452629089355, "learning_rate": 3.5764792914097335e-06, "loss": 0.1027, "step": 296850 }, { "epoch": 2.92, "grad_norm": 6.095920562744141, "learning_rate": 3.5763551689554847e-06, "loss": 0.2483, "step": 296875 }, { "epoch": 2.92, "grad_norm": 10.326669692993164, "learning_rate": 3.5762310465012367e-06, "loss": 0.0832, "step": 296900 }, { "epoch": 2.92, "grad_norm": 8.178689956665039, "learning_rate": 3.576106924046988e-06, "loss": 0.2273, "step": 296925 }, { "epoch": 2.92, "grad_norm": 19.78350067138672, "learning_rate": 3.5759828015927396e-06, "loss": 0.1478, "step": 296950 }, { "epoch": 2.92, "grad_norm": 7.197412490844727, "learning_rate": 3.5758586791384912e-06, "loss": 0.1965, "step": 296975 }, { "epoch": 2.92, "grad_norm": 11.812007904052734, "learning_rate": 3.575734556684243e-06, "loss": 0.1617, "step": 297000 }, { "epoch": 2.92, "grad_norm": 6.144908905029297, "learning_rate": 3.575610434229994e-06, "loss": 0.213, "step": 297025 }, { "epoch": 2.92, "grad_norm": 51.22657775878906, "learning_rate": 3.575486311775746e-06, "loss": 0.1623, "step": 297050 }, { "epoch": 2.92, "grad_norm": 3.600449562072754, "learning_rate": 3.5753621893214973e-06, "loss": 0.1878, "step": 297075 }, { "epoch": 2.92, "grad_norm": 12.731160163879395, "learning_rate": 3.5752380668672486e-06, "loss": 0.1039, "step": 297100 }, { "epoch": 2.92, "grad_norm": 1.565409779548645, "learning_rate": 3.5751139444130006e-06, "loss": 0.1955, "step": 297125 }, { "epoch": 2.92, "grad_norm": 19.3823299407959, "learning_rate": 3.574989821958752e-06, "loss": 0.1273, "step": 297150 }, { "epoch": 2.92, "grad_norm": 21.45520782470703, "learning_rate": 3.5748656995045035e-06, "loss": 0.2096, "step": 297175 }, { "epoch": 2.92, "grad_norm": 15.325079917907715, "learning_rate": 3.574741577050255e-06, "loss": 0.1296, "step": 297200 }, { "epoch": 2.92, "grad_norm": 0.3309619724750519, "learning_rate": 3.5746174545960067e-06, "loss": 0.2133, "step": 297225 }, { "epoch": 2.92, "grad_norm": 6.5791120529174805, "learning_rate": 3.574493332141758e-06, "loss": 0.1568, "step": 297250 }, { "epoch": 2.92, "grad_norm": 3.5236380100250244, "learning_rate": 3.574369209687509e-06, "loss": 0.2799, "step": 297275 }, { "epoch": 2.92, "grad_norm": 15.870041847229004, "learning_rate": 3.5742450872332612e-06, "loss": 0.1314, "step": 297300 }, { "epoch": 2.92, "grad_norm": 7.482975482940674, "learning_rate": 3.5741209647790124e-06, "loss": 0.24, "step": 297325 }, { "epoch": 2.92, "grad_norm": 12.648252487182617, "learning_rate": 3.573996842324764e-06, "loss": 0.1425, "step": 297350 }, { "epoch": 2.92, "grad_norm": 2.2705421447753906, "learning_rate": 3.5738727198705157e-06, "loss": 0.2243, "step": 297375 }, { "epoch": 2.92, "grad_norm": 11.75425910949707, "learning_rate": 3.5737485974162673e-06, "loss": 0.1186, "step": 297400 }, { "epoch": 2.92, "grad_norm": 6.622165203094482, "learning_rate": 3.5736244749620185e-06, "loss": 0.1889, "step": 297425 }, { "epoch": 2.92, "grad_norm": 13.434286117553711, "learning_rate": 3.5735003525077706e-06, "loss": 0.1559, "step": 297450 }, { "epoch": 2.92, "grad_norm": 6.982113361358643, "learning_rate": 3.573376230053522e-06, "loss": 0.2436, "step": 297475 }, { "epoch": 2.93, "grad_norm": 9.004461288452148, "learning_rate": 3.573252107599273e-06, "loss": 0.1167, "step": 297500 }, { "epoch": 2.93, "grad_norm": 5.447750091552734, "learning_rate": 3.573127985145025e-06, "loss": 0.261, "step": 297525 }, { "epoch": 2.93, "grad_norm": 8.37009334564209, "learning_rate": 3.5730038626907763e-06, "loss": 0.1035, "step": 297550 }, { "epoch": 2.93, "grad_norm": 4.1847734451293945, "learning_rate": 3.572879740236528e-06, "loss": 0.2534, "step": 297575 }, { "epoch": 2.93, "grad_norm": 11.00545597076416, "learning_rate": 3.5727556177822796e-06, "loss": 0.1479, "step": 297600 }, { "epoch": 2.93, "grad_norm": 5.287083148956299, "learning_rate": 3.572631495328031e-06, "loss": 0.2139, "step": 297625 }, { "epoch": 2.93, "grad_norm": 16.87278938293457, "learning_rate": 3.5725073728737824e-06, "loss": 0.1553, "step": 297650 }, { "epoch": 2.93, "grad_norm": 2.5272934436798096, "learning_rate": 3.5723832504195345e-06, "loss": 0.2147, "step": 297675 }, { "epoch": 2.93, "grad_norm": 7.701019763946533, "learning_rate": 3.5722591279652857e-06, "loss": 0.1345, "step": 297700 }, { "epoch": 2.93, "grad_norm": 2.7308766841888428, "learning_rate": 3.572135005511037e-06, "loss": 0.2453, "step": 297725 }, { "epoch": 2.93, "grad_norm": 8.357453346252441, "learning_rate": 3.572010883056789e-06, "loss": 0.158, "step": 297750 }, { "epoch": 2.93, "grad_norm": 6.55819845199585, "learning_rate": 3.57188676060254e-06, "loss": 0.2149, "step": 297775 }, { "epoch": 2.93, "grad_norm": 10.278514862060547, "learning_rate": 3.571762638148292e-06, "loss": 0.1548, "step": 297800 }, { "epoch": 2.93, "grad_norm": 6.667409420013428, "learning_rate": 3.5716385156940434e-06, "loss": 0.1996, "step": 297825 }, { "epoch": 2.93, "grad_norm": 5.878843307495117, "learning_rate": 3.571514393239795e-06, "loss": 0.1321, "step": 297850 }, { "epoch": 2.93, "grad_norm": 4.8481974601745605, "learning_rate": 3.5713902707855463e-06, "loss": 0.2161, "step": 297875 }, { "epoch": 2.93, "grad_norm": 8.783474922180176, "learning_rate": 3.5712661483312983e-06, "loss": 0.1012, "step": 297900 }, { "epoch": 2.93, "grad_norm": 5.144486427307129, "learning_rate": 3.5711420258770495e-06, "loss": 0.2398, "step": 297925 }, { "epoch": 2.93, "grad_norm": 14.188947677612305, "learning_rate": 3.5710179034228008e-06, "loss": 0.1502, "step": 297950 }, { "epoch": 2.93, "grad_norm": 3.4981019496917725, "learning_rate": 3.570893780968553e-06, "loss": 0.2025, "step": 297975 }, { "epoch": 2.93, "grad_norm": 10.81646728515625, "learning_rate": 3.570769658514304e-06, "loss": 0.1427, "step": 298000 }, { "epoch": 2.93, "grad_norm": 11.346441268920898, "learning_rate": 3.5706455360600557e-06, "loss": 0.248, "step": 298025 }, { "epoch": 2.93, "grad_norm": 11.891913414001465, "learning_rate": 3.5705214136058073e-06, "loss": 0.1333, "step": 298050 }, { "epoch": 2.93, "grad_norm": 4.731064796447754, "learning_rate": 3.570397291151559e-06, "loss": 0.2259, "step": 298075 }, { "epoch": 2.93, "grad_norm": 9.86761474609375, "learning_rate": 3.57027316869731e-06, "loss": 0.1091, "step": 298100 }, { "epoch": 2.93, "grad_norm": 6.277988433837891, "learning_rate": 3.5701490462430618e-06, "loss": 0.1955, "step": 298125 }, { "epoch": 2.93, "grad_norm": 11.201818466186523, "learning_rate": 3.5700249237888134e-06, "loss": 0.1707, "step": 298150 }, { "epoch": 2.93, "grad_norm": 4.347602844238281, "learning_rate": 3.569900801334565e-06, "loss": 0.2538, "step": 298175 }, { "epoch": 2.93, "grad_norm": 15.26441478729248, "learning_rate": 3.5697766788803163e-06, "loss": 0.1357, "step": 298200 }, { "epoch": 2.93, "grad_norm": 5.6497273445129395, "learning_rate": 3.5696525564260683e-06, "loss": 0.184, "step": 298225 }, { "epoch": 2.93, "grad_norm": 12.68332290649414, "learning_rate": 3.5695284339718195e-06, "loss": 0.1276, "step": 298250 }, { "epoch": 2.93, "grad_norm": 4.326178550720215, "learning_rate": 3.5694043115175707e-06, "loss": 0.2053, "step": 298275 }, { "epoch": 2.93, "grad_norm": 12.567366600036621, "learning_rate": 3.569280189063323e-06, "loss": 0.1376, "step": 298300 }, { "epoch": 2.93, "grad_norm": 5.957771301269531, "learning_rate": 3.569156066609074e-06, "loss": 0.1999, "step": 298325 }, { "epoch": 2.93, "grad_norm": 19.381399154663086, "learning_rate": 3.5690319441548256e-06, "loss": 0.14, "step": 298350 }, { "epoch": 2.93, "grad_norm": 4.736611366271973, "learning_rate": 3.5689078217005773e-06, "loss": 0.1907, "step": 298375 }, { "epoch": 2.93, "grad_norm": 13.902298927307129, "learning_rate": 3.568783699246329e-06, "loss": 0.1279, "step": 298400 }, { "epoch": 2.93, "grad_norm": 5.962554454803467, "learning_rate": 3.56865957679208e-06, "loss": 0.2394, "step": 298425 }, { "epoch": 2.93, "grad_norm": 7.30997896194458, "learning_rate": 3.568535454337832e-06, "loss": 0.1385, "step": 298450 }, { "epoch": 2.93, "grad_norm": 3.1627135276794434, "learning_rate": 3.5684113318835834e-06, "loss": 0.2144, "step": 298475 }, { "epoch": 2.93, "grad_norm": 10.930243492126465, "learning_rate": 3.5682872094293346e-06, "loss": 0.1353, "step": 298500 }, { "epoch": 2.94, "grad_norm": 7.035091876983643, "learning_rate": 3.5681630869750867e-06, "loss": 0.2028, "step": 298525 }, { "epoch": 2.94, "grad_norm": 7.171579360961914, "learning_rate": 3.568038964520838e-06, "loss": 0.1054, "step": 298550 }, { "epoch": 2.94, "grad_norm": 4.2664794921875, "learning_rate": 3.5679148420665895e-06, "loss": 0.2424, "step": 298575 }, { "epoch": 2.94, "grad_norm": 8.30299186706543, "learning_rate": 3.567790719612341e-06, "loss": 0.1486, "step": 298600 }, { "epoch": 2.94, "grad_norm": 5.000082492828369, "learning_rate": 3.5676665971580928e-06, "loss": 0.196, "step": 298625 }, { "epoch": 2.94, "grad_norm": 13.505539894104004, "learning_rate": 3.567542474703844e-06, "loss": 0.1468, "step": 298650 }, { "epoch": 2.94, "grad_norm": 7.972134113311768, "learning_rate": 3.567418352249596e-06, "loss": 0.1933, "step": 298675 }, { "epoch": 2.94, "grad_norm": 9.750033378601074, "learning_rate": 3.5672942297953473e-06, "loss": 0.1722, "step": 298700 }, { "epoch": 2.94, "grad_norm": 1.9025499820709229, "learning_rate": 3.5671701073410985e-06, "loss": 0.213, "step": 298725 }, { "epoch": 2.94, "grad_norm": 16.085155487060547, "learning_rate": 3.5670459848868505e-06, "loss": 0.1759, "step": 298750 }, { "epoch": 2.94, "grad_norm": 3.588871717453003, "learning_rate": 3.5669218624326017e-06, "loss": 0.2418, "step": 298775 }, { "epoch": 2.94, "grad_norm": 18.560014724731445, "learning_rate": 3.5667977399783534e-06, "loss": 0.1363, "step": 298800 }, { "epoch": 2.94, "grad_norm": 7.389801979064941, "learning_rate": 3.566673617524105e-06, "loss": 0.2103, "step": 298825 }, { "epoch": 2.94, "grad_norm": 12.643564224243164, "learning_rate": 3.5665494950698567e-06, "loss": 0.1496, "step": 298850 }, { "epoch": 2.94, "grad_norm": 5.557665824890137, "learning_rate": 3.566425372615608e-06, "loss": 0.2122, "step": 298875 }, { "epoch": 2.94, "grad_norm": 12.23808479309082, "learning_rate": 3.56630125016136e-06, "loss": 0.1642, "step": 298900 }, { "epoch": 2.94, "grad_norm": 5.232241153717041, "learning_rate": 3.566177127707111e-06, "loss": 0.2122, "step": 298925 }, { "epoch": 2.94, "grad_norm": 17.351436614990234, "learning_rate": 3.5660530052528623e-06, "loss": 0.1293, "step": 298950 }, { "epoch": 2.94, "grad_norm": 3.948488712310791, "learning_rate": 3.565928882798614e-06, "loss": 0.2431, "step": 298975 }, { "epoch": 2.94, "grad_norm": 2.6663317680358887, "learning_rate": 3.5658047603443656e-06, "loss": 0.119, "step": 299000 }, { "epoch": 2.94, "grad_norm": 3.2665882110595703, "learning_rate": 3.5656806378901172e-06, "loss": 0.2049, "step": 299025 }, { "epoch": 2.94, "grad_norm": 3.657600164413452, "learning_rate": 3.5655565154358685e-06, "loss": 0.1191, "step": 299050 }, { "epoch": 2.94, "grad_norm": 4.860949993133545, "learning_rate": 3.5654323929816205e-06, "loss": 0.2076, "step": 299075 }, { "epoch": 2.94, "grad_norm": 15.278762817382812, "learning_rate": 3.5653082705273717e-06, "loss": 0.132, "step": 299100 }, { "epoch": 2.94, "grad_norm": 6.632920742034912, "learning_rate": 3.565184148073123e-06, "loss": 0.2148, "step": 299125 }, { "epoch": 2.94, "grad_norm": 12.180135726928711, "learning_rate": 3.565060025618875e-06, "loss": 0.1518, "step": 299150 }, { "epoch": 2.94, "grad_norm": 7.287993431091309, "learning_rate": 3.5649359031646262e-06, "loss": 0.1745, "step": 299175 }, { "epoch": 2.94, "grad_norm": 16.61711311340332, "learning_rate": 3.564811780710378e-06, "loss": 0.1227, "step": 299200 }, { "epoch": 2.94, "grad_norm": 4.405324935913086, "learning_rate": 3.5646876582561295e-06, "loss": 0.2269, "step": 299225 }, { "epoch": 2.94, "grad_norm": 8.061917304992676, "learning_rate": 3.564563535801881e-06, "loss": 0.138, "step": 299250 }, { "epoch": 2.94, "grad_norm": 3.4080913066864014, "learning_rate": 3.5644394133476323e-06, "loss": 0.2317, "step": 299275 }, { "epoch": 2.94, "grad_norm": 12.416175842285156, "learning_rate": 3.5643152908933844e-06, "loss": 0.1096, "step": 299300 }, { "epoch": 2.94, "grad_norm": 3.618657112121582, "learning_rate": 3.5641911684391356e-06, "loss": 0.213, "step": 299325 }, { "epoch": 2.94, "grad_norm": 12.446599960327148, "learning_rate": 3.564067045984887e-06, "loss": 0.1255, "step": 299350 }, { "epoch": 2.94, "grad_norm": 0.771413266658783, "learning_rate": 3.563942923530639e-06, "loss": 0.197, "step": 299375 }, { "epoch": 2.94, "grad_norm": 8.448734283447266, "learning_rate": 3.56381880107639e-06, "loss": 0.1448, "step": 299400 }, { "epoch": 2.94, "grad_norm": 4.0172247886657715, "learning_rate": 3.5636946786221417e-06, "loss": 0.189, "step": 299425 }, { "epoch": 2.94, "grad_norm": 11.07540225982666, "learning_rate": 3.5635705561678934e-06, "loss": 0.1309, "step": 299450 }, { "epoch": 2.94, "grad_norm": 4.043228626251221, "learning_rate": 3.563446433713645e-06, "loss": 0.2142, "step": 299475 }, { "epoch": 2.94, "grad_norm": 7.829902172088623, "learning_rate": 3.563322311259396e-06, "loss": 0.1123, "step": 299500 }, { "epoch": 2.94, "grad_norm": 5.5448222160339355, "learning_rate": 3.5631981888051483e-06, "loss": 0.2184, "step": 299525 }, { "epoch": 2.95, "grad_norm": 9.248568534851074, "learning_rate": 3.5630740663508995e-06, "loss": 0.1056, "step": 299550 }, { "epoch": 2.95, "grad_norm": 3.8670520782470703, "learning_rate": 3.5629499438966507e-06, "loss": 0.1896, "step": 299575 }, { "epoch": 2.95, "grad_norm": 13.98298168182373, "learning_rate": 3.5628258214424027e-06, "loss": 0.1341, "step": 299600 }, { "epoch": 2.95, "grad_norm": 5.602841854095459, "learning_rate": 3.562701698988154e-06, "loss": 0.2245, "step": 299625 }, { "epoch": 2.95, "grad_norm": 9.847489356994629, "learning_rate": 3.5625775765339056e-06, "loss": 0.1225, "step": 299650 }, { "epoch": 2.95, "grad_norm": 5.700597763061523, "learning_rate": 3.5624534540796572e-06, "loss": 0.2374, "step": 299675 }, { "epoch": 2.95, "grad_norm": 9.976877212524414, "learning_rate": 3.562329331625409e-06, "loss": 0.1034, "step": 299700 }, { "epoch": 2.95, "grad_norm": 5.978558540344238, "learning_rate": 3.56220520917116e-06, "loss": 0.1949, "step": 299725 }, { "epoch": 2.95, "grad_norm": 8.912501335144043, "learning_rate": 3.562081086716912e-06, "loss": 0.1229, "step": 299750 }, { "epoch": 2.95, "grad_norm": 4.830567836761475, "learning_rate": 3.5619569642626633e-06, "loss": 0.2363, "step": 299775 }, { "epoch": 2.95, "grad_norm": 10.139315605163574, "learning_rate": 3.5618328418084145e-06, "loss": 0.115, "step": 299800 }, { "epoch": 2.95, "grad_norm": 4.436055660247803, "learning_rate": 3.561708719354166e-06, "loss": 0.2608, "step": 299825 }, { "epoch": 2.95, "grad_norm": 7.498529434204102, "learning_rate": 3.561584596899918e-06, "loss": 0.12, "step": 299850 }, { "epoch": 2.95, "grad_norm": 4.252025127410889, "learning_rate": 3.5614604744456695e-06, "loss": 0.2041, "step": 299875 }, { "epoch": 2.95, "grad_norm": 12.27472972869873, "learning_rate": 3.5613363519914207e-06, "loss": 0.1096, "step": 299900 }, { "epoch": 2.95, "grad_norm": 6.3152570724487305, "learning_rate": 3.5612122295371727e-06, "loss": 0.2485, "step": 299925 }, { "epoch": 2.95, "grad_norm": 16.640836715698242, "learning_rate": 3.561088107082924e-06, "loss": 0.1236, "step": 299950 }, { "epoch": 2.95, "grad_norm": 2.0447843074798584, "learning_rate": 3.560963984628675e-06, "loss": 0.2373, "step": 299975 }, { "epoch": 2.95, "grad_norm": 8.969588279724121, "learning_rate": 3.560839862174427e-06, "loss": 0.1332, "step": 300000 }, { "epoch": 2.95, "eval_loss": 0.6174227595329285, "eval_runtime": 5873.3646, "eval_samples_per_second": 1.612, "eval_steps_per_second": 0.202, "eval_wer": 0.12117749307925063, "step": 300000 }, { "epoch": 2.95, "grad_norm": 6.4142045974731445, "learning_rate": 3.5607157397201784e-06, "loss": 0.2466, "step": 300025 }, { "epoch": 2.95, "grad_norm": 10.753829002380371, "learning_rate": 3.56059161726593e-06, "loss": 0.1322, "step": 300050 }, { "epoch": 2.95, "grad_norm": 1.0450212955474854, "learning_rate": 3.5604674948116817e-06, "loss": 0.2012, "step": 300075 }, { "epoch": 2.95, "grad_norm": 7.04857063293457, "learning_rate": 3.5603433723574333e-06, "loss": 0.1442, "step": 300100 }, { "epoch": 2.95, "grad_norm": 6.064887046813965, "learning_rate": 3.5602192499031845e-06, "loss": 0.2229, "step": 300125 }, { "epoch": 2.95, "grad_norm": 7.105614185333252, "learning_rate": 3.5600951274489366e-06, "loss": 0.1513, "step": 300150 }, { "epoch": 2.95, "grad_norm": 7.77285623550415, "learning_rate": 3.559971004994688e-06, "loss": 0.2378, "step": 300175 }, { "epoch": 2.95, "grad_norm": 4.432622909545898, "learning_rate": 3.559846882540439e-06, "loss": 0.095, "step": 300200 }, { "epoch": 2.95, "grad_norm": 3.7803094387054443, "learning_rate": 3.559727724984361e-06, "loss": 0.2246, "step": 300225 }, { "epoch": 2.95, "grad_norm": 9.042120933532715, "learning_rate": 3.559603602530112e-06, "loss": 0.1535, "step": 300250 }, { "epoch": 2.95, "grad_norm": 5.38872766494751, "learning_rate": 3.559479480075864e-06, "loss": 0.2231, "step": 300275 }, { "epoch": 2.95, "grad_norm": 17.77419090270996, "learning_rate": 3.5593553576216154e-06, "loss": 0.1472, "step": 300300 }, { "epoch": 2.95, "grad_norm": 4.748528957366943, "learning_rate": 3.559231235167367e-06, "loss": 0.228, "step": 300325 }, { "epoch": 2.95, "grad_norm": 9.630400657653809, "learning_rate": 3.5591071127131186e-06, "loss": 0.182, "step": 300350 }, { "epoch": 2.95, "grad_norm": 4.939479827880859, "learning_rate": 3.5589829902588703e-06, "loss": 0.2039, "step": 300375 }, { "epoch": 2.95, "grad_norm": 14.407035827636719, "learning_rate": 3.5588588678046215e-06, "loss": 0.1441, "step": 300400 }, { "epoch": 2.95, "grad_norm": 7.00258731842041, "learning_rate": 3.5587347453503727e-06, "loss": 0.2331, "step": 300425 }, { "epoch": 2.95, "grad_norm": 12.609267234802246, "learning_rate": 3.5586106228961248e-06, "loss": 0.1424, "step": 300450 }, { "epoch": 2.95, "grad_norm": 7.31364631652832, "learning_rate": 3.558486500441876e-06, "loss": 0.204, "step": 300475 }, { "epoch": 2.95, "grad_norm": 9.330205917358398, "learning_rate": 3.5583623779876276e-06, "loss": 0.1252, "step": 300500 }, { "epoch": 2.95, "grad_norm": 3.2583940029144287, "learning_rate": 3.5582382555333792e-06, "loss": 0.2344, "step": 300525 }, { "epoch": 2.96, "grad_norm": 9.62360668182373, "learning_rate": 3.558114133079131e-06, "loss": 0.1153, "step": 300550 }, { "epoch": 2.96, "grad_norm": 4.301417827606201, "learning_rate": 3.557990010624882e-06, "loss": 0.2415, "step": 300575 }, { "epoch": 2.96, "grad_norm": 12.327508926391602, "learning_rate": 3.557865888170634e-06, "loss": 0.1282, "step": 300600 }, { "epoch": 2.96, "grad_norm": 5.95821475982666, "learning_rate": 3.5577417657163854e-06, "loss": 0.2173, "step": 300625 }, { "epoch": 2.96, "grad_norm": 9.470059394836426, "learning_rate": 3.5576176432621366e-06, "loss": 0.1284, "step": 300650 }, { "epoch": 2.96, "grad_norm": 3.020968198776245, "learning_rate": 3.5574935208078886e-06, "loss": 0.2359, "step": 300675 }, { "epoch": 2.96, "grad_norm": 19.755170822143555, "learning_rate": 3.55736939835364e-06, "loss": 0.1448, "step": 300700 }, { "epoch": 2.96, "grad_norm": 5.732424736022949, "learning_rate": 3.5572452758993915e-06, "loss": 0.2261, "step": 300725 }, { "epoch": 2.96, "grad_norm": 6.911773204803467, "learning_rate": 3.557121153445143e-06, "loss": 0.1075, "step": 300750 }, { "epoch": 2.96, "grad_norm": 4.1469292640686035, "learning_rate": 3.5569970309908947e-06, "loss": 0.2453, "step": 300775 }, { "epoch": 2.96, "grad_norm": 8.430252075195312, "learning_rate": 3.556872908536646e-06, "loss": 0.1421, "step": 300800 }, { "epoch": 2.96, "grad_norm": 0.13288918137550354, "learning_rate": 3.556748786082398e-06, "loss": 0.1821, "step": 300825 }, { "epoch": 2.96, "grad_norm": 12.513725280761719, "learning_rate": 3.5566246636281492e-06, "loss": 0.1235, "step": 300850 }, { "epoch": 2.96, "grad_norm": 3.6926591396331787, "learning_rate": 3.556500541173901e-06, "loss": 0.2098, "step": 300875 }, { "epoch": 2.96, "grad_norm": 8.540658950805664, "learning_rate": 3.5563764187196525e-06, "loss": 0.1037, "step": 300900 }, { "epoch": 2.96, "grad_norm": 3.8533079624176025, "learning_rate": 3.556252296265404e-06, "loss": 0.265, "step": 300925 }, { "epoch": 2.96, "grad_norm": 12.75235366821289, "learning_rate": 3.5561281738111553e-06, "loss": 0.1411, "step": 300950 }, { "epoch": 2.96, "grad_norm": 3.961845636367798, "learning_rate": 3.5560040513569074e-06, "loss": 0.2194, "step": 300975 }, { "epoch": 2.96, "grad_norm": 13.480245590209961, "learning_rate": 3.5558799289026586e-06, "loss": 0.1472, "step": 301000 }, { "epoch": 2.96, "grad_norm": 4.92273473739624, "learning_rate": 3.55575580644841e-06, "loss": 0.2389, "step": 301025 }, { "epoch": 2.96, "grad_norm": 8.285050392150879, "learning_rate": 3.555631683994162e-06, "loss": 0.1277, "step": 301050 }, { "epoch": 2.96, "grad_norm": 5.164475440979004, "learning_rate": 3.555507561539913e-06, "loss": 0.228, "step": 301075 }, { "epoch": 2.96, "grad_norm": 15.38293743133545, "learning_rate": 3.5553834390856647e-06, "loss": 0.1126, "step": 301100 }, { "epoch": 2.96, "grad_norm": 6.668648719787598, "learning_rate": 3.5552593166314164e-06, "loss": 0.2112, "step": 301125 }, { "epoch": 2.96, "grad_norm": 9.989625930786133, "learning_rate": 3.555135194177168e-06, "loss": 0.149, "step": 301150 }, { "epoch": 2.96, "grad_norm": 10.379762649536133, "learning_rate": 3.555011071722919e-06, "loss": 0.2341, "step": 301175 }, { "epoch": 2.96, "grad_norm": 8.153922080993652, "learning_rate": 3.5548869492686713e-06, "loss": 0.1293, "step": 301200 }, { "epoch": 2.96, "grad_norm": 1.649713158607483, "learning_rate": 3.5547628268144225e-06, "loss": 0.2242, "step": 301225 }, { "epoch": 2.96, "grad_norm": 14.32268238067627, "learning_rate": 3.5546387043601737e-06, "loss": 0.1575, "step": 301250 }, { "epoch": 2.96, "grad_norm": 5.082383632659912, "learning_rate": 3.5545145819059253e-06, "loss": 0.1991, "step": 301275 }, { "epoch": 2.96, "grad_norm": 9.361893653869629, "learning_rate": 3.554390459451677e-06, "loss": 0.1029, "step": 301300 }, { "epoch": 2.96, "grad_norm": 3.1724421977996826, "learning_rate": 3.5542663369974286e-06, "loss": 0.1807, "step": 301325 }, { "epoch": 2.96, "grad_norm": 8.785435676574707, "learning_rate": 3.55414221454318e-06, "loss": 0.0924, "step": 301350 }, { "epoch": 2.96, "grad_norm": 4.195320129394531, "learning_rate": 3.554018092088932e-06, "loss": 0.2537, "step": 301375 }, { "epoch": 2.96, "grad_norm": 11.8897066116333, "learning_rate": 3.553893969634683e-06, "loss": 0.1309, "step": 301400 }, { "epoch": 2.96, "grad_norm": 17.215946197509766, "learning_rate": 3.5537698471804343e-06, "loss": 0.2086, "step": 301425 }, { "epoch": 2.96, "grad_norm": 8.52606201171875, "learning_rate": 3.5536457247261863e-06, "loss": 0.109, "step": 301450 }, { "epoch": 2.96, "grad_norm": 2.3461945056915283, "learning_rate": 3.5535216022719376e-06, "loss": 0.2295, "step": 301475 }, { "epoch": 2.96, "grad_norm": 12.097807884216309, "learning_rate": 3.553397479817689e-06, "loss": 0.1038, "step": 301500 }, { "epoch": 2.96, "grad_norm": 3.733736038208008, "learning_rate": 3.553273357363441e-06, "loss": 0.2824, "step": 301525 }, { "epoch": 2.96, "grad_norm": 11.348553657531738, "learning_rate": 3.5531492349091925e-06, "loss": 0.1667, "step": 301550 }, { "epoch": 2.97, "grad_norm": 1.482338547706604, "learning_rate": 3.5530251124549437e-06, "loss": 0.2079, "step": 301575 }, { "epoch": 2.97, "grad_norm": 9.244059562683105, "learning_rate": 3.5529009900006957e-06, "loss": 0.1509, "step": 301600 }, { "epoch": 2.97, "grad_norm": 3.3057124614715576, "learning_rate": 3.552776867546447e-06, "loss": 0.2082, "step": 301625 }, { "epoch": 2.97, "grad_norm": 14.097408294677734, "learning_rate": 3.552652745092198e-06, "loss": 0.1656, "step": 301650 }, { "epoch": 2.97, "grad_norm": 7.75833797454834, "learning_rate": 3.55252862263795e-06, "loss": 0.2248, "step": 301675 }, { "epoch": 2.97, "grad_norm": 8.79531192779541, "learning_rate": 3.5524045001837014e-06, "loss": 0.1407, "step": 301700 }, { "epoch": 2.97, "grad_norm": 3.6635844707489014, "learning_rate": 3.552280377729453e-06, "loss": 0.221, "step": 301725 }, { "epoch": 2.97, "grad_norm": 8.194326400756836, "learning_rate": 3.5521562552752047e-06, "loss": 0.1294, "step": 301750 }, { "epoch": 2.97, "grad_norm": 2.0064311027526855, "learning_rate": 3.5520321328209563e-06, "loss": 0.2528, "step": 301775 }, { "epoch": 2.97, "grad_norm": 17.92729377746582, "learning_rate": 3.5519080103667075e-06, "loss": 0.167, "step": 301800 }, { "epoch": 2.97, "grad_norm": 4.085689067840576, "learning_rate": 3.5517838879124596e-06, "loss": 0.1838, "step": 301825 }, { "epoch": 2.97, "grad_norm": 10.45410442352295, "learning_rate": 3.551659765458211e-06, "loss": 0.1432, "step": 301850 }, { "epoch": 2.97, "grad_norm": 8.441570281982422, "learning_rate": 3.551535643003962e-06, "loss": 0.2178, "step": 301875 }, { "epoch": 2.97, "grad_norm": 12.386796951293945, "learning_rate": 3.551411520549714e-06, "loss": 0.1685, "step": 301900 }, { "epoch": 2.97, "grad_norm": 1.0454518795013428, "learning_rate": 3.5512873980954653e-06, "loss": 0.2096, "step": 301925 }, { "epoch": 2.97, "grad_norm": 9.405022621154785, "learning_rate": 3.551163275641217e-06, "loss": 0.1453, "step": 301950 }, { "epoch": 2.97, "grad_norm": 5.63657808303833, "learning_rate": 3.5510391531869686e-06, "loss": 0.2262, "step": 301975 }, { "epoch": 2.97, "grad_norm": 7.206194877624512, "learning_rate": 3.55091503073272e-06, "loss": 0.1142, "step": 302000 }, { "epoch": 2.97, "grad_norm": 3.9055099487304688, "learning_rate": 3.5507909082784714e-06, "loss": 0.1929, "step": 302025 }, { "epoch": 2.97, "grad_norm": 8.234800338745117, "learning_rate": 3.5506667858242235e-06, "loss": 0.0972, "step": 302050 }, { "epoch": 2.97, "grad_norm": 5.900704383850098, "learning_rate": 3.5505426633699747e-06, "loss": 0.2159, "step": 302075 }, { "epoch": 2.97, "grad_norm": 7.897205829620361, "learning_rate": 3.550418540915726e-06, "loss": 0.1294, "step": 302100 }, { "epoch": 2.97, "grad_norm": 0.03776174038648605, "learning_rate": 3.5502944184614775e-06, "loss": 0.2035, "step": 302125 }, { "epoch": 2.97, "grad_norm": 14.419614791870117, "learning_rate": 3.550170296007229e-06, "loss": 0.1458, "step": 302150 }, { "epoch": 2.97, "grad_norm": 0.7590154409408569, "learning_rate": 3.550046173552981e-06, "loss": 0.22, "step": 302175 }, { "epoch": 2.97, "grad_norm": 10.021943092346191, "learning_rate": 3.549922051098732e-06, "loss": 0.1011, "step": 302200 }, { "epoch": 2.97, "grad_norm": 4.8545241355896, "learning_rate": 3.549797928644484e-06, "loss": 0.2547, "step": 302225 }, { "epoch": 2.97, "grad_norm": 15.237382888793945, "learning_rate": 3.5496738061902353e-06, "loss": 0.1645, "step": 302250 }, { "epoch": 2.97, "grad_norm": 4.130057334899902, "learning_rate": 3.549554648634157e-06, "loss": 0.2295, "step": 302275 }, { "epoch": 2.97, "grad_norm": 14.428725242614746, "learning_rate": 3.5494305261799084e-06, "loss": 0.1418, "step": 302300 }, { "epoch": 2.97, "grad_norm": 3.70444393157959, "learning_rate": 3.5493064037256596e-06, "loss": 0.2088, "step": 302325 }, { "epoch": 2.97, "grad_norm": 14.087909698486328, "learning_rate": 3.5491822812714116e-06, "loss": 0.1261, "step": 302350 }, { "epoch": 2.97, "grad_norm": 4.697526454925537, "learning_rate": 3.549058158817163e-06, "loss": 0.1502, "step": 302375 }, { "epoch": 2.97, "grad_norm": 7.8877763748168945, "learning_rate": 3.5489340363629145e-06, "loss": 0.1518, "step": 302400 }, { "epoch": 2.97, "grad_norm": 2.540916919708252, "learning_rate": 3.548809913908666e-06, "loss": 0.219, "step": 302425 }, { "epoch": 2.97, "grad_norm": 9.9478120803833, "learning_rate": 3.5486857914544177e-06, "loss": 0.1403, "step": 302450 }, { "epoch": 2.97, "grad_norm": 6.731872081756592, "learning_rate": 3.548561669000169e-06, "loss": 0.2225, "step": 302475 }, { "epoch": 2.97, "grad_norm": 5.9978928565979, "learning_rate": 3.548437546545921e-06, "loss": 0.111, "step": 302500 }, { "epoch": 2.97, "grad_norm": 3.336728572845459, "learning_rate": 3.5483134240916722e-06, "loss": 0.2115, "step": 302525 }, { "epoch": 2.97, "grad_norm": 10.588591575622559, "learning_rate": 3.5481893016374234e-06, "loss": 0.1216, "step": 302550 }, { "epoch": 2.97, "grad_norm": 2.3599863052368164, "learning_rate": 3.5480651791831755e-06, "loss": 0.2518, "step": 302575 }, { "epoch": 2.98, "grad_norm": 13.20621395111084, "learning_rate": 3.5479410567289267e-06, "loss": 0.0999, "step": 302600 }, { "epoch": 2.98, "grad_norm": 7.346709251403809, "learning_rate": 3.5478169342746783e-06, "loss": 0.2506, "step": 302625 }, { "epoch": 2.98, "grad_norm": 15.4244966506958, "learning_rate": 3.54769281182043e-06, "loss": 0.1351, "step": 302650 }, { "epoch": 2.98, "grad_norm": 4.190221309661865, "learning_rate": 3.5475686893661816e-06, "loss": 0.2525, "step": 302675 }, { "epoch": 2.98, "grad_norm": 16.603174209594727, "learning_rate": 3.547444566911933e-06, "loss": 0.1599, "step": 302700 }, { "epoch": 2.98, "grad_norm": 4.829629421234131, "learning_rate": 3.547320444457684e-06, "loss": 0.2299, "step": 302725 }, { "epoch": 2.98, "grad_norm": 8.883929252624512, "learning_rate": 3.547196322003436e-06, "loss": 0.1417, "step": 302750 }, { "epoch": 2.98, "grad_norm": 16.789037704467773, "learning_rate": 3.5470721995491873e-06, "loss": 0.2169, "step": 302775 }, { "epoch": 2.98, "grad_norm": 9.428668022155762, "learning_rate": 3.546948077094939e-06, "loss": 0.1375, "step": 302800 }, { "epoch": 2.98, "grad_norm": 1.6345537900924683, "learning_rate": 3.5468239546406906e-06, "loss": 0.2294, "step": 302825 }, { "epoch": 2.98, "grad_norm": 16.683637619018555, "learning_rate": 3.546699832186442e-06, "loss": 0.1243, "step": 302850 }, { "epoch": 2.98, "grad_norm": 5.567271709442139, "learning_rate": 3.5465757097321934e-06, "loss": 0.2319, "step": 302875 }, { "epoch": 2.98, "grad_norm": 7.89632511138916, "learning_rate": 3.5464515872779455e-06, "loss": 0.1123, "step": 302900 }, { "epoch": 2.98, "grad_norm": 0.9177160263061523, "learning_rate": 3.5463274648236967e-06, "loss": 0.2194, "step": 302925 }, { "epoch": 2.98, "grad_norm": 10.382716178894043, "learning_rate": 3.546203342369448e-06, "loss": 0.144, "step": 302950 }, { "epoch": 2.98, "grad_norm": 2.3765947818756104, "learning_rate": 3.5460792199152e-06, "loss": 0.2126, "step": 302975 }, { "epoch": 2.98, "grad_norm": 6.964227676391602, "learning_rate": 3.545955097460951e-06, "loss": 0.1061, "step": 303000 }, { "epoch": 2.98, "grad_norm": 9.76490306854248, "learning_rate": 3.545830975006703e-06, "loss": 0.2136, "step": 303025 }, { "epoch": 2.98, "grad_norm": 9.965991020202637, "learning_rate": 3.5457068525524544e-06, "loss": 0.1306, "step": 303050 }, { "epoch": 2.98, "grad_norm": 4.767026901245117, "learning_rate": 3.545582730098206e-06, "loss": 0.2097, "step": 303075 }, { "epoch": 2.98, "grad_norm": 16.454387664794922, "learning_rate": 3.5454586076439573e-06, "loss": 0.1329, "step": 303100 }, { "epoch": 2.98, "grad_norm": 7.320041179656982, "learning_rate": 3.5453344851897093e-06, "loss": 0.2024, "step": 303125 }, { "epoch": 2.98, "grad_norm": 11.676637649536133, "learning_rate": 3.5452103627354606e-06, "loss": 0.1366, "step": 303150 }, { "epoch": 2.98, "grad_norm": 5.6778178215026855, "learning_rate": 3.5450862402812118e-06, "loss": 0.2207, "step": 303175 }, { "epoch": 2.98, "grad_norm": 9.659520149230957, "learning_rate": 3.544962117826964e-06, "loss": 0.1191, "step": 303200 }, { "epoch": 2.98, "grad_norm": 8.76447582244873, "learning_rate": 3.544837995372715e-06, "loss": 0.2498, "step": 303225 }, { "epoch": 2.98, "grad_norm": 7.57038688659668, "learning_rate": 3.5447138729184667e-06, "loss": 0.1564, "step": 303250 }, { "epoch": 2.98, "grad_norm": 2.83534836769104, "learning_rate": 3.5445897504642183e-06, "loss": 0.2272, "step": 303275 }, { "epoch": 2.98, "grad_norm": 15.981199264526367, "learning_rate": 3.54446562800997e-06, "loss": 0.1207, "step": 303300 }, { "epoch": 2.98, "grad_norm": 7.205976963043213, "learning_rate": 3.544341505555721e-06, "loss": 0.2801, "step": 303325 }, { "epoch": 2.98, "grad_norm": 10.705074310302734, "learning_rate": 3.5442173831014732e-06, "loss": 0.1522, "step": 303350 }, { "epoch": 2.98, "grad_norm": 4.824061870574951, "learning_rate": 3.5440932606472244e-06, "loss": 0.1906, "step": 303375 }, { "epoch": 2.98, "grad_norm": 52.861854553222656, "learning_rate": 3.5439691381929756e-06, "loss": 0.1214, "step": 303400 }, { "epoch": 2.98, "grad_norm": 5.193541049957275, "learning_rate": 3.5438450157387277e-06, "loss": 0.2386, "step": 303425 }, { "epoch": 2.98, "grad_norm": 5.240030765533447, "learning_rate": 3.543720893284479e-06, "loss": 0.1527, "step": 303450 }, { "epoch": 2.98, "grad_norm": 4.302867412567139, "learning_rate": 3.5435967708302305e-06, "loss": 0.2168, "step": 303475 }, { "epoch": 2.98, "grad_norm": 11.548260688781738, "learning_rate": 3.543472648375982e-06, "loss": 0.1492, "step": 303500 }, { "epoch": 2.98, "grad_norm": 6.00607442855835, "learning_rate": 3.543348525921734e-06, "loss": 0.2095, "step": 303525 }, { "epoch": 2.98, "grad_norm": 16.26254653930664, "learning_rate": 3.543224403467485e-06, "loss": 0.1321, "step": 303550 }, { "epoch": 2.98, "grad_norm": 1.5207306146621704, "learning_rate": 3.5431002810132362e-06, "loss": 0.2311, "step": 303575 }, { "epoch": 2.99, "grad_norm": 11.53359603881836, "learning_rate": 3.5429761585589883e-06, "loss": 0.1249, "step": 303600 }, { "epoch": 2.99, "grad_norm": 3.3979249000549316, "learning_rate": 3.5428520361047395e-06, "loss": 0.1634, "step": 303625 }, { "epoch": 2.99, "grad_norm": 38.46112060546875, "learning_rate": 3.542727913650491e-06, "loss": 0.1488, "step": 303650 }, { "epoch": 2.99, "grad_norm": 7.647963523864746, "learning_rate": 3.5426037911962428e-06, "loss": 0.2623, "step": 303675 }, { "epoch": 2.99, "grad_norm": 10.33297348022461, "learning_rate": 3.5424796687419944e-06, "loss": 0.1136, "step": 303700 }, { "epoch": 2.99, "grad_norm": 2.6617748737335205, "learning_rate": 3.5423555462877456e-06, "loss": 0.2256, "step": 303725 }, { "epoch": 2.99, "grad_norm": 8.909750938415527, "learning_rate": 3.5422314238334977e-06, "loss": 0.1276, "step": 303750 }, { "epoch": 2.99, "grad_norm": 5.658728122711182, "learning_rate": 3.542107301379249e-06, "loss": 0.2206, "step": 303775 }, { "epoch": 2.99, "grad_norm": 11.563097953796387, "learning_rate": 3.5419831789250005e-06, "loss": 0.1259, "step": 303800 }, { "epoch": 2.99, "grad_norm": 12.019881248474121, "learning_rate": 3.541859056470752e-06, "loss": 0.2003, "step": 303825 }, { "epoch": 2.99, "grad_norm": 11.95899486541748, "learning_rate": 3.541734934016504e-06, "loss": 0.1462, "step": 303850 }, { "epoch": 2.99, "grad_norm": 4.0603766441345215, "learning_rate": 3.541610811562255e-06, "loss": 0.2533, "step": 303875 }, { "epoch": 2.99, "grad_norm": 7.964874744415283, "learning_rate": 3.541486689108007e-06, "loss": 0.1195, "step": 303900 }, { "epoch": 2.99, "grad_norm": 5.63107442855835, "learning_rate": 3.5413625666537583e-06, "loss": 0.2333, "step": 303925 }, { "epoch": 2.99, "grad_norm": 13.93142032623291, "learning_rate": 3.5412384441995095e-06, "loss": 0.1474, "step": 303950 }, { "epoch": 2.99, "grad_norm": 7.497751712799072, "learning_rate": 3.5411143217452615e-06, "loss": 0.2367, "step": 303975 }, { "epoch": 2.99, "grad_norm": 12.321023941040039, "learning_rate": 3.5409901992910128e-06, "loss": 0.137, "step": 304000 }, { "epoch": 2.99, "grad_norm": 4.208466053009033, "learning_rate": 3.5408660768367644e-06, "loss": 0.2133, "step": 304025 }, { "epoch": 2.99, "grad_norm": 6.735044956207275, "learning_rate": 3.540741954382516e-06, "loss": 0.1309, "step": 304050 }, { "epoch": 2.99, "grad_norm": 8.774198532104492, "learning_rate": 3.5406178319282677e-06, "loss": 0.225, "step": 304075 }, { "epoch": 2.99, "grad_norm": 3.093109607696533, "learning_rate": 3.540493709474019e-06, "loss": 0.104, "step": 304100 }, { "epoch": 2.99, "grad_norm": 3.5706098079681396, "learning_rate": 3.540369587019771e-06, "loss": 0.2215, "step": 304125 }, { "epoch": 2.99, "grad_norm": 10.831008911132812, "learning_rate": 3.540245464565522e-06, "loss": 0.1484, "step": 304150 }, { "epoch": 2.99, "grad_norm": 2.9865305423736572, "learning_rate": 3.5401213421112734e-06, "loss": 0.2162, "step": 304175 }, { "epoch": 2.99, "grad_norm": 14.1533203125, "learning_rate": 3.5399972196570254e-06, "loss": 0.1147, "step": 304200 }, { "epoch": 2.99, "grad_norm": 6.196183204650879, "learning_rate": 3.5398730972027766e-06, "loss": 0.2612, "step": 304225 }, { "epoch": 2.99, "grad_norm": 7.065343856811523, "learning_rate": 3.5397489747485283e-06, "loss": 0.1263, "step": 304250 }, { "epoch": 2.99, "grad_norm": 7.444580078125, "learning_rate": 3.53962485229428e-06, "loss": 0.2531, "step": 304275 }, { "epoch": 2.99, "grad_norm": 10.9920015335083, "learning_rate": 3.5395007298400315e-06, "loss": 0.1119, "step": 304300 }, { "epoch": 2.99, "grad_norm": 9.959012031555176, "learning_rate": 3.5393815722839526e-06, "loss": 0.1993, "step": 304325 }, { "epoch": 2.99, "grad_norm": 17.317380905151367, "learning_rate": 3.5392574498297046e-06, "loss": 0.1759, "step": 304350 }, { "epoch": 2.99, "grad_norm": 4.703673839569092, "learning_rate": 3.539133327375456e-06, "loss": 0.2, "step": 304375 }, { "epoch": 2.99, "grad_norm": 10.839059829711914, "learning_rate": 3.539009204921207e-06, "loss": 0.1135, "step": 304400 }, { "epoch": 2.99, "grad_norm": 6.433080673217773, "learning_rate": 3.538885082466959e-06, "loss": 0.2481, "step": 304425 }, { "epoch": 2.99, "grad_norm": 8.81951904296875, "learning_rate": 3.5387609600127103e-06, "loss": 0.1152, "step": 304450 }, { "epoch": 2.99, "grad_norm": 9.90243148803711, "learning_rate": 3.538636837558462e-06, "loss": 0.2182, "step": 304475 }, { "epoch": 2.99, "grad_norm": 10.472413063049316, "learning_rate": 3.5385127151042136e-06, "loss": 0.1437, "step": 304500 }, { "epoch": 2.99, "grad_norm": 2.274685859680176, "learning_rate": 3.5383885926499652e-06, "loss": 0.2085, "step": 304525 }, { "epoch": 2.99, "grad_norm": 11.26009464263916, "learning_rate": 3.5382644701957164e-06, "loss": 0.1592, "step": 304550 }, { "epoch": 2.99, "grad_norm": 6.830298900604248, "learning_rate": 3.5381403477414685e-06, "loss": 0.2354, "step": 304575 }, { "epoch": 2.99, "grad_norm": 13.714299201965332, "learning_rate": 3.5380162252872197e-06, "loss": 0.1435, "step": 304600 }, { "epoch": 3.0, "grad_norm": 4.382673263549805, "learning_rate": 3.537892102832971e-06, "loss": 0.1854, "step": 304625 }, { "epoch": 3.0, "grad_norm": 18.907543182373047, "learning_rate": 3.537767980378723e-06, "loss": 0.0891, "step": 304650 }, { "epoch": 3.0, "grad_norm": 6.166705131530762, "learning_rate": 3.537643857924474e-06, "loss": 0.1902, "step": 304675 }, { "epoch": 3.0, "grad_norm": 14.307022094726562, "learning_rate": 3.537519735470226e-06, "loss": 0.1674, "step": 304700 }, { "epoch": 3.0, "grad_norm": 4.373162746429443, "learning_rate": 3.5373956130159774e-06, "loss": 0.2075, "step": 304725 }, { "epoch": 3.0, "grad_norm": 13.597819328308105, "learning_rate": 3.537271490561729e-06, "loss": 0.1457, "step": 304750 }, { "epoch": 3.0, "grad_norm": 0.8993085026741028, "learning_rate": 3.5371473681074803e-06, "loss": 0.2069, "step": 304775 }, { "epoch": 3.0, "grad_norm": 12.64075756072998, "learning_rate": 3.5370232456532324e-06, "loss": 0.1581, "step": 304800 }, { "epoch": 3.0, "grad_norm": 1.0946292877197266, "learning_rate": 3.5368991231989836e-06, "loss": 0.2354, "step": 304825 }, { "epoch": 3.0, "grad_norm": 9.427475929260254, "learning_rate": 3.5367750007447348e-06, "loss": 0.1493, "step": 304850 }, { "epoch": 3.0, "grad_norm": 5.830624580383301, "learning_rate": 3.536650878290487e-06, "loss": 0.2521, "step": 304875 }, { "epoch": 3.0, "grad_norm": 12.375870704650879, "learning_rate": 3.536526755836238e-06, "loss": 0.1192, "step": 304900 }, { "epoch": 3.0, "grad_norm": 3.683727264404297, "learning_rate": 3.5364026333819897e-06, "loss": 0.1736, "step": 304925 }, { "epoch": 3.0, "grad_norm": 16.82771873474121, "learning_rate": 3.5362785109277413e-06, "loss": 0.1406, "step": 304950 }, { "epoch": 3.0, "grad_norm": 7.4966044425964355, "learning_rate": 3.536154388473493e-06, "loss": 0.1904, "step": 304975 }, { "epoch": 3.0, "grad_norm": 7.729742527008057, "learning_rate": 3.536030266019244e-06, "loss": 0.1452, "step": 305000 }, { "epoch": 3.0, "grad_norm": 5.926362991333008, "learning_rate": 3.5359061435649954e-06, "loss": 0.1909, "step": 305025 }, { "epoch": 3.0, "grad_norm": 10.674365997314453, "learning_rate": 3.5357820211107474e-06, "loss": 0.1531, "step": 305050 }, { "epoch": 3.0, "grad_norm": 6.712532043457031, "learning_rate": 3.5356578986564986e-06, "loss": 0.2124, "step": 305075 }, { "epoch": 3.0, "grad_norm": 9.698126792907715, "learning_rate": 3.5355337762022503e-06, "loss": 0.1434, "step": 305100 }, { "epoch": 3.0, "grad_norm": 3.5792412757873535, "learning_rate": 3.535409653748002e-06, "loss": 0.2073, "step": 305125 }, { "epoch": 3.0, "grad_norm": 2.869050979614258, "learning_rate": 3.5352855312937535e-06, "loss": 0.0692, "step": 305150 }, { "epoch": 3.0, "grad_norm": 4.069765090942383, "learning_rate": 3.5351614088395048e-06, "loss": 0.1827, "step": 305175 }, { "epoch": 3.0, "grad_norm": 2.5902743339538574, "learning_rate": 3.535037286385257e-06, "loss": 0.0636, "step": 305200 }, { "epoch": 3.0, "grad_norm": 15.352463722229004, "learning_rate": 3.534913163931008e-06, "loss": 0.1733, "step": 305225 }, { "epoch": 3.0, "grad_norm": 11.306583404541016, "learning_rate": 3.5347890414767592e-06, "loss": 0.0667, "step": 305250 }, { "epoch": 3.0, "grad_norm": 5.256450653076172, "learning_rate": 3.5346649190225113e-06, "loss": 0.1914, "step": 305275 }, { "epoch": 3.0, "grad_norm": 9.168416023254395, "learning_rate": 3.5345407965682625e-06, "loss": 0.0493, "step": 305300 }, { "epoch": 3.0, "grad_norm": 5.14718770980835, "learning_rate": 3.534416674114014e-06, "loss": 0.1519, "step": 305325 }, { "epoch": 3.0, "grad_norm": 4.810927867889404, "learning_rate": 3.5342925516597658e-06, "loss": 0.0524, "step": 305350 }, { "epoch": 3.0, "grad_norm": 4.5100860595703125, "learning_rate": 3.5341684292055174e-06, "loss": 0.171, "step": 305375 }, { "epoch": 3.0, "grad_norm": 13.477649688720703, "learning_rate": 3.5340443067512686e-06, "loss": 0.078, "step": 305400 }, { "epoch": 3.0, "grad_norm": 4.482232093811035, "learning_rate": 3.5339201842970207e-06, "loss": 0.1589, "step": 305425 }, { "epoch": 3.0, "grad_norm": 2.9099345207214355, "learning_rate": 3.533796061842772e-06, "loss": 0.0668, "step": 305450 }, { "epoch": 3.0, "grad_norm": 4.129701614379883, "learning_rate": 3.533671939388523e-06, "loss": 0.1539, "step": 305475 }, { "epoch": 3.0, "grad_norm": 5.068136215209961, "learning_rate": 3.533547816934275e-06, "loss": 0.0711, "step": 305500 }, { "epoch": 3.0, "grad_norm": 5.597763538360596, "learning_rate": 3.5334236944800264e-06, "loss": 0.1895, "step": 305525 }, { "epoch": 3.0, "grad_norm": 5.765257358551025, "learning_rate": 3.533299572025778e-06, "loss": 0.0524, "step": 305550 }, { "epoch": 3.0, "grad_norm": 9.804698944091797, "learning_rate": 3.5331754495715297e-06, "loss": 0.1937, "step": 305575 }, { "epoch": 3.0, "grad_norm": 9.958410263061523, "learning_rate": 3.5330513271172813e-06, "loss": 0.0799, "step": 305600 }, { "epoch": 3.0, "grad_norm": 3.9014503955841064, "learning_rate": 3.5329272046630325e-06, "loss": 0.1477, "step": 305625 }, { "epoch": 3.01, "grad_norm": 4.3760528564453125, "learning_rate": 3.5328030822087846e-06, "loss": 0.0772, "step": 305650 }, { "epoch": 3.01, "grad_norm": 4.053161144256592, "learning_rate": 3.5326789597545358e-06, "loss": 0.201, "step": 305675 }, { "epoch": 3.01, "grad_norm": 12.826306343078613, "learning_rate": 3.532554837300287e-06, "loss": 0.0885, "step": 305700 }, { "epoch": 3.01, "grad_norm": 4.720702171325684, "learning_rate": 3.532430714846039e-06, "loss": 0.1942, "step": 305725 }, { "epoch": 3.01, "grad_norm": 6.016655921936035, "learning_rate": 3.5323065923917902e-06, "loss": 0.0739, "step": 305750 }, { "epoch": 3.01, "grad_norm": 3.988413095474243, "learning_rate": 3.532182469937542e-06, "loss": 0.142, "step": 305775 }, { "epoch": 3.01, "grad_norm": 10.543869018554688, "learning_rate": 3.5320583474832935e-06, "loss": 0.0722, "step": 305800 }, { "epoch": 3.01, "grad_norm": 4.207967758178711, "learning_rate": 3.531934225029045e-06, "loss": 0.1991, "step": 305825 }, { "epoch": 3.01, "grad_norm": 7.6816558837890625, "learning_rate": 3.5318101025747964e-06, "loss": 0.0556, "step": 305850 }, { "epoch": 3.01, "grad_norm": 4.958273887634277, "learning_rate": 3.5316859801205476e-06, "loss": 0.1828, "step": 305875 }, { "epoch": 3.01, "grad_norm": 7.16391134262085, "learning_rate": 3.5315618576662996e-06, "loss": 0.064, "step": 305900 }, { "epoch": 3.01, "grad_norm": 4.624359607696533, "learning_rate": 3.531437735212051e-06, "loss": 0.1651, "step": 305925 }, { "epoch": 3.01, "grad_norm": 9.879021644592285, "learning_rate": 3.5313136127578025e-06, "loss": 0.0733, "step": 305950 }, { "epoch": 3.01, "grad_norm": 3.5283520221710205, "learning_rate": 3.531189490303554e-06, "loss": 0.1639, "step": 305975 }, { "epoch": 3.01, "grad_norm": 7.233659267425537, "learning_rate": 3.5310653678493058e-06, "loss": 0.0721, "step": 306000 }, { "epoch": 3.01, "grad_norm": 4.928181171417236, "learning_rate": 3.530941245395057e-06, "loss": 0.1884, "step": 306025 }, { "epoch": 3.01, "grad_norm": 9.979574203491211, "learning_rate": 3.530817122940809e-06, "loss": 0.0856, "step": 306050 }, { "epoch": 3.01, "grad_norm": 4.2387566566467285, "learning_rate": 3.5306930004865602e-06, "loss": 0.1787, "step": 306075 }, { "epoch": 3.01, "grad_norm": 10.090654373168945, "learning_rate": 3.5305688780323114e-06, "loss": 0.0649, "step": 306100 }, { "epoch": 3.01, "grad_norm": 4.021918773651123, "learning_rate": 3.5304447555780635e-06, "loss": 0.1816, "step": 306125 }, { "epoch": 3.01, "grad_norm": 7.073421001434326, "learning_rate": 3.5303206331238147e-06, "loss": 0.0705, "step": 306150 }, { "epoch": 3.01, "grad_norm": 5.742075443267822, "learning_rate": 3.5301965106695663e-06, "loss": 0.1246, "step": 306175 }, { "epoch": 3.01, "grad_norm": 11.545092582702637, "learning_rate": 3.530072388215318e-06, "loss": 0.0924, "step": 306200 }, { "epoch": 3.01, "grad_norm": 2.6787123680114746, "learning_rate": 3.5299482657610696e-06, "loss": 0.162, "step": 306225 }, { "epoch": 3.01, "grad_norm": 13.443085670471191, "learning_rate": 3.529824143306821e-06, "loss": 0.0879, "step": 306250 }, { "epoch": 3.01, "grad_norm": 4.432715892791748, "learning_rate": 3.529700020852573e-06, "loss": 0.1693, "step": 306275 }, { "epoch": 3.01, "grad_norm": 6.652504920959473, "learning_rate": 3.529575898398324e-06, "loss": 0.0886, "step": 306300 }, { "epoch": 3.01, "grad_norm": 4.899206161499023, "learning_rate": 3.5294517759440753e-06, "loss": 0.1555, "step": 306325 }, { "epoch": 3.01, "grad_norm": 8.15383529663086, "learning_rate": 3.5293276534898274e-06, "loss": 0.0648, "step": 306350 }, { "epoch": 3.01, "grad_norm": 2.7544918060302734, "learning_rate": 3.5292035310355786e-06, "loss": 0.1611, "step": 306375 }, { "epoch": 3.01, "grad_norm": 0.3751383423805237, "learning_rate": 3.5290794085813302e-06, "loss": 0.0524, "step": 306400 }, { "epoch": 3.01, "grad_norm": 4.390870571136475, "learning_rate": 3.528955286127082e-06, "loss": 0.1748, "step": 306425 }, { "epoch": 3.01, "grad_norm": 6.850615978240967, "learning_rate": 3.5288311636728335e-06, "loss": 0.076, "step": 306450 }, { "epoch": 3.01, "grad_norm": 6.752782821655273, "learning_rate": 3.5287070412185847e-06, "loss": 0.1838, "step": 306475 }, { "epoch": 3.01, "grad_norm": 1.8268283605575562, "learning_rate": 3.5285829187643368e-06, "loss": 0.0895, "step": 306500 }, { "epoch": 3.01, "grad_norm": 4.509619235992432, "learning_rate": 3.528458796310088e-06, "loss": 0.1396, "step": 306525 }, { "epoch": 3.01, "grad_norm": 5.227516174316406, "learning_rate": 3.528334673855839e-06, "loss": 0.0574, "step": 306550 }, { "epoch": 3.01, "grad_norm": 5.568238735198975, "learning_rate": 3.5282105514015912e-06, "loss": 0.1425, "step": 306575 }, { "epoch": 3.01, "grad_norm": 4.35141134262085, "learning_rate": 3.5280864289473424e-06, "loss": 0.0645, "step": 306600 }, { "epoch": 3.01, "grad_norm": 5.719435214996338, "learning_rate": 3.527962306493094e-06, "loss": 0.1614, "step": 306625 }, { "epoch": 3.02, "grad_norm": 13.496790885925293, "learning_rate": 3.5278381840388457e-06, "loss": 0.076, "step": 306650 }, { "epoch": 3.02, "grad_norm": 3.6748459339141846, "learning_rate": 3.5277140615845974e-06, "loss": 0.1911, "step": 306675 }, { "epoch": 3.02, "grad_norm": 7.047004699707031, "learning_rate": 3.5275899391303486e-06, "loss": 0.0891, "step": 306700 }, { "epoch": 3.02, "grad_norm": 3.218350648880005, "learning_rate": 3.5274658166761e-06, "loss": 0.1758, "step": 306725 }, { "epoch": 3.02, "grad_norm": 7.4920783042907715, "learning_rate": 3.527341694221852e-06, "loss": 0.0645, "step": 306750 }, { "epoch": 3.02, "grad_norm": 4.597737789154053, "learning_rate": 3.5272175717676035e-06, "loss": 0.1756, "step": 306775 }, { "epoch": 3.02, "grad_norm": 6.134131908416748, "learning_rate": 3.5270934493133547e-06, "loss": 0.0799, "step": 306800 }, { "epoch": 3.02, "grad_norm": 5.444045543670654, "learning_rate": 3.5269693268591067e-06, "loss": 0.1842, "step": 306825 }, { "epoch": 3.02, "grad_norm": 4.750827312469482, "learning_rate": 3.526845204404858e-06, "loss": 0.0537, "step": 306850 }, { "epoch": 3.02, "grad_norm": 3.5128116607666016, "learning_rate": 3.526721081950609e-06, "loss": 0.1719, "step": 306875 }, { "epoch": 3.02, "grad_norm": 3.2320079803466797, "learning_rate": 3.5265969594963612e-06, "loss": 0.0849, "step": 306900 }, { "epoch": 3.02, "grad_norm": 6.159970283508301, "learning_rate": 3.5264728370421124e-06, "loss": 0.1986, "step": 306925 }, { "epoch": 3.02, "grad_norm": 6.465193748474121, "learning_rate": 3.526348714587864e-06, "loss": 0.0634, "step": 306950 }, { "epoch": 3.02, "grad_norm": 4.260066986083984, "learning_rate": 3.5262245921336157e-06, "loss": 0.1396, "step": 306975 }, { "epoch": 3.02, "grad_norm": 7.0630340576171875, "learning_rate": 3.5261004696793673e-06, "loss": 0.0541, "step": 307000 }, { "epoch": 3.02, "grad_norm": 4.4778289794921875, "learning_rate": 3.5259763472251185e-06, "loss": 0.2036, "step": 307025 }, { "epoch": 3.02, "grad_norm": 6.0085930824279785, "learning_rate": 3.5258522247708706e-06, "loss": 0.0796, "step": 307050 }, { "epoch": 3.02, "grad_norm": 4.471039295196533, "learning_rate": 3.525728102316622e-06, "loss": 0.1694, "step": 307075 }, { "epoch": 3.02, "grad_norm": 7.724619388580322, "learning_rate": 3.525603979862373e-06, "loss": 0.0838, "step": 307100 }, { "epoch": 3.02, "grad_norm": 5.296306610107422, "learning_rate": 3.525479857408125e-06, "loss": 0.1767, "step": 307125 }, { "epoch": 3.02, "grad_norm": 12.569740295410156, "learning_rate": 3.5253557349538763e-06, "loss": 0.0838, "step": 307150 }, { "epoch": 3.02, "grad_norm": 3.304413080215454, "learning_rate": 3.525231612499628e-06, "loss": 0.1566, "step": 307175 }, { "epoch": 3.02, "grad_norm": 3.07545804977417, "learning_rate": 3.5251074900453796e-06, "loss": 0.0582, "step": 307200 }, { "epoch": 3.02, "grad_norm": 3.377633571624756, "learning_rate": 3.524983367591131e-06, "loss": 0.1781, "step": 307225 }, { "epoch": 3.02, "grad_norm": 7.716853141784668, "learning_rate": 3.5248592451368824e-06, "loss": 0.0742, "step": 307250 }, { "epoch": 3.02, "grad_norm": 4.693838119506836, "learning_rate": 3.5247351226826345e-06, "loss": 0.1906, "step": 307275 }, { "epoch": 3.02, "grad_norm": 10.45028305053711, "learning_rate": 3.5246110002283857e-06, "loss": 0.0825, "step": 307300 }, { "epoch": 3.02, "grad_norm": 4.851770401000977, "learning_rate": 3.5244918426723067e-06, "loss": 0.1594, "step": 307325 }, { "epoch": 3.02, "grad_norm": 7.241879463195801, "learning_rate": 3.5243677202180588e-06, "loss": 0.052, "step": 307350 }, { "epoch": 3.02, "grad_norm": 3.2048752307891846, "learning_rate": 3.52424359776381e-06, "loss": 0.1497, "step": 307375 }, { "epoch": 3.02, "grad_norm": 10.888429641723633, "learning_rate": 3.5241194753095616e-06, "loss": 0.0843, "step": 307400 }, { "epoch": 3.02, "grad_norm": 5.755282878875732, "learning_rate": 3.5239953528553133e-06, "loss": 0.1728, "step": 307425 }, { "epoch": 3.02, "grad_norm": 14.724596977233887, "learning_rate": 3.523871230401065e-06, "loss": 0.073, "step": 307450 }, { "epoch": 3.02, "grad_norm": 3.2976653575897217, "learning_rate": 3.523747107946816e-06, "loss": 0.1471, "step": 307475 }, { "epoch": 3.02, "grad_norm": 11.814430236816406, "learning_rate": 3.523622985492568e-06, "loss": 0.0614, "step": 307500 }, { "epoch": 3.02, "grad_norm": 3.588374137878418, "learning_rate": 3.5234988630383194e-06, "loss": 0.1755, "step": 307525 }, { "epoch": 3.02, "grad_norm": 7.237406253814697, "learning_rate": 3.5233747405840706e-06, "loss": 0.0829, "step": 307550 }, { "epoch": 3.02, "grad_norm": 4.939253330230713, "learning_rate": 3.5232506181298226e-06, "loss": 0.2138, "step": 307575 }, { "epoch": 3.02, "grad_norm": 10.847465515136719, "learning_rate": 3.523126495675574e-06, "loss": 0.0893, "step": 307600 }, { "epoch": 3.02, "grad_norm": 6.517066955566406, "learning_rate": 3.5230023732213255e-06, "loss": 0.1949, "step": 307625 }, { "epoch": 3.02, "grad_norm": 1.719935417175293, "learning_rate": 3.522883215665247e-06, "loss": 0.0447, "step": 307650 }, { "epoch": 3.03, "grad_norm": 7.864378452301025, "learning_rate": 3.5227590932109986e-06, "loss": 0.1397, "step": 307675 }, { "epoch": 3.03, "grad_norm": 9.755450248718262, "learning_rate": 3.52263497075675e-06, "loss": 0.0662, "step": 307700 }, { "epoch": 3.03, "grad_norm": 9.168344497680664, "learning_rate": 3.522510848302502e-06, "loss": 0.1967, "step": 307725 }, { "epoch": 3.03, "grad_norm": 9.33470344543457, "learning_rate": 3.522386725848253e-06, "loss": 0.0583, "step": 307750 }, { "epoch": 3.03, "grad_norm": 4.4626078605651855, "learning_rate": 3.522262603394005e-06, "loss": 0.1612, "step": 307775 }, { "epoch": 3.03, "grad_norm": 5.639039039611816, "learning_rate": 3.5221384809397563e-06, "loss": 0.0866, "step": 307800 }, { "epoch": 3.03, "grad_norm": 5.199243068695068, "learning_rate": 3.5220143584855075e-06, "loss": 0.2049, "step": 307825 }, { "epoch": 3.03, "grad_norm": 4.410852909088135, "learning_rate": 3.521890236031259e-06, "loss": 0.0865, "step": 307850 }, { "epoch": 3.03, "grad_norm": 3.4849789142608643, "learning_rate": 3.521766113577011e-06, "loss": 0.168, "step": 307875 }, { "epoch": 3.03, "grad_norm": 9.42465591430664, "learning_rate": 3.5216419911227624e-06, "loss": 0.0635, "step": 307900 }, { "epoch": 3.03, "grad_norm": 8.367561340332031, "learning_rate": 3.5215178686685137e-06, "loss": 0.1774, "step": 307925 }, { "epoch": 3.03, "grad_norm": 5.151627540588379, "learning_rate": 3.5213937462142657e-06, "loss": 0.0732, "step": 307950 }, { "epoch": 3.03, "grad_norm": 4.037874698638916, "learning_rate": 3.521269623760017e-06, "loss": 0.1868, "step": 307975 }, { "epoch": 3.03, "grad_norm": 7.88753080368042, "learning_rate": 3.521145501305768e-06, "loss": 0.0487, "step": 308000 }, { "epoch": 3.03, "grad_norm": 5.0457892417907715, "learning_rate": 3.52102137885152e-06, "loss": 0.1685, "step": 308025 }, { "epoch": 3.03, "grad_norm": 9.72260856628418, "learning_rate": 3.5208972563972714e-06, "loss": 0.0773, "step": 308050 }, { "epoch": 3.03, "grad_norm": 4.892934799194336, "learning_rate": 3.520773133943023e-06, "loss": 0.1423, "step": 308075 }, { "epoch": 3.03, "grad_norm": 13.413297653198242, "learning_rate": 3.5206490114887747e-06, "loss": 0.0881, "step": 308100 }, { "epoch": 3.03, "grad_norm": 5.975575923919678, "learning_rate": 3.5205248890345263e-06, "loss": 0.1673, "step": 308125 }, { "epoch": 3.03, "grad_norm": 6.659173965454102, "learning_rate": 3.5204007665802775e-06, "loss": 0.0404, "step": 308150 }, { "epoch": 3.03, "grad_norm": 3.9536590576171875, "learning_rate": 3.5202766441260296e-06, "loss": 0.173, "step": 308175 }, { "epoch": 3.03, "grad_norm": 7.338542938232422, "learning_rate": 3.520152521671781e-06, "loss": 0.0623, "step": 308200 }, { "epoch": 3.03, "grad_norm": 13.038312911987305, "learning_rate": 3.520028399217532e-06, "loss": 0.2063, "step": 308225 }, { "epoch": 3.03, "grad_norm": 4.646578788757324, "learning_rate": 3.519904276763284e-06, "loss": 0.0586, "step": 308250 }, { "epoch": 3.03, "grad_norm": 3.4989802837371826, "learning_rate": 3.5197801543090353e-06, "loss": 0.1493, "step": 308275 }, { "epoch": 3.03, "grad_norm": 9.383132934570312, "learning_rate": 3.519656031854787e-06, "loss": 0.0791, "step": 308300 }, { "epoch": 3.03, "grad_norm": 4.882197856903076, "learning_rate": 3.5195319094005385e-06, "loss": 0.184, "step": 308325 }, { "epoch": 3.03, "grad_norm": 10.923709869384766, "learning_rate": 3.51940778694629e-06, "loss": 0.065, "step": 308350 }, { "epoch": 3.03, "grad_norm": 4.518585205078125, "learning_rate": 3.5192836644920414e-06, "loss": 0.1747, "step": 308375 }, { "epoch": 3.03, "grad_norm": 10.791095733642578, "learning_rate": 3.5191595420377934e-06, "loss": 0.0902, "step": 308400 }, { "epoch": 3.03, "grad_norm": 5.532282829284668, "learning_rate": 3.5190354195835447e-06, "loss": 0.1837, "step": 308425 }, { "epoch": 3.03, "grad_norm": 29.3433837890625, "learning_rate": 3.518911297129296e-06, "loss": 0.0899, "step": 308450 }, { "epoch": 3.03, "grad_norm": 4.795180797576904, "learning_rate": 3.518787174675048e-06, "loss": 0.1554, "step": 308475 }, { "epoch": 3.03, "grad_norm": 8.682814598083496, "learning_rate": 3.518663052220799e-06, "loss": 0.0697, "step": 308500 }, { "epoch": 3.03, "grad_norm": 4.529016494750977, "learning_rate": 3.5185389297665508e-06, "loss": 0.1546, "step": 308525 }, { "epoch": 3.03, "grad_norm": 5.1006598472595215, "learning_rate": 3.5184148073123024e-06, "loss": 0.0601, "step": 308550 }, { "epoch": 3.03, "grad_norm": 4.7994065284729, "learning_rate": 3.518290684858054e-06, "loss": 0.179, "step": 308575 }, { "epoch": 3.03, "grad_norm": 9.494132995605469, "learning_rate": 3.5181665624038053e-06, "loss": 0.0575, "step": 308600 }, { "epoch": 3.03, "grad_norm": 5.479705333709717, "learning_rate": 3.5180424399495573e-06, "loss": 0.1714, "step": 308625 }, { "epoch": 3.03, "grad_norm": 9.494572639465332, "learning_rate": 3.5179183174953085e-06, "loss": 0.0854, "step": 308650 }, { "epoch": 3.03, "grad_norm": 4.35135555267334, "learning_rate": 3.5177941950410597e-06, "loss": 0.1511, "step": 308675 }, { "epoch": 3.04, "grad_norm": 9.383052825927734, "learning_rate": 3.5176700725868114e-06, "loss": 0.0695, "step": 308700 }, { "epoch": 3.04, "grad_norm": 3.3278274536132812, "learning_rate": 3.517545950132563e-06, "loss": 0.1576, "step": 308725 }, { "epoch": 3.04, "grad_norm": 7.397340774536133, "learning_rate": 3.5174218276783146e-06, "loss": 0.0648, "step": 308750 }, { "epoch": 3.04, "grad_norm": 12.13473129272461, "learning_rate": 3.517297705224066e-06, "loss": 0.1983, "step": 308775 }, { "epoch": 3.04, "grad_norm": 18.86528778076172, "learning_rate": 3.517173582769818e-06, "loss": 0.0822, "step": 308800 }, { "epoch": 3.04, "grad_norm": 6.26859712600708, "learning_rate": 3.517049460315569e-06, "loss": 0.1708, "step": 308825 }, { "epoch": 3.04, "grad_norm": 3.213181257247925, "learning_rate": 3.5169253378613203e-06, "loss": 0.0643, "step": 308850 }, { "epoch": 3.04, "grad_norm": 5.501834869384766, "learning_rate": 3.5168012154070724e-06, "loss": 0.1831, "step": 308875 }, { "epoch": 3.04, "grad_norm": 1.094693899154663, "learning_rate": 3.5166770929528236e-06, "loss": 0.0689, "step": 308900 }, { "epoch": 3.04, "grad_norm": 3.6869301795959473, "learning_rate": 3.5165529704985752e-06, "loss": 0.1664, "step": 308925 }, { "epoch": 3.04, "grad_norm": 7.585065841674805, "learning_rate": 3.516428848044327e-06, "loss": 0.061, "step": 308950 }, { "epoch": 3.04, "grad_norm": 4.837284088134766, "learning_rate": 3.5163047255900785e-06, "loss": 0.1781, "step": 308975 }, { "epoch": 3.04, "grad_norm": 9.420072555541992, "learning_rate": 3.5161806031358297e-06, "loss": 0.0815, "step": 309000 }, { "epoch": 3.04, "grad_norm": 3.0339438915252686, "learning_rate": 3.5160564806815818e-06, "loss": 0.1537, "step": 309025 }, { "epoch": 3.04, "grad_norm": 17.010089874267578, "learning_rate": 3.515932358227333e-06, "loss": 0.0683, "step": 309050 }, { "epoch": 3.04, "grad_norm": 3.838844060897827, "learning_rate": 3.515808235773084e-06, "loss": 0.166, "step": 309075 }, { "epoch": 3.04, "grad_norm": 8.895499229431152, "learning_rate": 3.5156841133188363e-06, "loss": 0.0921, "step": 309100 }, { "epoch": 3.04, "grad_norm": 2.58146071434021, "learning_rate": 3.5155599908645875e-06, "loss": 0.1601, "step": 309125 }, { "epoch": 3.04, "grad_norm": 7.095704555511475, "learning_rate": 3.515435868410339e-06, "loss": 0.0673, "step": 309150 }, { "epoch": 3.04, "grad_norm": 4.899630546569824, "learning_rate": 3.5153117459560907e-06, "loss": 0.1811, "step": 309175 }, { "epoch": 3.04, "grad_norm": 10.883723258972168, "learning_rate": 3.5151876235018424e-06, "loss": 0.0733, "step": 309200 }, { "epoch": 3.04, "grad_norm": 6.775605201721191, "learning_rate": 3.5150635010475936e-06, "loss": 0.1774, "step": 309225 }, { "epoch": 3.04, "grad_norm": 7.002941608428955, "learning_rate": 3.5149393785933456e-06, "loss": 0.067, "step": 309250 }, { "epoch": 3.04, "grad_norm": 6.452447414398193, "learning_rate": 3.514815256139097e-06, "loss": 0.2027, "step": 309275 }, { "epoch": 3.04, "grad_norm": 6.338433742523193, "learning_rate": 3.514691133684848e-06, "loss": 0.0663, "step": 309300 }, { "epoch": 3.04, "grad_norm": 4.145025730133057, "learning_rate": 3.5145670112306e-06, "loss": 0.2122, "step": 309325 }, { "epoch": 3.04, "grad_norm": 11.071022033691406, "learning_rate": 3.5144428887763513e-06, "loss": 0.056, "step": 309350 }, { "epoch": 3.04, "grad_norm": 2.7878077030181885, "learning_rate": 3.514318766322103e-06, "loss": 0.1826, "step": 309375 }, { "epoch": 3.04, "grad_norm": 2.6691482067108154, "learning_rate": 3.5141946438678546e-06, "loss": 0.059, "step": 309400 }, { "epoch": 3.04, "grad_norm": 5.16477108001709, "learning_rate": 3.5140705214136062e-06, "loss": 0.1876, "step": 309425 }, { "epoch": 3.04, "grad_norm": 6.673655033111572, "learning_rate": 3.5139463989593575e-06, "loss": 0.0698, "step": 309450 }, { "epoch": 3.04, "grad_norm": 3.370321273803711, "learning_rate": 3.5138222765051095e-06, "loss": 0.2004, "step": 309475 }, { "epoch": 3.04, "grad_norm": 2.3625762462615967, "learning_rate": 3.5136981540508607e-06, "loss": 0.0616, "step": 309500 }, { "epoch": 3.04, "grad_norm": 4.189816474914551, "learning_rate": 3.513574031596612e-06, "loss": 0.1799, "step": 309525 }, { "epoch": 3.04, "grad_norm": 15.516435623168945, "learning_rate": 3.5134499091423636e-06, "loss": 0.1105, "step": 309550 }, { "epoch": 3.04, "grad_norm": 3.455592632293701, "learning_rate": 3.513325786688115e-06, "loss": 0.1763, "step": 309575 }, { "epoch": 3.04, "grad_norm": 9.467188835144043, "learning_rate": 3.513201664233867e-06, "loss": 0.0917, "step": 309600 }, { "epoch": 3.04, "grad_norm": 5.199056625366211, "learning_rate": 3.513077541779618e-06, "loss": 0.1909, "step": 309625 }, { "epoch": 3.04, "grad_norm": 3.0853302478790283, "learning_rate": 3.51295341932537e-06, "loss": 0.0518, "step": 309650 }, { "epoch": 3.04, "grad_norm": 5.22023344039917, "learning_rate": 3.5128292968711213e-06, "loss": 0.178, "step": 309675 }, { "epoch": 3.05, "grad_norm": 9.108044624328613, "learning_rate": 3.512705174416873e-06, "loss": 0.0452, "step": 309700 }, { "epoch": 3.05, "grad_norm": 9.401815414428711, "learning_rate": 3.5125810519626246e-06, "loss": 0.1851, "step": 309725 }, { "epoch": 3.05, "grad_norm": 10.27640151977539, "learning_rate": 3.5124569295083762e-06, "loss": 0.0476, "step": 309750 }, { "epoch": 3.05, "grad_norm": 5.252319812774658, "learning_rate": 3.5123328070541274e-06, "loss": 0.1731, "step": 309775 }, { "epoch": 3.05, "grad_norm": 3.5615782737731934, "learning_rate": 3.5122086845998795e-06, "loss": 0.0829, "step": 309800 }, { "epoch": 3.05, "grad_norm": 4.401454448699951, "learning_rate": 3.5120845621456307e-06, "loss": 0.209, "step": 309825 }, { "epoch": 3.05, "grad_norm": 6.774190425872803, "learning_rate": 3.511960439691382e-06, "loss": 0.0867, "step": 309850 }, { "epoch": 3.05, "grad_norm": 3.835784912109375, "learning_rate": 3.511836317237134e-06, "loss": 0.1545, "step": 309875 }, { "epoch": 3.05, "grad_norm": 6.536966800689697, "learning_rate": 3.511712194782885e-06, "loss": 0.0667, "step": 309900 }, { "epoch": 3.05, "grad_norm": 5.871091365814209, "learning_rate": 3.511588072328637e-06, "loss": 0.1993, "step": 309925 }, { "epoch": 3.05, "grad_norm": 8.297621726989746, "learning_rate": 3.5114639498743885e-06, "loss": 0.0837, "step": 309950 }, { "epoch": 3.05, "grad_norm": 5.908444881439209, "learning_rate": 3.51133982742014e-06, "loss": 0.1891, "step": 309975 }, { "epoch": 3.05, "grad_norm": 9.64826488494873, "learning_rate": 3.5112157049658913e-06, "loss": 0.1011, "step": 310000 }, { "epoch": 3.05, "grad_norm": 5.296390533447266, "learning_rate": 3.5110915825116434e-06, "loss": 0.1819, "step": 310025 }, { "epoch": 3.05, "grad_norm": 4.717145919799805, "learning_rate": 3.5109674600573946e-06, "loss": 0.0615, "step": 310050 }, { "epoch": 3.05, "grad_norm": 3.5903968811035156, "learning_rate": 3.510843337603146e-06, "loss": 0.1882, "step": 310075 }, { "epoch": 3.05, "grad_norm": 8.597844123840332, "learning_rate": 3.510719215148898e-06, "loss": 0.0598, "step": 310100 }, { "epoch": 3.05, "grad_norm": 5.126579761505127, "learning_rate": 3.510595092694649e-06, "loss": 0.1474, "step": 310125 }, { "epoch": 3.05, "grad_norm": 6.679254531860352, "learning_rate": 3.5104709702404007e-06, "loss": 0.0673, "step": 310150 }, { "epoch": 3.05, "grad_norm": 4.837900161743164, "learning_rate": 3.5103468477861523e-06, "loss": 0.1677, "step": 310175 }, { "epoch": 3.05, "grad_norm": 5.796115875244141, "learning_rate": 3.510222725331904e-06, "loss": 0.0713, "step": 310200 }, { "epoch": 3.05, "grad_norm": 3.7827024459838867, "learning_rate": 3.510098602877655e-06, "loss": 0.2018, "step": 310225 }, { "epoch": 3.05, "grad_norm": 1.2329590320587158, "learning_rate": 3.5099744804234072e-06, "loss": 0.0581, "step": 310250 }, { "epoch": 3.05, "grad_norm": 6.963384628295898, "learning_rate": 3.5098503579691584e-06, "loss": 0.2077, "step": 310275 }, { "epoch": 3.05, "grad_norm": 6.659961700439453, "learning_rate": 3.5097262355149097e-06, "loss": 0.0643, "step": 310300 }, { "epoch": 3.05, "grad_norm": 4.853615760803223, "learning_rate": 3.5096021130606617e-06, "loss": 0.1846, "step": 310325 }, { "epoch": 3.05, "grad_norm": 5.197819232940674, "learning_rate": 3.509477990606413e-06, "loss": 0.0656, "step": 310350 }, { "epoch": 3.05, "grad_norm": 2.3259077072143555, "learning_rate": 3.5093538681521646e-06, "loss": 0.1538, "step": 310375 }, { "epoch": 3.05, "grad_norm": 10.592138290405273, "learning_rate": 3.5092297456979158e-06, "loss": 0.0644, "step": 310400 }, { "epoch": 3.05, "grad_norm": 19.95488739013672, "learning_rate": 3.509105623243668e-06, "loss": 0.1606, "step": 310425 }, { "epoch": 3.05, "grad_norm": 6.695782661437988, "learning_rate": 3.508981500789419e-06, "loss": 0.0725, "step": 310450 }, { "epoch": 3.05, "grad_norm": 4.162243366241455, "learning_rate": 3.5088573783351703e-06, "loss": 0.1755, "step": 310475 }, { "epoch": 3.05, "grad_norm": 9.951175689697266, "learning_rate": 3.5087332558809223e-06, "loss": 0.0567, "step": 310500 }, { "epoch": 3.05, "grad_norm": 4.208264350891113, "learning_rate": 3.5086091334266735e-06, "loss": 0.1816, "step": 310525 }, { "epoch": 3.05, "grad_norm": 8.400197982788086, "learning_rate": 3.508485010972425e-06, "loss": 0.0776, "step": 310550 }, { "epoch": 3.05, "grad_norm": 4.590497970581055, "learning_rate": 3.508360888518177e-06, "loss": 0.1872, "step": 310575 }, { "epoch": 3.05, "grad_norm": 9.516911506652832, "learning_rate": 3.5082367660639284e-06, "loss": 0.0859, "step": 310600 }, { "epoch": 3.05, "grad_norm": 3.328463554382324, "learning_rate": 3.5081126436096796e-06, "loss": 0.1955, "step": 310625 }, { "epoch": 3.05, "grad_norm": 5.521413326263428, "learning_rate": 3.5079885211554317e-06, "loss": 0.0555, "step": 310650 }, { "epoch": 3.05, "grad_norm": 4.388458728790283, "learning_rate": 3.507864398701183e-06, "loss": 0.1882, "step": 310675 }, { "epoch": 3.05, "grad_norm": 3.839552879333496, "learning_rate": 3.507740276246934e-06, "loss": 0.0437, "step": 310700 }, { "epoch": 3.06, "grad_norm": 4.393055438995361, "learning_rate": 3.507616153792686e-06, "loss": 0.1884, "step": 310725 }, { "epoch": 3.06, "grad_norm": 9.886960983276367, "learning_rate": 3.5074920313384374e-06, "loss": 0.0663, "step": 310750 }, { "epoch": 3.06, "grad_norm": 3.6995885372161865, "learning_rate": 3.507367908884189e-06, "loss": 0.1671, "step": 310775 }, { "epoch": 3.06, "grad_norm": 6.063545227050781, "learning_rate": 3.5072437864299407e-06, "loss": 0.0527, "step": 310800 }, { "epoch": 3.06, "grad_norm": 4.172964096069336, "learning_rate": 3.5071196639756923e-06, "loss": 0.1869, "step": 310825 }, { "epoch": 3.06, "grad_norm": 11.9395751953125, "learning_rate": 3.5069955415214435e-06, "loss": 0.0922, "step": 310850 }, { "epoch": 3.06, "grad_norm": 4.6457977294921875, "learning_rate": 3.5068714190671956e-06, "loss": 0.136, "step": 310875 }, { "epoch": 3.06, "grad_norm": 8.443021774291992, "learning_rate": 3.5067472966129468e-06, "loss": 0.0768, "step": 310900 }, { "epoch": 3.06, "grad_norm": 3.9992423057556152, "learning_rate": 3.506623174158698e-06, "loss": 0.1858, "step": 310925 }, { "epoch": 3.06, "grad_norm": 6.2285943031311035, "learning_rate": 3.50649905170445e-06, "loss": 0.0639, "step": 310950 }, { "epoch": 3.06, "grad_norm": 4.579965591430664, "learning_rate": 3.5063749292502013e-06, "loss": 0.1757, "step": 310975 }, { "epoch": 3.06, "grad_norm": 2.1963131427764893, "learning_rate": 3.506250806795953e-06, "loss": 0.0356, "step": 311000 }, { "epoch": 3.06, "grad_norm": 4.444215297698975, "learning_rate": 3.5061266843417045e-06, "loss": 0.1547, "step": 311025 }, { "epoch": 3.06, "grad_norm": 9.666659355163574, "learning_rate": 3.506002561887456e-06, "loss": 0.0721, "step": 311050 }, { "epoch": 3.06, "grad_norm": 3.608043909072876, "learning_rate": 3.5058784394332074e-06, "loss": 0.1942, "step": 311075 }, { "epoch": 3.06, "grad_norm": 4.12461519241333, "learning_rate": 3.5057543169789594e-06, "loss": 0.0573, "step": 311100 }, { "epoch": 3.06, "grad_norm": 3.8843278884887695, "learning_rate": 3.5056301945247106e-06, "loss": 0.1743, "step": 311125 }, { "epoch": 3.06, "grad_norm": 5.140388011932373, "learning_rate": 3.505506072070462e-06, "loss": 0.0701, "step": 311150 }, { "epoch": 3.06, "grad_norm": 3.9862220287323, "learning_rate": 3.505381949616214e-06, "loss": 0.1147, "step": 311175 }, { "epoch": 3.06, "grad_norm": 6.103080749511719, "learning_rate": 3.505257827161965e-06, "loss": 0.0654, "step": 311200 }, { "epoch": 3.06, "grad_norm": 7.842987060546875, "learning_rate": 3.5051337047077168e-06, "loss": 0.211, "step": 311225 }, { "epoch": 3.06, "grad_norm": 6.639566898345947, "learning_rate": 3.5050095822534684e-06, "loss": 0.0606, "step": 311250 }, { "epoch": 3.06, "grad_norm": 6.478495121002197, "learning_rate": 3.50488545979922e-06, "loss": 0.1953, "step": 311275 }, { "epoch": 3.06, "grad_norm": 10.731019973754883, "learning_rate": 3.5047613373449712e-06, "loss": 0.0837, "step": 311300 }, { "epoch": 3.06, "grad_norm": 4.195470333099365, "learning_rate": 3.5046372148907225e-06, "loss": 0.1865, "step": 311325 }, { "epoch": 3.06, "grad_norm": 2.1722404956817627, "learning_rate": 3.5045130924364745e-06, "loss": 0.0663, "step": 311350 }, { "epoch": 3.06, "grad_norm": 4.801673889160156, "learning_rate": 3.5043889699822257e-06, "loss": 0.1807, "step": 311375 }, { "epoch": 3.06, "grad_norm": 7.925765037536621, "learning_rate": 3.5042648475279774e-06, "loss": 0.063, "step": 311400 }, { "epoch": 3.06, "grad_norm": 5.744657039642334, "learning_rate": 3.504140725073729e-06, "loss": 0.1678, "step": 311425 }, { "epoch": 3.06, "grad_norm": 7.157425403594971, "learning_rate": 3.5040166026194806e-06, "loss": 0.075, "step": 311450 }, { "epoch": 3.06, "grad_norm": 4.022615909576416, "learning_rate": 3.503892480165232e-06, "loss": 0.1495, "step": 311475 }, { "epoch": 3.06, "grad_norm": 29.25388526916504, "learning_rate": 3.503768357710984e-06, "loss": 0.0636, "step": 311500 }, { "epoch": 3.06, "grad_norm": 5.23932409286499, "learning_rate": 3.503644235256735e-06, "loss": 0.1505, "step": 311525 }, { "epoch": 3.06, "grad_norm": 40.72761917114258, "learning_rate": 3.5035201128024863e-06, "loss": 0.0758, "step": 311550 }, { "epoch": 3.06, "grad_norm": 4.726210117340088, "learning_rate": 3.5033959903482384e-06, "loss": 0.1725, "step": 311575 }, { "epoch": 3.06, "grad_norm": 7.9999799728393555, "learning_rate": 3.5032718678939896e-06, "loss": 0.0689, "step": 311600 }, { "epoch": 3.06, "grad_norm": 5.152058124542236, "learning_rate": 3.5031477454397412e-06, "loss": 0.1801, "step": 311625 }, { "epoch": 3.06, "grad_norm": 4.039407253265381, "learning_rate": 3.503023622985493e-06, "loss": 0.0656, "step": 311650 }, { "epoch": 3.06, "grad_norm": 4.718003749847412, "learning_rate": 3.5028995005312445e-06, "loss": 0.1515, "step": 311675 }, { "epoch": 3.06, "grad_norm": 6.0063676834106445, "learning_rate": 3.5027753780769957e-06, "loss": 0.0667, "step": 311700 }, { "epoch": 3.06, "grad_norm": 4.682305812835693, "learning_rate": 3.5026562205209176e-06, "loss": 0.1544, "step": 311725 }, { "epoch": 3.07, "grad_norm": 9.45790958404541, "learning_rate": 3.502532098066669e-06, "loss": 0.0506, "step": 311750 }, { "epoch": 3.07, "grad_norm": 5.225525379180908, "learning_rate": 3.502407975612421e-06, "loss": 0.1717, "step": 311775 }, { "epoch": 3.07, "grad_norm": 4.632591247558594, "learning_rate": 3.502283853158172e-06, "loss": 0.0639, "step": 311800 }, { "epoch": 3.07, "grad_norm": 5.979068279266357, "learning_rate": 3.5021597307039233e-06, "loss": 0.1594, "step": 311825 }, { "epoch": 3.07, "grad_norm": 10.823312759399414, "learning_rate": 3.502035608249675e-06, "loss": 0.0725, "step": 311850 }, { "epoch": 3.07, "grad_norm": 4.166099548339844, "learning_rate": 3.5019114857954265e-06, "loss": 0.1476, "step": 311875 }, { "epoch": 3.07, "grad_norm": 5.937107563018799, "learning_rate": 3.501787363341178e-06, "loss": 0.0635, "step": 311900 }, { "epoch": 3.07, "grad_norm": 5.692667007446289, "learning_rate": 3.5016632408869294e-06, "loss": 0.1783, "step": 311925 }, { "epoch": 3.07, "grad_norm": 5.407511234283447, "learning_rate": 3.5015391184326815e-06, "loss": 0.0502, "step": 311950 }, { "epoch": 3.07, "grad_norm": 2.9899702072143555, "learning_rate": 3.5014149959784327e-06, "loss": 0.1903, "step": 311975 }, { "epoch": 3.07, "grad_norm": 7.244032382965088, "learning_rate": 3.501290873524184e-06, "loss": 0.0517, "step": 312000 }, { "epoch": 3.07, "grad_norm": 4.70057487487793, "learning_rate": 3.501166751069936e-06, "loss": 0.1535, "step": 312025 }, { "epoch": 3.07, "grad_norm": 1.4190500974655151, "learning_rate": 3.501042628615687e-06, "loss": 0.0609, "step": 312050 }, { "epoch": 3.07, "grad_norm": 3.272252082824707, "learning_rate": 3.5009185061614388e-06, "loss": 0.1809, "step": 312075 }, { "epoch": 3.07, "grad_norm": 4.815194606781006, "learning_rate": 3.5007943837071904e-06, "loss": 0.0838, "step": 312100 }, { "epoch": 3.07, "grad_norm": 3.4772651195526123, "learning_rate": 3.500670261252942e-06, "loss": 0.155, "step": 312125 }, { "epoch": 3.07, "grad_norm": 9.412394523620605, "learning_rate": 3.5005461387986933e-06, "loss": 0.072, "step": 312150 }, { "epoch": 3.07, "grad_norm": 3.0972158908843994, "learning_rate": 3.5004220163444453e-06, "loss": 0.1814, "step": 312175 }, { "epoch": 3.07, "grad_norm": 9.544875144958496, "learning_rate": 3.5002978938901965e-06, "loss": 0.0574, "step": 312200 }, { "epoch": 3.07, "grad_norm": 5.295288562774658, "learning_rate": 3.5001737714359477e-06, "loss": 0.1786, "step": 312225 }, { "epoch": 3.07, "grad_norm": 6.338667869567871, "learning_rate": 3.5000496489817e-06, "loss": 0.0875, "step": 312250 }, { "epoch": 3.07, "grad_norm": 7.0934672355651855, "learning_rate": 3.499925526527451e-06, "loss": 0.194, "step": 312275 }, { "epoch": 3.07, "grad_norm": 3.5577664375305176, "learning_rate": 3.4998014040732026e-06, "loss": 0.0655, "step": 312300 }, { "epoch": 3.07, "grad_norm": 2.941560983657837, "learning_rate": 3.4996772816189543e-06, "loss": 0.1648, "step": 312325 }, { "epoch": 3.07, "grad_norm": 8.840581893920898, "learning_rate": 3.499553159164706e-06, "loss": 0.0532, "step": 312350 }, { "epoch": 3.07, "grad_norm": 5.116329193115234, "learning_rate": 3.499429036710457e-06, "loss": 0.1717, "step": 312375 }, { "epoch": 3.07, "grad_norm": 2.1311230659484863, "learning_rate": 3.499304914256209e-06, "loss": 0.0571, "step": 312400 }, { "epoch": 3.07, "grad_norm": 4.1010613441467285, "learning_rate": 3.4991807918019604e-06, "loss": 0.1682, "step": 312425 }, { "epoch": 3.07, "grad_norm": 3.874638557434082, "learning_rate": 3.4990566693477116e-06, "loss": 0.0859, "step": 312450 }, { "epoch": 3.07, "grad_norm": 6.963963985443115, "learning_rate": 3.4989325468934637e-06, "loss": 0.1336, "step": 312475 }, { "epoch": 3.07, "grad_norm": 9.273247718811035, "learning_rate": 3.498808424439215e-06, "loss": 0.062, "step": 312500 }, { "epoch": 3.07, "grad_norm": 3.546595335006714, "learning_rate": 3.4986843019849665e-06, "loss": 0.1691, "step": 312525 }, { "epoch": 3.07, "grad_norm": 8.791033744812012, "learning_rate": 3.498560179530718e-06, "loss": 0.0863, "step": 312550 }, { "epoch": 3.07, "grad_norm": 4.351716041564941, "learning_rate": 3.4984360570764698e-06, "loss": 0.1886, "step": 312575 }, { "epoch": 3.07, "grad_norm": 9.281892776489258, "learning_rate": 3.498311934622221e-06, "loss": 0.071, "step": 312600 }, { "epoch": 3.07, "grad_norm": 5.85398530960083, "learning_rate": 3.498187812167973e-06, "loss": 0.1861, "step": 312625 }, { "epoch": 3.07, "grad_norm": 6.194692611694336, "learning_rate": 3.4980636897137243e-06, "loss": 0.0621, "step": 312650 }, { "epoch": 3.07, "grad_norm": 3.4674909114837646, "learning_rate": 3.497939567259476e-06, "loss": 0.1624, "step": 312675 }, { "epoch": 3.07, "grad_norm": 7.5919318199157715, "learning_rate": 3.497815444805227e-06, "loss": 0.0549, "step": 312700 }, { "epoch": 3.07, "grad_norm": 3.844168186187744, "learning_rate": 3.497691322350979e-06, "loss": 0.1731, "step": 312725 }, { "epoch": 3.08, "grad_norm": 18.188888549804688, "learning_rate": 3.4975671998967304e-06, "loss": 0.1004, "step": 312750 }, { "epoch": 3.08, "grad_norm": 5.319461822509766, "learning_rate": 3.4974430774424816e-06, "loss": 0.1806, "step": 312775 }, { "epoch": 3.08, "grad_norm": 9.20726203918457, "learning_rate": 3.4973189549882337e-06, "loss": 0.063, "step": 312800 }, { "epoch": 3.08, "grad_norm": 6.597368240356445, "learning_rate": 3.497194832533985e-06, "loss": 0.1841, "step": 312825 }, { "epoch": 3.08, "grad_norm": 7.394240856170654, "learning_rate": 3.4970707100797365e-06, "loss": 0.0521, "step": 312850 }, { "epoch": 3.08, "grad_norm": 4.810920715332031, "learning_rate": 3.496946587625488e-06, "loss": 0.1544, "step": 312875 }, { "epoch": 3.08, "grad_norm": 4.0875468254089355, "learning_rate": 3.4968224651712398e-06, "loss": 0.0677, "step": 312900 }, { "epoch": 3.08, "grad_norm": 6.753295421600342, "learning_rate": 3.496698342716991e-06, "loss": 0.1611, "step": 312925 }, { "epoch": 3.08, "grad_norm": 7.001775741577148, "learning_rate": 3.496574220262743e-06, "loss": 0.0629, "step": 312950 }, { "epoch": 3.08, "grad_norm": 4.26339054107666, "learning_rate": 3.4964500978084943e-06, "loss": 0.1729, "step": 312975 }, { "epoch": 3.08, "grad_norm": 8.878083229064941, "learning_rate": 3.4963259753542455e-06, "loss": 0.0855, "step": 313000 }, { "epoch": 3.08, "grad_norm": 5.098933219909668, "learning_rate": 3.4962018528999975e-06, "loss": 0.1665, "step": 313025 }, { "epoch": 3.08, "grad_norm": 8.777400016784668, "learning_rate": 3.4960777304457487e-06, "loss": 0.0679, "step": 313050 }, { "epoch": 3.08, "grad_norm": 5.1219162940979, "learning_rate": 3.4959536079915004e-06, "loss": 0.2265, "step": 313075 }, { "epoch": 3.08, "grad_norm": 2.804800033569336, "learning_rate": 3.495829485537252e-06, "loss": 0.0601, "step": 313100 }, { "epoch": 3.08, "grad_norm": 4.836734771728516, "learning_rate": 3.4957053630830036e-06, "loss": 0.1506, "step": 313125 }, { "epoch": 3.08, "grad_norm": 7.861556053161621, "learning_rate": 3.495581240628755e-06, "loss": 0.078, "step": 313150 }, { "epoch": 3.08, "grad_norm": 4.933216571807861, "learning_rate": 3.495457118174507e-06, "loss": 0.1942, "step": 313175 }, { "epoch": 3.08, "grad_norm": 10.233813285827637, "learning_rate": 3.495332995720258e-06, "loss": 0.0587, "step": 313200 }, { "epoch": 3.08, "grad_norm": 5.933646202087402, "learning_rate": 3.4952088732660093e-06, "loss": 0.1444, "step": 313225 }, { "epoch": 3.08, "grad_norm": 2.925628662109375, "learning_rate": 3.4950847508117614e-06, "loss": 0.0512, "step": 313250 }, { "epoch": 3.08, "grad_norm": 3.347799062728882, "learning_rate": 3.4949606283575126e-06, "loss": 0.1792, "step": 313275 }, { "epoch": 3.08, "grad_norm": 8.283670425415039, "learning_rate": 3.4948365059032642e-06, "loss": 0.0961, "step": 313300 }, { "epoch": 3.08, "grad_norm": 4.813540935516357, "learning_rate": 3.494712383449016e-06, "loss": 0.1695, "step": 313325 }, { "epoch": 3.08, "grad_norm": 5.133530616760254, "learning_rate": 3.4945882609947675e-06, "loss": 0.0697, "step": 313350 }, { "epoch": 3.08, "grad_norm": 4.508119583129883, "learning_rate": 3.4944641385405187e-06, "loss": 0.187, "step": 313375 }, { "epoch": 3.08, "grad_norm": 11.834474563598633, "learning_rate": 3.4943400160862708e-06, "loss": 0.0899, "step": 313400 }, { "epoch": 3.08, "grad_norm": 6.988489151000977, "learning_rate": 3.494215893632022e-06, "loss": 0.1965, "step": 313425 }, { "epoch": 3.08, "grad_norm": 10.957048416137695, "learning_rate": 3.494091771177773e-06, "loss": 0.0686, "step": 313450 }, { "epoch": 3.08, "grad_norm": 5.863590717315674, "learning_rate": 3.4939676487235253e-06, "loss": 0.1809, "step": 313475 }, { "epoch": 3.08, "grad_norm": 11.456145286560059, "learning_rate": 3.4938435262692765e-06, "loss": 0.0609, "step": 313500 }, { "epoch": 3.08, "grad_norm": 6.624556064605713, "learning_rate": 3.493719403815028e-06, "loss": 0.1649, "step": 313525 }, { "epoch": 3.08, "grad_norm": 8.956608772277832, "learning_rate": 3.4935952813607793e-06, "loss": 0.0675, "step": 313550 }, { "epoch": 3.08, "grad_norm": 5.581142902374268, "learning_rate": 3.4934711589065314e-06, "loss": 0.1668, "step": 313575 }, { "epoch": 3.08, "grad_norm": 15.443349838256836, "learning_rate": 3.4933470364522826e-06, "loss": 0.0746, "step": 313600 }, { "epoch": 3.08, "grad_norm": 4.026510238647461, "learning_rate": 3.493222913998034e-06, "loss": 0.2193, "step": 313625 }, { "epoch": 3.08, "grad_norm": 10.98933219909668, "learning_rate": 3.493098791543786e-06, "loss": 0.06, "step": 313650 }, { "epoch": 3.08, "grad_norm": 2.3333332538604736, "learning_rate": 3.492974669089537e-06, "loss": 0.1721, "step": 313675 }, { "epoch": 3.08, "grad_norm": 5.134000778198242, "learning_rate": 3.4928505466352887e-06, "loss": 0.07, "step": 313700 }, { "epoch": 3.08, "grad_norm": 3.898483991622925, "learning_rate": 3.4927264241810403e-06, "loss": 0.194, "step": 313725 }, { "epoch": 3.08, "grad_norm": 5.480495452880859, "learning_rate": 3.492602301726792e-06, "loss": 0.0781, "step": 313750 }, { "epoch": 3.09, "grad_norm": 3.3709843158721924, "learning_rate": 3.492478179272543e-06, "loss": 0.1939, "step": 313775 }, { "epoch": 3.09, "grad_norm": 11.71998405456543, "learning_rate": 3.4923540568182952e-06, "loss": 0.075, "step": 313800 }, { "epoch": 3.09, "grad_norm": 4.496960163116455, "learning_rate": 3.4922299343640465e-06, "loss": 0.1854, "step": 313825 }, { "epoch": 3.09, "grad_norm": 12.103014945983887, "learning_rate": 3.4921058119097977e-06, "loss": 0.0735, "step": 313850 }, { "epoch": 3.09, "grad_norm": 4.127076625823975, "learning_rate": 3.4919816894555497e-06, "loss": 0.1877, "step": 313875 }, { "epoch": 3.09, "grad_norm": 7.146984577178955, "learning_rate": 3.491857567001301e-06, "loss": 0.064, "step": 313900 }, { "epoch": 3.09, "grad_norm": 3.990473508834839, "learning_rate": 3.4917334445470526e-06, "loss": 0.1767, "step": 313925 }, { "epoch": 3.09, "grad_norm": 14.006081581115723, "learning_rate": 3.491609322092804e-06, "loss": 0.0638, "step": 313950 }, { "epoch": 3.09, "grad_norm": 3.978238344192505, "learning_rate": 3.491485199638556e-06, "loss": 0.1842, "step": 313975 }, { "epoch": 3.09, "grad_norm": 12.745955467224121, "learning_rate": 3.491361077184307e-06, "loss": 0.0755, "step": 314000 }, { "epoch": 3.09, "grad_norm": 8.92977523803711, "learning_rate": 3.491236954730059e-06, "loss": 0.157, "step": 314025 }, { "epoch": 3.09, "grad_norm": 4.254782199859619, "learning_rate": 3.4911128322758103e-06, "loss": 0.0449, "step": 314050 }, { "epoch": 3.09, "grad_norm": 2.473921537399292, "learning_rate": 3.4909887098215615e-06, "loss": 0.157, "step": 314075 }, { "epoch": 3.09, "grad_norm": 10.591944694519043, "learning_rate": 3.4908645873673136e-06, "loss": 0.0606, "step": 314100 }, { "epoch": 3.09, "grad_norm": 4.430069446563721, "learning_rate": 3.4907454298112346e-06, "loss": 0.1978, "step": 314125 }, { "epoch": 3.09, "grad_norm": 6.331576824188232, "learning_rate": 3.4906213073569863e-06, "loss": 0.0724, "step": 314150 }, { "epoch": 3.09, "grad_norm": 4.937070369720459, "learning_rate": 3.490497184902738e-06, "loss": 0.1646, "step": 314175 }, { "epoch": 3.09, "grad_norm": 1.5277386903762817, "learning_rate": 3.4903730624484895e-06, "loss": 0.0651, "step": 314200 }, { "epoch": 3.09, "grad_norm": 6.029321670532227, "learning_rate": 3.4902489399942407e-06, "loss": 0.1801, "step": 314225 }, { "epoch": 3.09, "grad_norm": 9.367512702941895, "learning_rate": 3.490124817539993e-06, "loss": 0.0809, "step": 314250 }, { "epoch": 3.09, "grad_norm": 5.5285844802856445, "learning_rate": 3.490000695085744e-06, "loss": 0.173, "step": 314275 }, { "epoch": 3.09, "grad_norm": 13.087508201599121, "learning_rate": 3.4898765726314952e-06, "loss": 0.0706, "step": 314300 }, { "epoch": 3.09, "grad_norm": 5.536531448364258, "learning_rate": 3.4897524501772473e-06, "loss": 0.2269, "step": 314325 }, { "epoch": 3.09, "grad_norm": 5.575429916381836, "learning_rate": 3.4896283277229985e-06, "loss": 0.0518, "step": 314350 }, { "epoch": 3.09, "grad_norm": 4.422136306762695, "learning_rate": 3.48950420526875e-06, "loss": 0.1618, "step": 314375 }, { "epoch": 3.09, "grad_norm": 11.917940139770508, "learning_rate": 3.4893800828145018e-06, "loss": 0.0734, "step": 314400 }, { "epoch": 3.09, "grad_norm": 6.14163875579834, "learning_rate": 3.4892559603602534e-06, "loss": 0.1637, "step": 314425 }, { "epoch": 3.09, "grad_norm": 3.7003023624420166, "learning_rate": 3.4891318379060046e-06, "loss": 0.0624, "step": 314450 }, { "epoch": 3.09, "grad_norm": 4.042944431304932, "learning_rate": 3.4890077154517567e-06, "loss": 0.1483, "step": 314475 }, { "epoch": 3.09, "grad_norm": 10.082275390625, "learning_rate": 3.488883592997508e-06, "loss": 0.0393, "step": 314500 }, { "epoch": 3.09, "grad_norm": 7.992582321166992, "learning_rate": 3.488759470543259e-06, "loss": 0.1845, "step": 314525 }, { "epoch": 3.09, "grad_norm": 6.947113037109375, "learning_rate": 3.488635348089011e-06, "loss": 0.0598, "step": 314550 }, { "epoch": 3.09, "grad_norm": 2.7417352199554443, "learning_rate": 3.4885112256347624e-06, "loss": 0.1671, "step": 314575 }, { "epoch": 3.09, "grad_norm": 7.3866095542907715, "learning_rate": 3.488387103180514e-06, "loss": 0.0729, "step": 314600 }, { "epoch": 3.09, "grad_norm": 4.427454471588135, "learning_rate": 3.4882629807262656e-06, "loss": 0.1748, "step": 314625 }, { "epoch": 3.09, "grad_norm": 6.5407185554504395, "learning_rate": 3.4881388582720173e-06, "loss": 0.0635, "step": 314650 }, { "epoch": 3.09, "grad_norm": 5.684469699859619, "learning_rate": 3.4880147358177685e-06, "loss": 0.1523, "step": 314675 }, { "epoch": 3.09, "grad_norm": 9.186927795410156, "learning_rate": 3.4878906133635205e-06, "loss": 0.0469, "step": 314700 }, { "epoch": 3.09, "grad_norm": 5.998155117034912, "learning_rate": 3.4877664909092717e-06, "loss": 0.1979, "step": 314725 }, { "epoch": 3.09, "grad_norm": 10.30776309967041, "learning_rate": 3.487642368455023e-06, "loss": 0.0601, "step": 314750 }, { "epoch": 3.09, "grad_norm": 4.149929523468018, "learning_rate": 3.487518246000775e-06, "loss": 0.1731, "step": 314775 }, { "epoch": 3.1, "grad_norm": 7.502527236938477, "learning_rate": 3.4873941235465262e-06, "loss": 0.0824, "step": 314800 }, { "epoch": 3.1, "grad_norm": 5.622851371765137, "learning_rate": 3.487270001092278e-06, "loss": 0.1578, "step": 314825 }, { "epoch": 3.1, "grad_norm": 6.826806545257568, "learning_rate": 3.4871458786380295e-06, "loss": 0.0532, "step": 314850 }, { "epoch": 3.1, "grad_norm": 4.681291103363037, "learning_rate": 3.487021756183781e-06, "loss": 0.182, "step": 314875 }, { "epoch": 3.1, "grad_norm": 7.483400344848633, "learning_rate": 3.4868976337295323e-06, "loss": 0.047, "step": 314900 }, { "epoch": 3.1, "grad_norm": 5.384644508361816, "learning_rate": 3.4867735112752844e-06, "loss": 0.1823, "step": 314925 }, { "epoch": 3.1, "grad_norm": 16.681821823120117, "learning_rate": 3.4866493888210356e-06, "loss": 0.0784, "step": 314950 }, { "epoch": 3.1, "grad_norm": 4.641385555267334, "learning_rate": 3.486525266366787e-06, "loss": 0.168, "step": 314975 }, { "epoch": 3.1, "grad_norm": 8.328827857971191, "learning_rate": 3.4864011439125385e-06, "loss": 0.0442, "step": 315000 }, { "epoch": 3.1, "grad_norm": 3.061720371246338, "learning_rate": 3.48627702145829e-06, "loss": 0.1543, "step": 315025 }, { "epoch": 3.1, "grad_norm": 8.849480628967285, "learning_rate": 3.4861528990040417e-06, "loss": 0.0954, "step": 315050 }, { "epoch": 3.1, "grad_norm": 4.458996772766113, "learning_rate": 3.486028776549793e-06, "loss": 0.1799, "step": 315075 }, { "epoch": 3.1, "grad_norm": 1.635067105293274, "learning_rate": 3.485904654095545e-06, "loss": 0.0511, "step": 315100 }, { "epoch": 3.1, "grad_norm": 5.862024307250977, "learning_rate": 3.485780531641296e-06, "loss": 0.1852, "step": 315125 }, { "epoch": 3.1, "grad_norm": 5.823683261871338, "learning_rate": 3.4856564091870474e-06, "loss": 0.0694, "step": 315150 }, { "epoch": 3.1, "grad_norm": 6.714266300201416, "learning_rate": 3.4855322867327995e-06, "loss": 0.2084, "step": 315175 }, { "epoch": 3.1, "grad_norm": 9.832873344421387, "learning_rate": 3.4854081642785507e-06, "loss": 0.0762, "step": 315200 }, { "epoch": 3.1, "grad_norm": 5.489398956298828, "learning_rate": 3.4852840418243023e-06, "loss": 0.2372, "step": 315225 }, { "epoch": 3.1, "grad_norm": 6.135317325592041, "learning_rate": 3.485159919370054e-06, "loss": 0.0764, "step": 315250 }, { "epoch": 3.1, "grad_norm": 4.201991081237793, "learning_rate": 3.4850357969158056e-06, "loss": 0.1615, "step": 315275 }, { "epoch": 3.1, "grad_norm": 4.138754367828369, "learning_rate": 3.484911674461557e-06, "loss": 0.0568, "step": 315300 }, { "epoch": 3.1, "grad_norm": 4.758011817932129, "learning_rate": 3.484787552007309e-06, "loss": 0.1792, "step": 315325 }, { "epoch": 3.1, "grad_norm": 6.269582271575928, "learning_rate": 3.48466342955306e-06, "loss": 0.0643, "step": 315350 }, { "epoch": 3.1, "grad_norm": 5.533068656921387, "learning_rate": 3.4845393070988117e-06, "loss": 0.222, "step": 315375 }, { "epoch": 3.1, "grad_norm": 4.075277328491211, "learning_rate": 3.4844151846445633e-06, "loss": 0.0631, "step": 315400 }, { "epoch": 3.1, "grad_norm": 5.559964656829834, "learning_rate": 3.484291062190315e-06, "loss": 0.1705, "step": 315425 }, { "epoch": 3.1, "grad_norm": 9.251086235046387, "learning_rate": 3.484166939736066e-06, "loss": 0.0695, "step": 315450 }, { "epoch": 3.1, "grad_norm": 4.750442981719971, "learning_rate": 3.484042817281818e-06, "loss": 0.1829, "step": 315475 }, { "epoch": 3.1, "grad_norm": 5.5782599449157715, "learning_rate": 3.4839186948275695e-06, "loss": 0.0648, "step": 315500 }, { "epoch": 3.1, "grad_norm": 4.4517621994018555, "learning_rate": 3.4837945723733207e-06, "loss": 0.1787, "step": 315525 }, { "epoch": 3.1, "grad_norm": 7.766657829284668, "learning_rate": 3.4836704499190727e-06, "loss": 0.0624, "step": 315550 }, { "epoch": 3.1, "grad_norm": 4.847758769989014, "learning_rate": 3.483546327464824e-06, "loss": 0.1631, "step": 315575 }, { "epoch": 3.1, "grad_norm": 1.481344223022461, "learning_rate": 3.4834222050105756e-06, "loss": 0.0485, "step": 315600 }, { "epoch": 3.1, "grad_norm": 6.323657989501953, "learning_rate": 3.483298082556327e-06, "loss": 0.1533, "step": 315625 }, { "epoch": 3.1, "grad_norm": 3.824277877807617, "learning_rate": 3.483173960102079e-06, "loss": 0.073, "step": 315650 }, { "epoch": 3.1, "grad_norm": 4.0193986892700195, "learning_rate": 3.48304983764783e-06, "loss": 0.1665, "step": 315675 }, { "epoch": 3.1, "grad_norm": 4.121397972106934, "learning_rate": 3.482925715193582e-06, "loss": 0.0581, "step": 315700 }, { "epoch": 3.1, "grad_norm": 17.53251838684082, "learning_rate": 3.4828015927393333e-06, "loss": 0.2055, "step": 315725 }, { "epoch": 3.1, "grad_norm": 3.936150074005127, "learning_rate": 3.4826774702850845e-06, "loss": 0.0803, "step": 315750 }, { "epoch": 3.1, "grad_norm": 4.678345680236816, "learning_rate": 3.4825533478308366e-06, "loss": 0.1165, "step": 315775 }, { "epoch": 3.1, "grad_norm": 5.990525245666504, "learning_rate": 3.482429225376588e-06, "loss": 0.0696, "step": 315800 }, { "epoch": 3.11, "grad_norm": 4.03419828414917, "learning_rate": 3.4823051029223394e-06, "loss": 0.2084, "step": 315825 }, { "epoch": 3.11, "grad_norm": 13.915008544921875, "learning_rate": 3.4821809804680907e-06, "loss": 0.0937, "step": 315850 }, { "epoch": 3.11, "grad_norm": 6.07094669342041, "learning_rate": 3.4820568580138427e-06, "loss": 0.1763, "step": 315875 }, { "epoch": 3.11, "grad_norm": 9.974981307983398, "learning_rate": 3.481932735559594e-06, "loss": 0.0717, "step": 315900 }, { "epoch": 3.11, "grad_norm": 5.684816360473633, "learning_rate": 3.481808613105345e-06, "loss": 0.1854, "step": 315925 }, { "epoch": 3.11, "grad_norm": 16.07151222229004, "learning_rate": 3.481684490651097e-06, "loss": 0.0898, "step": 315950 }, { "epoch": 3.11, "grad_norm": 5.206014156341553, "learning_rate": 3.4815603681968484e-06, "loss": 0.1583, "step": 315975 }, { "epoch": 3.11, "grad_norm": 7.117545127868652, "learning_rate": 3.4814362457426e-06, "loss": 0.068, "step": 316000 }, { "epoch": 3.11, "grad_norm": 3.9074175357818604, "learning_rate": 3.4813121232883517e-06, "loss": 0.1616, "step": 316025 }, { "epoch": 3.11, "grad_norm": 8.514152526855469, "learning_rate": 3.4811880008341033e-06, "loss": 0.0596, "step": 316050 }, { "epoch": 3.11, "grad_norm": 6.106632709503174, "learning_rate": 3.4810638783798545e-06, "loss": 0.1854, "step": 316075 }, { "epoch": 3.11, "grad_norm": 11.433956146240234, "learning_rate": 3.4809397559256066e-06, "loss": 0.0624, "step": 316100 }, { "epoch": 3.11, "grad_norm": 3.4545578956604004, "learning_rate": 3.480815633471358e-06, "loss": 0.1592, "step": 316125 }, { "epoch": 3.11, "grad_norm": 3.472935914993286, "learning_rate": 3.480691511017109e-06, "loss": 0.0666, "step": 316150 }, { "epoch": 3.11, "grad_norm": 4.687918663024902, "learning_rate": 3.480567388562861e-06, "loss": 0.1706, "step": 316175 }, { "epoch": 3.11, "grad_norm": 5.838892936706543, "learning_rate": 3.4804432661086123e-06, "loss": 0.0994, "step": 316200 }, { "epoch": 3.11, "grad_norm": 4.311790943145752, "learning_rate": 3.480319143654364e-06, "loss": 0.1751, "step": 316225 }, { "epoch": 3.11, "grad_norm": 9.946269035339355, "learning_rate": 3.4801950212001155e-06, "loss": 0.0798, "step": 316250 }, { "epoch": 3.11, "grad_norm": 5.406496524810791, "learning_rate": 3.480070898745867e-06, "loss": 0.219, "step": 316275 }, { "epoch": 3.11, "grad_norm": 7.119144439697266, "learning_rate": 3.4799467762916184e-06, "loss": 0.0673, "step": 316300 }, { "epoch": 3.11, "grad_norm": 3.115631341934204, "learning_rate": 3.4798226538373704e-06, "loss": 0.2223, "step": 316325 }, { "epoch": 3.11, "grad_norm": 4.790036201477051, "learning_rate": 3.4796985313831217e-06, "loss": 0.0547, "step": 316350 }, { "epoch": 3.11, "grad_norm": 5.002230644226074, "learning_rate": 3.479574408928873e-06, "loss": 0.156, "step": 316375 }, { "epoch": 3.11, "grad_norm": 10.888303756713867, "learning_rate": 3.479450286474625e-06, "loss": 0.0521, "step": 316400 }, { "epoch": 3.11, "grad_norm": 4.859684944152832, "learning_rate": 3.479326164020376e-06, "loss": 0.1579, "step": 316425 }, { "epoch": 3.11, "grad_norm": 3.9535834789276123, "learning_rate": 3.4792020415661278e-06, "loss": 0.0745, "step": 316450 }, { "epoch": 3.11, "grad_norm": 7.855865001678467, "learning_rate": 3.4790779191118794e-06, "loss": 0.1389, "step": 316475 }, { "epoch": 3.11, "grad_norm": 12.758987426757812, "learning_rate": 3.478953796657631e-06, "loss": 0.0675, "step": 316500 }, { "epoch": 3.11, "grad_norm": 5.760698318481445, "learning_rate": 3.4788296742033823e-06, "loss": 0.1787, "step": 316525 }, { "epoch": 3.11, "grad_norm": 7.724603652954102, "learning_rate": 3.4787055517491343e-06, "loss": 0.061, "step": 316550 }, { "epoch": 3.11, "grad_norm": 6.767778396606445, "learning_rate": 3.4785863941930553e-06, "loss": 0.1723, "step": 316575 }, { "epoch": 3.11, "grad_norm": 10.321316719055176, "learning_rate": 3.4784622717388066e-06, "loss": 0.0911, "step": 316600 }, { "epoch": 3.11, "grad_norm": 3.592846393585205, "learning_rate": 3.4783381492845586e-06, "loss": 0.2143, "step": 316625 }, { "epoch": 3.11, "grad_norm": 3.1489923000335693, "learning_rate": 3.47821402683031e-06, "loss": 0.0649, "step": 316650 }, { "epoch": 3.11, "grad_norm": 3.8839468955993652, "learning_rate": 3.4780899043760615e-06, "loss": 0.1694, "step": 316675 }, { "epoch": 3.11, "grad_norm": 9.617481231689453, "learning_rate": 3.477965781921813e-06, "loss": 0.0765, "step": 316700 }, { "epoch": 3.11, "grad_norm": 3.9023780822753906, "learning_rate": 3.4778416594675647e-06, "loss": 0.1821, "step": 316725 }, { "epoch": 3.11, "grad_norm": 10.139701843261719, "learning_rate": 3.477717537013316e-06, "loss": 0.0794, "step": 316750 }, { "epoch": 3.11, "grad_norm": 5.5018768310546875, "learning_rate": 3.477593414559068e-06, "loss": 0.2004, "step": 316775 }, { "epoch": 3.11, "grad_norm": 4.2661895751953125, "learning_rate": 3.477469292104819e-06, "loss": 0.0688, "step": 316800 }, { "epoch": 3.12, "grad_norm": 3.1624808311462402, "learning_rate": 3.4773451696505704e-06, "loss": 0.1943, "step": 316825 }, { "epoch": 3.12, "grad_norm": 6.2862229347229, "learning_rate": 3.4772210471963225e-06, "loss": 0.0791, "step": 316850 }, { "epoch": 3.12, "grad_norm": 3.753610134124756, "learning_rate": 3.4770969247420737e-06, "loss": 0.2256, "step": 316875 }, { "epoch": 3.12, "grad_norm": 12.744255065917969, "learning_rate": 3.4769728022878253e-06, "loss": 0.0681, "step": 316900 }, { "epoch": 3.12, "grad_norm": 5.471314430236816, "learning_rate": 3.476848679833577e-06, "loss": 0.135, "step": 316925 }, { "epoch": 3.12, "grad_norm": 6.498239517211914, "learning_rate": 3.4767245573793286e-06, "loss": 0.0684, "step": 316950 }, { "epoch": 3.12, "grad_norm": 3.3715879917144775, "learning_rate": 3.47660043492508e-06, "loss": 0.141, "step": 316975 }, { "epoch": 3.12, "grad_norm": 15.123456954956055, "learning_rate": 3.476476312470832e-06, "loss": 0.0583, "step": 317000 }, { "epoch": 3.12, "grad_norm": 6.705918312072754, "learning_rate": 3.476352190016583e-06, "loss": 0.1203, "step": 317025 }, { "epoch": 3.12, "grad_norm": 7.434397220611572, "learning_rate": 3.4762280675623343e-06, "loss": 0.0767, "step": 317050 }, { "epoch": 3.12, "grad_norm": 2.747565507888794, "learning_rate": 3.4761039451080863e-06, "loss": 0.1643, "step": 317075 }, { "epoch": 3.12, "grad_norm": 11.31870174407959, "learning_rate": 3.4759798226538376e-06, "loss": 0.1008, "step": 317100 }, { "epoch": 3.12, "grad_norm": 4.48163366317749, "learning_rate": 3.475855700199589e-06, "loss": 0.1965, "step": 317125 }, { "epoch": 3.12, "grad_norm": 14.689859390258789, "learning_rate": 3.475731577745341e-06, "loss": 0.1017, "step": 317150 }, { "epoch": 3.12, "grad_norm": 5.482476711273193, "learning_rate": 3.4756074552910925e-06, "loss": 0.1906, "step": 317175 }, { "epoch": 3.12, "grad_norm": 11.063631057739258, "learning_rate": 3.4754833328368437e-06, "loss": 0.0896, "step": 317200 }, { "epoch": 3.12, "grad_norm": 6.7623982429504395, "learning_rate": 3.4753592103825957e-06, "loss": 0.2074, "step": 317225 }, { "epoch": 3.12, "grad_norm": 23.784809112548828, "learning_rate": 3.475235087928347e-06, "loss": 0.0667, "step": 317250 }, { "epoch": 3.12, "grad_norm": 7.061573028564453, "learning_rate": 3.475110965474098e-06, "loss": 0.1812, "step": 317275 }, { "epoch": 3.12, "grad_norm": 9.939846992492676, "learning_rate": 3.47498684301985e-06, "loss": 0.0583, "step": 317300 }, { "epoch": 3.12, "grad_norm": 7.680535793304443, "learning_rate": 3.4748627205656014e-06, "loss": 0.1829, "step": 317325 }, { "epoch": 3.12, "grad_norm": 10.485686302185059, "learning_rate": 3.474738598111353e-06, "loss": 0.0928, "step": 317350 }, { "epoch": 3.12, "grad_norm": 8.999667167663574, "learning_rate": 3.4746144756571043e-06, "loss": 0.1599, "step": 317375 }, { "epoch": 3.12, "grad_norm": 10.329577445983887, "learning_rate": 3.4744903532028563e-06, "loss": 0.0536, "step": 317400 }, { "epoch": 3.12, "grad_norm": 9.30349063873291, "learning_rate": 3.4743662307486075e-06, "loss": 0.1957, "step": 317425 }, { "epoch": 3.12, "grad_norm": 7.0192952156066895, "learning_rate": 3.4742421082943588e-06, "loss": 0.071, "step": 317450 }, { "epoch": 3.12, "grad_norm": 5.444192886352539, "learning_rate": 3.474117985840111e-06, "loss": 0.2035, "step": 317475 }, { "epoch": 3.12, "grad_norm": 3.689601182937622, "learning_rate": 3.473993863385862e-06, "loss": 0.0866, "step": 317500 }, { "epoch": 3.12, "grad_norm": 5.401399612426758, "learning_rate": 3.4738697409316137e-06, "loss": 0.2287, "step": 317525 }, { "epoch": 3.12, "grad_norm": 7.118276596069336, "learning_rate": 3.4737456184773653e-06, "loss": 0.0606, "step": 317550 }, { "epoch": 3.12, "grad_norm": 6.765760898590088, "learning_rate": 3.473621496023117e-06, "loss": 0.1609, "step": 317575 }, { "epoch": 3.12, "grad_norm": 4.839415550231934, "learning_rate": 3.473497373568868e-06, "loss": 0.0768, "step": 317600 }, { "epoch": 3.12, "grad_norm": 3.991616725921631, "learning_rate": 3.47337325111462e-06, "loss": 0.1801, "step": 317625 }, { "epoch": 3.12, "grad_norm": 4.680896282196045, "learning_rate": 3.4732491286603714e-06, "loss": 0.0494, "step": 317650 }, { "epoch": 3.12, "grad_norm": 5.131191730499268, "learning_rate": 3.4731250062061226e-06, "loss": 0.1924, "step": 317675 }, { "epoch": 3.12, "grad_norm": 8.068124771118164, "learning_rate": 3.4730008837518747e-06, "loss": 0.0719, "step": 317700 }, { "epoch": 3.12, "grad_norm": 3.3169872760772705, "learning_rate": 3.472876761297626e-06, "loss": 0.1752, "step": 317725 }, { "epoch": 3.12, "grad_norm": 9.602498054504395, "learning_rate": 3.4727526388433775e-06, "loss": 0.0831, "step": 317750 }, { "epoch": 3.12, "grad_norm": 6.1314568519592285, "learning_rate": 3.472628516389129e-06, "loss": 0.1839, "step": 317775 }, { "epoch": 3.12, "grad_norm": 10.298955917358398, "learning_rate": 3.472504393934881e-06, "loss": 0.0771, "step": 317800 }, { "epoch": 3.12, "grad_norm": 5.514616012573242, "learning_rate": 3.472380271480632e-06, "loss": 0.1563, "step": 317825 }, { "epoch": 3.13, "grad_norm": 10.304206848144531, "learning_rate": 3.472256149026384e-06, "loss": 0.0527, "step": 317850 }, { "epoch": 3.13, "grad_norm": 5.269707202911377, "learning_rate": 3.4721320265721353e-06, "loss": 0.2091, "step": 317875 }, { "epoch": 3.13, "grad_norm": 9.202773094177246, "learning_rate": 3.4720079041178865e-06, "loss": 0.0721, "step": 317900 }, { "epoch": 3.13, "grad_norm": 3.875366449356079, "learning_rate": 3.4718837816636386e-06, "loss": 0.1829, "step": 317925 }, { "epoch": 3.13, "grad_norm": 8.615614891052246, "learning_rate": 3.4717596592093898e-06, "loss": 0.0671, "step": 317950 }, { "epoch": 3.13, "grad_norm": 10.785996437072754, "learning_rate": 3.4716355367551414e-06, "loss": 0.1386, "step": 317975 }, { "epoch": 3.13, "grad_norm": 6.9252848625183105, "learning_rate": 3.471511414300893e-06, "loss": 0.0564, "step": 318000 }, { "epoch": 3.13, "grad_norm": 3.563305616378784, "learning_rate": 3.4713872918466447e-06, "loss": 0.2022, "step": 318025 }, { "epoch": 3.13, "grad_norm": 2.5554494857788086, "learning_rate": 3.471263169392396e-06, "loss": 0.0849, "step": 318050 }, { "epoch": 3.13, "grad_norm": 3.817176580429077, "learning_rate": 3.471139046938148e-06, "loss": 0.1505, "step": 318075 }, { "epoch": 3.13, "grad_norm": 9.677630424499512, "learning_rate": 3.471014924483899e-06, "loss": 0.0436, "step": 318100 }, { "epoch": 3.13, "grad_norm": 5.621406555175781, "learning_rate": 3.4708908020296504e-06, "loss": 0.1903, "step": 318125 }, { "epoch": 3.13, "grad_norm": 3.5920262336730957, "learning_rate": 3.470766679575402e-06, "loss": 0.0471, "step": 318150 }, { "epoch": 3.13, "grad_norm": 5.355973243713379, "learning_rate": 3.4706425571211536e-06, "loss": 0.2041, "step": 318175 }, { "epoch": 3.13, "grad_norm": 8.155759811401367, "learning_rate": 3.4705184346669053e-06, "loss": 0.0824, "step": 318200 }, { "epoch": 3.13, "grad_norm": 4.4205193519592285, "learning_rate": 3.4703943122126565e-06, "loss": 0.183, "step": 318225 }, { "epoch": 3.13, "grad_norm": 0.9413344860076904, "learning_rate": 3.4702701897584085e-06, "loss": 0.0604, "step": 318250 }, { "epoch": 3.13, "grad_norm": 4.136692047119141, "learning_rate": 3.4701460673041597e-06, "loss": 0.1461, "step": 318275 }, { "epoch": 3.13, "grad_norm": 7.82545804977417, "learning_rate": 3.4700219448499114e-06, "loss": 0.0553, "step": 318300 }, { "epoch": 3.13, "grad_norm": 5.117485523223877, "learning_rate": 3.469897822395663e-06, "loss": 0.2179, "step": 318325 }, { "epoch": 3.13, "grad_norm": 5.806281566619873, "learning_rate": 3.4697736999414147e-06, "loss": 0.0609, "step": 318350 }, { "epoch": 3.13, "grad_norm": 3.2751238346099854, "learning_rate": 3.469654542385336e-06, "loss": 0.1952, "step": 318375 }, { "epoch": 3.13, "grad_norm": 8.5803804397583, "learning_rate": 3.4695304199310873e-06, "loss": 0.0949, "step": 318400 }, { "epoch": 3.13, "grad_norm": 5.597059726715088, "learning_rate": 3.469406297476839e-06, "loss": 0.1847, "step": 318425 }, { "epoch": 3.13, "grad_norm": 8.68205738067627, "learning_rate": 3.4692821750225906e-06, "loss": 0.0531, "step": 318450 }, { "epoch": 3.13, "grad_norm": 6.053707599639893, "learning_rate": 3.4691580525683422e-06, "loss": 0.1922, "step": 318475 }, { "epoch": 3.13, "grad_norm": 6.5096755027771, "learning_rate": 3.4690339301140934e-06, "loss": 0.0953, "step": 318500 }, { "epoch": 3.13, "grad_norm": 4.758052825927734, "learning_rate": 3.4689098076598455e-06, "loss": 0.1907, "step": 318525 }, { "epoch": 3.13, "grad_norm": 27.304834365844727, "learning_rate": 3.4687856852055967e-06, "loss": 0.0759, "step": 318550 }, { "epoch": 3.13, "grad_norm": 3.743499755859375, "learning_rate": 3.4686615627513483e-06, "loss": 0.1946, "step": 318575 }, { "epoch": 3.13, "grad_norm": 8.59914493560791, "learning_rate": 3.4685374402971e-06, "loss": 0.0799, "step": 318600 }, { "epoch": 3.13, "grad_norm": 5.507613182067871, "learning_rate": 3.4684133178428516e-06, "loss": 0.1596, "step": 318625 }, { "epoch": 3.13, "grad_norm": 10.587868690490723, "learning_rate": 3.468289195388603e-06, "loss": 0.0574, "step": 318650 }, { "epoch": 3.13, "grad_norm": 6.032569885253906, "learning_rate": 3.468165072934355e-06, "loss": 0.1725, "step": 318675 }, { "epoch": 3.13, "grad_norm": 4.67393684387207, "learning_rate": 3.468040950480106e-06, "loss": 0.069, "step": 318700 }, { "epoch": 3.13, "grad_norm": 9.103684425354004, "learning_rate": 3.4679168280258573e-06, "loss": 0.1761, "step": 318725 }, { "epoch": 3.13, "grad_norm": 7.18739128112793, "learning_rate": 3.467792705571609e-06, "loss": 0.0633, "step": 318750 }, { "epoch": 3.13, "grad_norm": 4.483482360839844, "learning_rate": 3.4676685831173606e-06, "loss": 0.1804, "step": 318775 }, { "epoch": 3.13, "grad_norm": 2.399289846420288, "learning_rate": 3.467544460663112e-06, "loss": 0.0635, "step": 318800 }, { "epoch": 3.13, "grad_norm": 4.645411014556885, "learning_rate": 3.4674203382088634e-06, "loss": 0.2134, "step": 318825 }, { "epoch": 3.13, "grad_norm": 7.271555423736572, "learning_rate": 3.4672962157546155e-06, "loss": 0.0674, "step": 318850 }, { "epoch": 3.14, "grad_norm": 5.334903240203857, "learning_rate": 3.4671720933003667e-06, "loss": 0.1624, "step": 318875 }, { "epoch": 3.14, "grad_norm": 14.680673599243164, "learning_rate": 3.467047970846118e-06, "loss": 0.0694, "step": 318900 }, { "epoch": 3.14, "grad_norm": 3.994687080383301, "learning_rate": 3.46692384839187e-06, "loss": 0.1708, "step": 318925 }, { "epoch": 3.14, "grad_norm": 5.481447696685791, "learning_rate": 3.466799725937621e-06, "loss": 0.0566, "step": 318950 }, { "epoch": 3.14, "grad_norm": 4.536896228790283, "learning_rate": 3.466675603483373e-06, "loss": 0.1848, "step": 318975 }, { "epoch": 3.14, "grad_norm": 13.544629096984863, "learning_rate": 3.4665514810291244e-06, "loss": 0.1214, "step": 319000 }, { "epoch": 3.14, "grad_norm": 3.1306793689727783, "learning_rate": 3.466427358574876e-06, "loss": 0.1736, "step": 319025 }, { "epoch": 3.14, "grad_norm": 13.599230766296387, "learning_rate": 3.4663032361206273e-06, "loss": 0.082, "step": 319050 }, { "epoch": 3.14, "grad_norm": 4.943576812744141, "learning_rate": 3.4661791136663793e-06, "loss": 0.209, "step": 319075 }, { "epoch": 3.14, "grad_norm": 6.320700168609619, "learning_rate": 3.4660549912121306e-06, "loss": 0.0665, "step": 319100 }, { "epoch": 3.14, "grad_norm": 4.3453569412231445, "learning_rate": 3.4659308687578818e-06, "loss": 0.1773, "step": 319125 }, { "epoch": 3.14, "grad_norm": 7.017457008361816, "learning_rate": 3.465806746303634e-06, "loss": 0.0645, "step": 319150 }, { "epoch": 3.14, "grad_norm": 2.9868648052215576, "learning_rate": 3.465682623849385e-06, "loss": 0.174, "step": 319175 }, { "epoch": 3.14, "grad_norm": 6.910758972167969, "learning_rate": 3.4655585013951367e-06, "loss": 0.0505, "step": 319200 }, { "epoch": 3.14, "grad_norm": 4.507150173187256, "learning_rate": 3.4654343789408883e-06, "loss": 0.1633, "step": 319225 }, { "epoch": 3.14, "grad_norm": 9.329671859741211, "learning_rate": 3.46531025648664e-06, "loss": 0.0622, "step": 319250 }, { "epoch": 3.14, "grad_norm": 5.002989292144775, "learning_rate": 3.465186134032391e-06, "loss": 0.1337, "step": 319275 }, { "epoch": 3.14, "grad_norm": 8.537242889404297, "learning_rate": 3.465062011578143e-06, "loss": 0.0707, "step": 319300 }, { "epoch": 3.14, "grad_norm": 5.860264778137207, "learning_rate": 3.4649378891238944e-06, "loss": 0.219, "step": 319325 }, { "epoch": 3.14, "grad_norm": 7.320828914642334, "learning_rate": 3.4648137666696456e-06, "loss": 0.0582, "step": 319350 }, { "epoch": 3.14, "grad_norm": 6.423178672790527, "learning_rate": 3.4646896442153977e-06, "loss": 0.1678, "step": 319375 }, { "epoch": 3.14, "grad_norm": 10.041244506835938, "learning_rate": 3.464565521761149e-06, "loss": 0.0923, "step": 319400 }, { "epoch": 3.14, "grad_norm": 6.375858306884766, "learning_rate": 3.4644413993069005e-06, "loss": 0.1854, "step": 319425 }, { "epoch": 3.14, "grad_norm": 7.946579456329346, "learning_rate": 3.464317276852652e-06, "loss": 0.0658, "step": 319450 }, { "epoch": 3.14, "grad_norm": 4.481818675994873, "learning_rate": 3.464193154398404e-06, "loss": 0.2103, "step": 319475 }, { "epoch": 3.14, "grad_norm": 3.357067823410034, "learning_rate": 3.464069031944155e-06, "loss": 0.0832, "step": 319500 }, { "epoch": 3.14, "grad_norm": 6.372377872467041, "learning_rate": 3.463944909489907e-06, "loss": 0.2018, "step": 319525 }, { "epoch": 3.14, "grad_norm": 8.569977760314941, "learning_rate": 3.4638207870356583e-06, "loss": 0.0626, "step": 319550 }, { "epoch": 3.14, "grad_norm": 5.371665954589844, "learning_rate": 3.4636966645814095e-06, "loss": 0.2038, "step": 319575 }, { "epoch": 3.14, "grad_norm": 3.630115509033203, "learning_rate": 3.463572542127161e-06, "loss": 0.0738, "step": 319600 }, { "epoch": 3.14, "grad_norm": 3.2364587783813477, "learning_rate": 3.4634484196729128e-06, "loss": 0.1866, "step": 319625 }, { "epoch": 3.14, "grad_norm": 11.159725189208984, "learning_rate": 3.4633242972186644e-06, "loss": 0.0718, "step": 319650 }, { "epoch": 3.14, "grad_norm": 3.16343355178833, "learning_rate": 3.4632001747644156e-06, "loss": 0.1843, "step": 319675 }, { "epoch": 3.14, "grad_norm": 13.576031684875488, "learning_rate": 3.4630760523101677e-06, "loss": 0.0958, "step": 319700 }, { "epoch": 3.14, "grad_norm": 6.078695297241211, "learning_rate": 3.462951929855919e-06, "loss": 0.1853, "step": 319725 }, { "epoch": 3.14, "grad_norm": 8.458929061889648, "learning_rate": 3.46282780740167e-06, "loss": 0.0658, "step": 319750 }, { "epoch": 3.14, "grad_norm": 2.992152214050293, "learning_rate": 3.462703684947422e-06, "loss": 0.1621, "step": 319775 }, { "epoch": 3.14, "grad_norm": 9.199078559875488, "learning_rate": 3.4625795624931734e-06, "loss": 0.0855, "step": 319800 }, { "epoch": 3.14, "grad_norm": 4.870015621185303, "learning_rate": 3.462455440038925e-06, "loss": 0.1954, "step": 319825 }, { "epoch": 3.14, "grad_norm": 6.942340850830078, "learning_rate": 3.4623313175846766e-06, "loss": 0.0575, "step": 319850 }, { "epoch": 3.15, "grad_norm": 5.944588661193848, "learning_rate": 3.4622071951304283e-06, "loss": 0.1738, "step": 319875 }, { "epoch": 3.15, "grad_norm": 7.755459785461426, "learning_rate": 3.4620830726761795e-06, "loss": 0.079, "step": 319900 }, { "epoch": 3.15, "grad_norm": 5.449110984802246, "learning_rate": 3.4619589502219315e-06, "loss": 0.1574, "step": 319925 }, { "epoch": 3.15, "grad_norm": 10.20115852355957, "learning_rate": 3.4618348277676828e-06, "loss": 0.0778, "step": 319950 }, { "epoch": 3.15, "grad_norm": 4.348048210144043, "learning_rate": 3.461710705313434e-06, "loss": 0.1714, "step": 319975 }, { "epoch": 3.15, "grad_norm": 7.843790531158447, "learning_rate": 3.461586582859186e-06, "loss": 0.0634, "step": 320000 }, { "epoch": 3.15, "eval_loss": 0.6461198925971985, "eval_runtime": 5930.6153, "eval_samples_per_second": 1.596, "eval_steps_per_second": 0.2, "eval_wer": 0.11903688920363098, "step": 320000 }, { "epoch": 3.15, "grad_norm": 5.93250846862793, "learning_rate": 3.4614624604049372e-06, "loss": 0.1628, "step": 320025 }, { "epoch": 3.15, "grad_norm": 3.8682878017425537, "learning_rate": 3.461338337950689e-06, "loss": 0.0599, "step": 320050 }, { "epoch": 3.15, "grad_norm": 3.563390016555786, "learning_rate": 3.4612142154964405e-06, "loss": 0.1789, "step": 320075 }, { "epoch": 3.15, "grad_norm": 9.089759826660156, "learning_rate": 3.461090093042192e-06, "loss": 0.0648, "step": 320100 }, { "epoch": 3.15, "grad_norm": 4.309138298034668, "learning_rate": 3.4609659705879433e-06, "loss": 0.1858, "step": 320125 }, { "epoch": 3.15, "grad_norm": 9.24865436553955, "learning_rate": 3.4608418481336954e-06, "loss": 0.071, "step": 320150 }, { "epoch": 3.15, "grad_norm": 3.9582936763763428, "learning_rate": 3.4607177256794466e-06, "loss": 0.1738, "step": 320175 }, { "epoch": 3.15, "grad_norm": 3.7073075771331787, "learning_rate": 3.460593603225198e-06, "loss": 0.0557, "step": 320200 }, { "epoch": 3.15, "grad_norm": 5.757822036743164, "learning_rate": 3.46046948077095e-06, "loss": 0.1698, "step": 320225 }, { "epoch": 3.15, "grad_norm": 10.082441329956055, "learning_rate": 3.460345358316701e-06, "loss": 0.0708, "step": 320250 }, { "epoch": 3.15, "grad_norm": 5.127831935882568, "learning_rate": 3.4602212358624527e-06, "loss": 0.1697, "step": 320275 }, { "epoch": 3.15, "grad_norm": 8.229788780212402, "learning_rate": 3.4600971134082044e-06, "loss": 0.0597, "step": 320300 }, { "epoch": 3.15, "grad_norm": 6.13027811050415, "learning_rate": 3.459972990953956e-06, "loss": 0.1769, "step": 320325 }, { "epoch": 3.15, "grad_norm": 6.381622791290283, "learning_rate": 3.4598488684997072e-06, "loss": 0.066, "step": 320350 }, { "epoch": 3.15, "grad_norm": 5.212952613830566, "learning_rate": 3.4597247460454593e-06, "loss": 0.1681, "step": 320375 }, { "epoch": 3.15, "grad_norm": 10.676789283752441, "learning_rate": 3.4596006235912105e-06, "loss": 0.0781, "step": 320400 }, { "epoch": 3.15, "grad_norm": 3.3040719032287598, "learning_rate": 3.4594765011369617e-06, "loss": 0.1739, "step": 320425 }, { "epoch": 3.15, "grad_norm": 10.082391738891602, "learning_rate": 3.4593523786827133e-06, "loss": 0.075, "step": 320450 }, { "epoch": 3.15, "grad_norm": 11.70291805267334, "learning_rate": 3.459228256228465e-06, "loss": 0.1821, "step": 320475 }, { "epoch": 3.15, "grad_norm": 5.9048991203308105, "learning_rate": 3.4591041337742166e-06, "loss": 0.0679, "step": 320500 }, { "epoch": 3.15, "grad_norm": 2.2884387969970703, "learning_rate": 3.458980011319968e-06, "loss": 0.1804, "step": 320525 }, { "epoch": 3.15, "grad_norm": 7.996448040008545, "learning_rate": 3.45885588886572e-06, "loss": 0.0567, "step": 320550 }, { "epoch": 3.15, "grad_norm": 6.955694675445557, "learning_rate": 3.458731766411471e-06, "loss": 0.1949, "step": 320575 }, { "epoch": 3.15, "grad_norm": 15.694332122802734, "learning_rate": 3.4586076439572223e-06, "loss": 0.0633, "step": 320600 }, { "epoch": 3.15, "grad_norm": 4.489861965179443, "learning_rate": 3.4584835215029744e-06, "loss": 0.1937, "step": 320625 }, { "epoch": 3.15, "grad_norm": 8.669236183166504, "learning_rate": 3.4583593990487256e-06, "loss": 0.078, "step": 320650 }, { "epoch": 3.15, "grad_norm": 6.8406243324279785, "learning_rate": 3.458235276594477e-06, "loss": 0.1763, "step": 320675 }, { "epoch": 3.15, "grad_norm": 10.567423820495605, "learning_rate": 3.458111154140229e-06, "loss": 0.0694, "step": 320700 }, { "epoch": 3.15, "grad_norm": 5.284891128540039, "learning_rate": 3.4579870316859805e-06, "loss": 0.1815, "step": 320725 }, { "epoch": 3.15, "grad_norm": 4.652205467224121, "learning_rate": 3.4578629092317317e-06, "loss": 0.0729, "step": 320750 }, { "epoch": 3.15, "grad_norm": 4.920511722564697, "learning_rate": 3.4577387867774837e-06, "loss": 0.2069, "step": 320775 }, { "epoch": 3.15, "grad_norm": 2.044074773788452, "learning_rate": 3.457614664323235e-06, "loss": 0.0845, "step": 320800 }, { "epoch": 3.15, "grad_norm": 5.699481964111328, "learning_rate": 3.457490541868986e-06, "loss": 0.1634, "step": 320825 }, { "epoch": 3.15, "grad_norm": 4.054605960845947, "learning_rate": 3.4573664194147382e-06, "loss": 0.0726, "step": 320850 }, { "epoch": 3.15, "grad_norm": 4.681130886077881, "learning_rate": 3.4572422969604894e-06, "loss": 0.1982, "step": 320875 }, { "epoch": 3.16, "grad_norm": 7.09308385848999, "learning_rate": 3.457118174506241e-06, "loss": 0.0575, "step": 320900 }, { "epoch": 3.16, "grad_norm": 4.620640754699707, "learning_rate": 3.4569940520519927e-06, "loss": 0.1715, "step": 320925 }, { "epoch": 3.16, "grad_norm": 8.895370483398438, "learning_rate": 3.4568699295977443e-06, "loss": 0.0625, "step": 320950 }, { "epoch": 3.16, "grad_norm": 4.904479503631592, "learning_rate": 3.4567458071434956e-06, "loss": 0.1774, "step": 320975 }, { "epoch": 3.16, "grad_norm": 7.875607013702393, "learning_rate": 3.4566216846892476e-06, "loss": 0.0775, "step": 321000 }, { "epoch": 3.16, "grad_norm": 3.8059146404266357, "learning_rate": 3.456497562234999e-06, "loss": 0.18, "step": 321025 }, { "epoch": 3.16, "grad_norm": 9.627573013305664, "learning_rate": 3.45637343978075e-06, "loss": 0.0647, "step": 321050 }, { "epoch": 3.16, "grad_norm": 9.7658052444458, "learning_rate": 3.456249317326502e-06, "loss": 0.2141, "step": 321075 }, { "epoch": 3.16, "grad_norm": 1.3005982637405396, "learning_rate": 3.4561251948722533e-06, "loss": 0.0667, "step": 321100 }, { "epoch": 3.16, "grad_norm": 3.502863883972168, "learning_rate": 3.456001072418005e-06, "loss": 0.182, "step": 321125 }, { "epoch": 3.16, "grad_norm": 11.979901313781738, "learning_rate": 3.4558769499637566e-06, "loss": 0.0646, "step": 321150 }, { "epoch": 3.16, "grad_norm": 6.21358585357666, "learning_rate": 3.455752827509508e-06, "loss": 0.1837, "step": 321175 }, { "epoch": 3.16, "grad_norm": 5.356195449829102, "learning_rate": 3.4556287050552594e-06, "loss": 0.092, "step": 321200 }, { "epoch": 3.16, "grad_norm": 2.3745222091674805, "learning_rate": 3.4555045826010115e-06, "loss": 0.179, "step": 321225 }, { "epoch": 3.16, "grad_norm": 6.253696441650391, "learning_rate": 3.4553804601467627e-06, "loss": 0.0711, "step": 321250 }, { "epoch": 3.16, "grad_norm": 8.252973556518555, "learning_rate": 3.4552563376925143e-06, "loss": 0.174, "step": 321275 }, { "epoch": 3.16, "grad_norm": 7.032034873962402, "learning_rate": 3.4551322152382655e-06, "loss": 0.0874, "step": 321300 }, { "epoch": 3.16, "grad_norm": 4.451848030090332, "learning_rate": 3.4550080927840176e-06, "loss": 0.185, "step": 321325 }, { "epoch": 3.16, "grad_norm": 2.4949281215667725, "learning_rate": 3.454883970329769e-06, "loss": 0.0568, "step": 321350 }, { "epoch": 3.16, "grad_norm": 4.231120586395264, "learning_rate": 3.45475984787552e-06, "loss": 0.1785, "step": 321375 }, { "epoch": 3.16, "grad_norm": 9.885098457336426, "learning_rate": 3.454635725421272e-06, "loss": 0.084, "step": 321400 }, { "epoch": 3.16, "grad_norm": 8.87685489654541, "learning_rate": 3.4545116029670233e-06, "loss": 0.1476, "step": 321425 }, { "epoch": 3.16, "grad_norm": 7.356952667236328, "learning_rate": 3.454387480512775e-06, "loss": 0.0727, "step": 321450 }, { "epoch": 3.16, "grad_norm": 4.0374064445495605, "learning_rate": 3.4542633580585266e-06, "loss": 0.1557, "step": 321475 }, { "epoch": 3.16, "grad_norm": 18.094139099121094, "learning_rate": 3.454139235604278e-06, "loss": 0.0723, "step": 321500 }, { "epoch": 3.16, "grad_norm": 4.5145583152771, "learning_rate": 3.4540151131500294e-06, "loss": 0.2115, "step": 321525 }, { "epoch": 3.16, "grad_norm": 9.84961223602295, "learning_rate": 3.4538909906957815e-06, "loss": 0.0715, "step": 321550 }, { "epoch": 3.16, "grad_norm": 4.503655433654785, "learning_rate": 3.4537668682415327e-06, "loss": 0.152, "step": 321575 }, { "epoch": 3.16, "grad_norm": 3.9424779415130615, "learning_rate": 3.453642745787284e-06, "loss": 0.0559, "step": 321600 }, { "epoch": 3.16, "grad_norm": 8.005290985107422, "learning_rate": 3.453518623333036e-06, "loss": 0.214, "step": 321625 }, { "epoch": 3.16, "grad_norm": 6.379487037658691, "learning_rate": 3.453394500878787e-06, "loss": 0.0594, "step": 321650 }, { "epoch": 3.16, "grad_norm": 6.996203422546387, "learning_rate": 3.453270378424539e-06, "loss": 0.176, "step": 321675 }, { "epoch": 3.16, "grad_norm": 8.128270149230957, "learning_rate": 3.4531462559702904e-06, "loss": 0.0668, "step": 321700 }, { "epoch": 3.16, "grad_norm": 6.643189907073975, "learning_rate": 3.453022133516042e-06, "loss": 0.1712, "step": 321725 }, { "epoch": 3.16, "grad_norm": 0.9709745049476624, "learning_rate": 3.4528980110617933e-06, "loss": 0.0961, "step": 321750 }, { "epoch": 3.16, "grad_norm": 3.652186632156372, "learning_rate": 3.4527738886075453e-06, "loss": 0.1801, "step": 321775 }, { "epoch": 3.16, "grad_norm": 13.445419311523438, "learning_rate": 3.4526497661532965e-06, "loss": 0.0888, "step": 321800 }, { "epoch": 3.16, "grad_norm": 5.1504130363464355, "learning_rate": 3.4525256436990478e-06, "loss": 0.1705, "step": 321825 }, { "epoch": 3.16, "grad_norm": 1.3314614295959473, "learning_rate": 3.4524015212448e-06, "loss": 0.0626, "step": 321850 }, { "epoch": 3.16, "grad_norm": 4.878105163574219, "learning_rate": 3.452277398790551e-06, "loss": 0.1694, "step": 321875 }, { "epoch": 3.16, "grad_norm": 10.522133827209473, "learning_rate": 3.4521532763363027e-06, "loss": 0.0704, "step": 321900 }, { "epoch": 3.17, "grad_norm": 3.7926599979400635, "learning_rate": 3.4520291538820543e-06, "loss": 0.1661, "step": 321925 }, { "epoch": 3.17, "grad_norm": 6.9485578536987305, "learning_rate": 3.451905031427806e-06, "loss": 0.0691, "step": 321950 }, { "epoch": 3.17, "grad_norm": 4.120105266571045, "learning_rate": 3.451780908973557e-06, "loss": 0.1984, "step": 321975 }, { "epoch": 3.17, "grad_norm": 8.8118314743042, "learning_rate": 3.451656786519309e-06, "loss": 0.0621, "step": 322000 }, { "epoch": 3.17, "grad_norm": 4.692809104919434, "learning_rate": 3.4515326640650604e-06, "loss": 0.1722, "step": 322025 }, { "epoch": 3.17, "grad_norm": 5.5756025314331055, "learning_rate": 3.4514085416108116e-06, "loss": 0.0566, "step": 322050 }, { "epoch": 3.17, "grad_norm": 9.794559478759766, "learning_rate": 3.4512844191565637e-06, "loss": 0.1725, "step": 322075 }, { "epoch": 3.17, "grad_norm": 7.777461051940918, "learning_rate": 3.451160296702315e-06, "loss": 0.0706, "step": 322100 }, { "epoch": 3.17, "grad_norm": 3.1659226417541504, "learning_rate": 3.4510361742480665e-06, "loss": 0.2199, "step": 322125 }, { "epoch": 3.17, "grad_norm": 10.33647346496582, "learning_rate": 3.4509120517938177e-06, "loss": 0.0606, "step": 322150 }, { "epoch": 3.17, "grad_norm": 4.335089206695557, "learning_rate": 3.45078792933957e-06, "loss": 0.1795, "step": 322175 }, { "epoch": 3.17, "grad_norm": 10.003487586975098, "learning_rate": 3.450663806885321e-06, "loss": 0.0588, "step": 322200 }, { "epoch": 3.17, "grad_norm": 3.6512539386749268, "learning_rate": 3.4505396844310722e-06, "loss": 0.156, "step": 322225 }, { "epoch": 3.17, "grad_norm": 11.226959228515625, "learning_rate": 3.4504155619768243e-06, "loss": 0.0622, "step": 322250 }, { "epoch": 3.17, "grad_norm": 5.380653381347656, "learning_rate": 3.4502914395225755e-06, "loss": 0.1986, "step": 322275 }, { "epoch": 3.17, "grad_norm": 5.898226261138916, "learning_rate": 3.450167317068327e-06, "loss": 0.0515, "step": 322300 }, { "epoch": 3.17, "grad_norm": 7.188583850860596, "learning_rate": 3.4500431946140788e-06, "loss": 0.1562, "step": 322325 }, { "epoch": 3.17, "grad_norm": 4.837241172790527, "learning_rate": 3.4499190721598304e-06, "loss": 0.0652, "step": 322350 }, { "epoch": 3.17, "grad_norm": 5.417271614074707, "learning_rate": 3.449799914603752e-06, "loss": 0.1589, "step": 322375 }, { "epoch": 3.17, "grad_norm": 7.0045671463012695, "learning_rate": 3.4496757921495035e-06, "loss": 0.0896, "step": 322400 }, { "epoch": 3.17, "grad_norm": 3.885556221008301, "learning_rate": 3.4495516696952547e-06, "loss": 0.1765, "step": 322425 }, { "epoch": 3.17, "grad_norm": 12.973925590515137, "learning_rate": 3.4494275472410067e-06, "loss": 0.0574, "step": 322450 }, { "epoch": 3.17, "grad_norm": 4.925118923187256, "learning_rate": 3.449303424786758e-06, "loss": 0.1626, "step": 322475 }, { "epoch": 3.17, "grad_norm": 4.366997718811035, "learning_rate": 3.449179302332509e-06, "loss": 0.0764, "step": 322500 }, { "epoch": 3.17, "grad_norm": 3.3260345458984375, "learning_rate": 3.4490551798782612e-06, "loss": 0.1844, "step": 322525 }, { "epoch": 3.17, "grad_norm": 5.035399436950684, "learning_rate": 3.4489310574240124e-06, "loss": 0.0682, "step": 322550 }, { "epoch": 3.17, "grad_norm": 3.9331212043762207, "learning_rate": 3.448806934969764e-06, "loss": 0.162, "step": 322575 }, { "epoch": 3.17, "grad_norm": 7.806412220001221, "learning_rate": 3.4486828125155157e-06, "loss": 0.0714, "step": 322600 }, { "epoch": 3.17, "grad_norm": 3.853580951690674, "learning_rate": 3.4485586900612673e-06, "loss": 0.158, "step": 322625 }, { "epoch": 3.17, "grad_norm": 8.712136268615723, "learning_rate": 3.4484345676070186e-06, "loss": 0.0777, "step": 322650 }, { "epoch": 3.17, "grad_norm": 4.929041385650635, "learning_rate": 3.4483104451527706e-06, "loss": 0.1798, "step": 322675 }, { "epoch": 3.17, "grad_norm": 5.6347126960754395, "learning_rate": 3.448186322698522e-06, "loss": 0.0737, "step": 322700 }, { "epoch": 3.17, "grad_norm": 4.82560920715332, "learning_rate": 3.448062200244273e-06, "loss": 0.189, "step": 322725 }, { "epoch": 3.17, "grad_norm": 7.1446533203125, "learning_rate": 3.4479380777900247e-06, "loss": 0.0826, "step": 322750 }, { "epoch": 3.17, "grad_norm": 4.617182731628418, "learning_rate": 3.4478139553357763e-06, "loss": 0.1784, "step": 322775 }, { "epoch": 3.17, "grad_norm": 8.721510887145996, "learning_rate": 3.447689832881528e-06, "loss": 0.0691, "step": 322800 }, { "epoch": 3.17, "grad_norm": 5.552175045013428, "learning_rate": 3.447565710427279e-06, "loss": 0.1869, "step": 322825 }, { "epoch": 3.17, "grad_norm": 7.493389129638672, "learning_rate": 3.4474415879730312e-06, "loss": 0.06, "step": 322850 }, { "epoch": 3.17, "grad_norm": 4.570310592651367, "learning_rate": 3.4473174655187824e-06, "loss": 0.1649, "step": 322875 }, { "epoch": 3.17, "grad_norm": 10.000728607177734, "learning_rate": 3.4471933430645336e-06, "loss": 0.0709, "step": 322900 }, { "epoch": 3.18, "grad_norm": 5.191801071166992, "learning_rate": 3.4470692206102857e-06, "loss": 0.1523, "step": 322925 }, { "epoch": 3.18, "grad_norm": 10.617119789123535, "learning_rate": 3.446945098156037e-06, "loss": 0.0634, "step": 322950 }, { "epoch": 3.18, "grad_norm": 4.781207084655762, "learning_rate": 3.4468209757017885e-06, "loss": 0.1773, "step": 322975 }, { "epoch": 3.18, "grad_norm": 7.746352672576904, "learning_rate": 3.44669685324754e-06, "loss": 0.0543, "step": 323000 }, { "epoch": 3.18, "grad_norm": 6.608275890350342, "learning_rate": 3.446572730793292e-06, "loss": 0.1685, "step": 323025 }, { "epoch": 3.18, "grad_norm": 13.7532377243042, "learning_rate": 3.446448608339043e-06, "loss": 0.0863, "step": 323050 }, { "epoch": 3.18, "grad_norm": 4.5651960372924805, "learning_rate": 3.446324485884795e-06, "loss": 0.188, "step": 323075 }, { "epoch": 3.18, "grad_norm": 10.796341896057129, "learning_rate": 3.4462003634305463e-06, "loss": 0.0881, "step": 323100 }, { "epoch": 3.18, "grad_norm": 4.488788604736328, "learning_rate": 3.4460762409762975e-06, "loss": 0.2102, "step": 323125 }, { "epoch": 3.18, "grad_norm": 7.244864463806152, "learning_rate": 3.4459521185220496e-06, "loss": 0.0726, "step": 323150 }, { "epoch": 3.18, "grad_norm": 5.268192768096924, "learning_rate": 3.4458279960678008e-06, "loss": 0.1768, "step": 323175 }, { "epoch": 3.18, "grad_norm": 12.233940124511719, "learning_rate": 3.4457038736135524e-06, "loss": 0.0633, "step": 323200 }, { "epoch": 3.18, "grad_norm": 4.976121425628662, "learning_rate": 3.445579751159304e-06, "loss": 0.207, "step": 323225 }, { "epoch": 3.18, "grad_norm": 5.253929138183594, "learning_rate": 3.4454556287050557e-06, "loss": 0.0834, "step": 323250 }, { "epoch": 3.18, "grad_norm": 3.129286050796509, "learning_rate": 3.445331506250807e-06, "loss": 0.1674, "step": 323275 }, { "epoch": 3.18, "grad_norm": 7.501862525939941, "learning_rate": 3.445207383796559e-06, "loss": 0.0632, "step": 323300 }, { "epoch": 3.18, "grad_norm": 3.4331929683685303, "learning_rate": 3.44508326134231e-06, "loss": 0.1797, "step": 323325 }, { "epoch": 3.18, "grad_norm": 4.467074394226074, "learning_rate": 3.4449591388880614e-06, "loss": 0.0547, "step": 323350 }, { "epoch": 3.18, "grad_norm": 5.139570236206055, "learning_rate": 3.4448350164338134e-06, "loss": 0.1415, "step": 323375 }, { "epoch": 3.18, "grad_norm": 7.015965461730957, "learning_rate": 3.4447108939795646e-06, "loss": 0.0687, "step": 323400 }, { "epoch": 3.18, "grad_norm": 6.838418960571289, "learning_rate": 3.4445867715253163e-06, "loss": 0.167, "step": 323425 }, { "epoch": 3.18, "grad_norm": 12.025472640991211, "learning_rate": 3.444462649071068e-06, "loss": 0.075, "step": 323450 }, { "epoch": 3.18, "grad_norm": 2.5587830543518066, "learning_rate": 3.4443385266168195e-06, "loss": 0.1746, "step": 323475 }, { "epoch": 3.18, "grad_norm": 3.821646213531494, "learning_rate": 3.4442144041625708e-06, "loss": 0.0917, "step": 323500 }, { "epoch": 3.18, "grad_norm": 5.866421699523926, "learning_rate": 3.444090281708323e-06, "loss": 0.2008, "step": 323525 }, { "epoch": 3.18, "grad_norm": 6.286674499511719, "learning_rate": 3.443966159254074e-06, "loss": 0.0788, "step": 323550 }, { "epoch": 3.18, "grad_norm": 4.075298309326172, "learning_rate": 3.4438420367998252e-06, "loss": 0.171, "step": 323575 }, { "epoch": 3.18, "grad_norm": 6.068055629730225, "learning_rate": 3.443717914345577e-06, "loss": 0.0752, "step": 323600 }, { "epoch": 3.18, "grad_norm": 4.65654182434082, "learning_rate": 3.4435937918913285e-06, "loss": 0.1846, "step": 323625 }, { "epoch": 3.18, "grad_norm": 10.750818252563477, "learning_rate": 3.44346966943708e-06, "loss": 0.0824, "step": 323650 }, { "epoch": 3.18, "grad_norm": 5.765162944793701, "learning_rate": 3.4433455469828314e-06, "loss": 0.2074, "step": 323675 }, { "epoch": 3.18, "grad_norm": 6.0746750831604, "learning_rate": 3.4432214245285834e-06, "loss": 0.0473, "step": 323700 }, { "epoch": 3.18, "grad_norm": 5.439117908477783, "learning_rate": 3.4430973020743346e-06, "loss": 0.1604, "step": 323725 }, { "epoch": 3.18, "grad_norm": 3.67677903175354, "learning_rate": 3.442973179620086e-06, "loss": 0.0743, "step": 323750 }, { "epoch": 3.18, "grad_norm": 4.23554801940918, "learning_rate": 3.442849057165838e-06, "loss": 0.1845, "step": 323775 }, { "epoch": 3.18, "grad_norm": 11.776982307434082, "learning_rate": 3.442724934711589e-06, "loss": 0.0874, "step": 323800 }, { "epoch": 3.18, "grad_norm": 3.098332643508911, "learning_rate": 3.4426008122573407e-06, "loss": 0.168, "step": 323825 }, { "epoch": 3.18, "grad_norm": 2.9193966388702393, "learning_rate": 3.4424766898030924e-06, "loss": 0.0626, "step": 323850 }, { "epoch": 3.18, "grad_norm": 4.750352382659912, "learning_rate": 3.442352567348844e-06, "loss": 0.1608, "step": 323875 }, { "epoch": 3.18, "grad_norm": 10.347079277038574, "learning_rate": 3.4422284448945952e-06, "loss": 0.0755, "step": 323900 }, { "epoch": 3.18, "grad_norm": 6.8062872886657715, "learning_rate": 3.4421043224403473e-06, "loss": 0.1723, "step": 323925 }, { "epoch": 3.19, "grad_norm": 10.87751579284668, "learning_rate": 3.4419801999860985e-06, "loss": 0.0654, "step": 323950 }, { "epoch": 3.19, "grad_norm": 3.768860340118408, "learning_rate": 3.4418560775318497e-06, "loss": 0.139, "step": 323975 }, { "epoch": 3.19, "grad_norm": 14.200997352600098, "learning_rate": 3.4417319550776018e-06, "loss": 0.0604, "step": 324000 }, { "epoch": 3.19, "grad_norm": 4.697504997253418, "learning_rate": 3.441607832623353e-06, "loss": 0.1495, "step": 324025 }, { "epoch": 3.19, "grad_norm": 7.117663383483887, "learning_rate": 3.4414837101691046e-06, "loss": 0.0691, "step": 324050 }, { "epoch": 3.19, "grad_norm": 4.661529541015625, "learning_rate": 3.4413595877148562e-06, "loss": 0.1718, "step": 324075 }, { "epoch": 3.19, "grad_norm": 5.3627543449401855, "learning_rate": 3.441235465260608e-06, "loss": 0.0626, "step": 324100 }, { "epoch": 3.19, "grad_norm": 4.920718669891357, "learning_rate": 3.441111342806359e-06, "loss": 0.1819, "step": 324125 }, { "epoch": 3.19, "grad_norm": 8.218291282653809, "learning_rate": 3.440987220352111e-06, "loss": 0.0812, "step": 324150 }, { "epoch": 3.19, "grad_norm": 5.127823829650879, "learning_rate": 3.4408630978978624e-06, "loss": 0.1684, "step": 324175 }, { "epoch": 3.19, "grad_norm": 7.593101978302002, "learning_rate": 3.440738975443614e-06, "loss": 0.0709, "step": 324200 }, { "epoch": 3.19, "grad_norm": 2.6997995376586914, "learning_rate": 3.4406148529893656e-06, "loss": 0.1827, "step": 324225 }, { "epoch": 3.19, "grad_norm": 11.133465766906738, "learning_rate": 3.4404907305351173e-06, "loss": 0.0688, "step": 324250 }, { "epoch": 3.19, "grad_norm": 4.70333194732666, "learning_rate": 3.4403666080808685e-06, "loss": 0.21, "step": 324275 }, { "epoch": 3.19, "grad_norm": 6.584529876708984, "learning_rate": 3.4402424856266205e-06, "loss": 0.0677, "step": 324300 }, { "epoch": 3.19, "grad_norm": 3.4451375007629395, "learning_rate": 3.4401183631723717e-06, "loss": 0.1512, "step": 324325 }, { "epoch": 3.19, "grad_norm": 13.865266799926758, "learning_rate": 3.439994240718123e-06, "loss": 0.064, "step": 324350 }, { "epoch": 3.19, "grad_norm": 5.523889064788818, "learning_rate": 3.439870118263875e-06, "loss": 0.1898, "step": 324375 }, { "epoch": 3.19, "grad_norm": 5.333938121795654, "learning_rate": 3.4397459958096262e-06, "loss": 0.0736, "step": 324400 }, { "epoch": 3.19, "grad_norm": 4.645179271697998, "learning_rate": 3.439621873355378e-06, "loss": 0.1821, "step": 324425 }, { "epoch": 3.19, "grad_norm": 6.667846202850342, "learning_rate": 3.439497750901129e-06, "loss": 0.0565, "step": 324450 }, { "epoch": 3.19, "grad_norm": 3.8855624198913574, "learning_rate": 3.439378593345051e-06, "loss": 0.1957, "step": 324475 }, { "epoch": 3.19, "grad_norm": 11.587774276733398, "learning_rate": 3.439254470890802e-06, "loss": 0.061, "step": 324500 }, { "epoch": 3.19, "grad_norm": 5.923163890838623, "learning_rate": 3.4391303484365542e-06, "loss": 0.1739, "step": 324525 }, { "epoch": 3.19, "grad_norm": 14.442232131958008, "learning_rate": 3.4390062259823054e-06, "loss": 0.0598, "step": 324550 }, { "epoch": 3.19, "grad_norm": 4.721521854400635, "learning_rate": 3.4388821035280566e-06, "loss": 0.2089, "step": 324575 }, { "epoch": 3.19, "grad_norm": 8.68898868560791, "learning_rate": 3.4387579810738087e-06, "loss": 0.0502, "step": 324600 }, { "epoch": 3.19, "grad_norm": 5.371026515960693, "learning_rate": 3.43863385861956e-06, "loss": 0.2017, "step": 324625 }, { "epoch": 3.19, "grad_norm": 7.483326435089111, "learning_rate": 3.4385097361653115e-06, "loss": 0.0804, "step": 324650 }, { "epoch": 3.19, "grad_norm": 6.350618362426758, "learning_rate": 3.438385613711063e-06, "loss": 0.1771, "step": 324675 }, { "epoch": 3.19, "grad_norm": 7.7648162841796875, "learning_rate": 3.438261491256815e-06, "loss": 0.074, "step": 324700 }, { "epoch": 3.19, "grad_norm": 4.280833721160889, "learning_rate": 3.438137368802566e-06, "loss": 0.1545, "step": 324725 }, { "epoch": 3.19, "grad_norm": 8.965533256530762, "learning_rate": 3.438013246348318e-06, "loss": 0.0818, "step": 324750 }, { "epoch": 3.19, "grad_norm": 5.9735894203186035, "learning_rate": 3.4378891238940693e-06, "loss": 0.1967, "step": 324775 }, { "epoch": 3.19, "grad_norm": 6.4790239334106445, "learning_rate": 3.4377650014398205e-06, "loss": 0.068, "step": 324800 }, { "epoch": 3.19, "grad_norm": 5.702365875244141, "learning_rate": 3.4376408789855726e-06, "loss": 0.17, "step": 324825 }, { "epoch": 3.19, "grad_norm": 11.170622825622559, "learning_rate": 3.4375167565313238e-06, "loss": 0.0815, "step": 324850 }, { "epoch": 3.19, "grad_norm": 4.34598445892334, "learning_rate": 3.4373926340770754e-06, "loss": 0.1663, "step": 324875 }, { "epoch": 3.19, "grad_norm": 13.978243827819824, "learning_rate": 3.437268511622827e-06, "loss": 0.0645, "step": 324900 }, { "epoch": 3.19, "grad_norm": 5.540380001068115, "learning_rate": 3.4371443891685787e-06, "loss": 0.1765, "step": 324925 }, { "epoch": 3.19, "grad_norm": 10.664413452148438, "learning_rate": 3.43702026671433e-06, "loss": 0.0735, "step": 324950 }, { "epoch": 3.2, "grad_norm": 3.211555004119873, "learning_rate": 3.436896144260082e-06, "loss": 0.177, "step": 324975 }, { "epoch": 3.2, "grad_norm": 8.735815048217773, "learning_rate": 3.436772021805833e-06, "loss": 0.0658, "step": 325000 }, { "epoch": 3.2, "grad_norm": 3.767688035964966, "learning_rate": 3.4366478993515844e-06, "loss": 0.1722, "step": 325025 }, { "epoch": 3.2, "grad_norm": 4.999096393585205, "learning_rate": 3.436523776897336e-06, "loss": 0.0507, "step": 325050 }, { "epoch": 3.2, "grad_norm": 3.4924144744873047, "learning_rate": 3.4363996544430876e-06, "loss": 0.161, "step": 325075 }, { "epoch": 3.2, "grad_norm": 8.925212860107422, "learning_rate": 3.4362755319888393e-06, "loss": 0.0653, "step": 325100 }, { "epoch": 3.2, "grad_norm": 4.094483375549316, "learning_rate": 3.4361514095345905e-06, "loss": 0.1435, "step": 325125 }, { "epoch": 3.2, "grad_norm": 10.664043426513672, "learning_rate": 3.4360272870803426e-06, "loss": 0.0929, "step": 325150 }, { "epoch": 3.2, "grad_norm": 3.8623251914978027, "learning_rate": 3.4359031646260938e-06, "loss": 0.1642, "step": 325175 }, { "epoch": 3.2, "grad_norm": 6.000993728637695, "learning_rate": 3.435779042171845e-06, "loss": 0.0695, "step": 325200 }, { "epoch": 3.2, "grad_norm": 7.203376770019531, "learning_rate": 3.435654919717597e-06, "loss": 0.175, "step": 325225 }, { "epoch": 3.2, "grad_norm": 5.824995040893555, "learning_rate": 3.4355307972633482e-06, "loss": 0.0702, "step": 325250 }, { "epoch": 3.2, "grad_norm": 6.137574672698975, "learning_rate": 3.4354066748091e-06, "loss": 0.1739, "step": 325275 }, { "epoch": 3.2, "grad_norm": 9.360512733459473, "learning_rate": 3.4352825523548515e-06, "loss": 0.0641, "step": 325300 }, { "epoch": 3.2, "grad_norm": 4.248960018157959, "learning_rate": 3.435158429900603e-06, "loss": 0.1488, "step": 325325 }, { "epoch": 3.2, "grad_norm": 6.111778259277344, "learning_rate": 3.4350343074463544e-06, "loss": 0.0775, "step": 325350 }, { "epoch": 3.2, "grad_norm": 4.045440673828125, "learning_rate": 3.4349101849921064e-06, "loss": 0.1657, "step": 325375 }, { "epoch": 3.2, "grad_norm": 9.1466703414917, "learning_rate": 3.4347860625378576e-06, "loss": 0.1019, "step": 325400 }, { "epoch": 3.2, "grad_norm": 5.349860668182373, "learning_rate": 3.434661940083609e-06, "loss": 0.1851, "step": 325425 }, { "epoch": 3.2, "grad_norm": 5.433478832244873, "learning_rate": 3.434537817629361e-06, "loss": 0.0868, "step": 325450 }, { "epoch": 3.2, "grad_norm": 3.158524513244629, "learning_rate": 3.434413695175112e-06, "loss": 0.1598, "step": 325475 }, { "epoch": 3.2, "grad_norm": 10.015692710876465, "learning_rate": 3.4342895727208637e-06, "loss": 0.0658, "step": 325500 }, { "epoch": 3.2, "grad_norm": 4.4024338722229, "learning_rate": 3.4341654502666154e-06, "loss": 0.1741, "step": 325525 }, { "epoch": 3.2, "grad_norm": 2.8361287117004395, "learning_rate": 3.434041327812367e-06, "loss": 0.0974, "step": 325550 }, { "epoch": 3.2, "grad_norm": 4.723062038421631, "learning_rate": 3.4339172053581182e-06, "loss": 0.1573, "step": 325575 }, { "epoch": 3.2, "grad_norm": 7.409154891967773, "learning_rate": 3.4337930829038703e-06, "loss": 0.0746, "step": 325600 }, { "epoch": 3.2, "grad_norm": 4.632193565368652, "learning_rate": 3.4336689604496215e-06, "loss": 0.1239, "step": 325625 }, { "epoch": 3.2, "grad_norm": 6.633249759674072, "learning_rate": 3.4335448379953727e-06, "loss": 0.0663, "step": 325650 }, { "epoch": 3.2, "grad_norm": 4.802735805511475, "learning_rate": 3.4334207155411248e-06, "loss": 0.1855, "step": 325675 }, { "epoch": 3.2, "grad_norm": 5.582330703735352, "learning_rate": 3.433296593086876e-06, "loss": 0.0611, "step": 325700 }, { "epoch": 3.2, "grad_norm": 6.0485734939575195, "learning_rate": 3.4331724706326276e-06, "loss": 0.1769, "step": 325725 }, { "epoch": 3.2, "grad_norm": 1.8191500902175903, "learning_rate": 3.4330483481783793e-06, "loss": 0.0675, "step": 325750 }, { "epoch": 3.2, "grad_norm": 3.995258331298828, "learning_rate": 3.432924225724131e-06, "loss": 0.1704, "step": 325775 }, { "epoch": 3.2, "grad_norm": 7.451038360595703, "learning_rate": 3.432800103269882e-06, "loss": 0.0705, "step": 325800 }, { "epoch": 3.2, "grad_norm": 5.341322898864746, "learning_rate": 3.432675980815634e-06, "loss": 0.1675, "step": 325825 }, { "epoch": 3.2, "grad_norm": 9.137775421142578, "learning_rate": 3.4325518583613854e-06, "loss": 0.083, "step": 325850 }, { "epoch": 3.2, "grad_norm": 19.16881561279297, "learning_rate": 3.4324277359071366e-06, "loss": 0.1998, "step": 325875 }, { "epoch": 3.2, "grad_norm": 4.5988593101501465, "learning_rate": 3.4323036134528882e-06, "loss": 0.0648, "step": 325900 }, { "epoch": 3.2, "grad_norm": 4.154104709625244, "learning_rate": 3.43217949099864e-06, "loss": 0.212, "step": 325925 }, { "epoch": 3.2, "grad_norm": 9.085597038269043, "learning_rate": 3.4320553685443915e-06, "loss": 0.06, "step": 325950 }, { "epoch": 3.21, "grad_norm": 5.590339660644531, "learning_rate": 3.4319312460901427e-06, "loss": 0.184, "step": 325975 }, { "epoch": 3.21, "grad_norm": 4.603555679321289, "learning_rate": 3.4318071236358948e-06, "loss": 0.0914, "step": 326000 }, { "epoch": 3.21, "grad_norm": 4.347459316253662, "learning_rate": 3.431683001181646e-06, "loss": 0.1829, "step": 326025 }, { "epoch": 3.21, "grad_norm": 7.847052097320557, "learning_rate": 3.431558878727397e-06, "loss": 0.0807, "step": 326050 }, { "epoch": 3.21, "grad_norm": 3.8899784088134766, "learning_rate": 3.4314347562731492e-06, "loss": 0.1892, "step": 326075 }, { "epoch": 3.21, "grad_norm": 7.560565948486328, "learning_rate": 3.4313106338189004e-06, "loss": 0.0619, "step": 326100 }, { "epoch": 3.21, "grad_norm": 3.496168375015259, "learning_rate": 3.431186511364652e-06, "loss": 0.1738, "step": 326125 }, { "epoch": 3.21, "grad_norm": 4.53931188583374, "learning_rate": 3.4310623889104037e-06, "loss": 0.0583, "step": 326150 }, { "epoch": 3.21, "grad_norm": 3.63076114654541, "learning_rate": 3.4309382664561554e-06, "loss": 0.1883, "step": 326175 }, { "epoch": 3.21, "grad_norm": 2.8793118000030518, "learning_rate": 3.4308141440019066e-06, "loss": 0.0468, "step": 326200 }, { "epoch": 3.21, "grad_norm": 3.9352078437805176, "learning_rate": 3.4306900215476586e-06, "loss": 0.1607, "step": 326225 }, { "epoch": 3.21, "grad_norm": 10.333794593811035, "learning_rate": 3.43056589909341e-06, "loss": 0.0995, "step": 326250 }, { "epoch": 3.21, "grad_norm": 4.598018169403076, "learning_rate": 3.430441776639161e-06, "loss": 0.1505, "step": 326275 }, { "epoch": 3.21, "grad_norm": 10.735316276550293, "learning_rate": 3.430317654184913e-06, "loss": 0.0671, "step": 326300 }, { "epoch": 3.21, "grad_norm": 3.753253221511841, "learning_rate": 3.4301935317306643e-06, "loss": 0.2035, "step": 326325 }, { "epoch": 3.21, "grad_norm": 12.4890775680542, "learning_rate": 3.430069409276416e-06, "loss": 0.0775, "step": 326350 }, { "epoch": 3.21, "grad_norm": 3.795734167098999, "learning_rate": 3.4299452868221676e-06, "loss": 0.1558, "step": 326375 }, { "epoch": 3.21, "grad_norm": 8.392895698547363, "learning_rate": 3.4298211643679192e-06, "loss": 0.0962, "step": 326400 }, { "epoch": 3.21, "grad_norm": 5.079407691955566, "learning_rate": 3.4296970419136704e-06, "loss": 0.1774, "step": 326425 }, { "epoch": 3.21, "grad_norm": 5.3202385902404785, "learning_rate": 3.4295729194594225e-06, "loss": 0.0605, "step": 326450 }, { "epoch": 3.21, "grad_norm": 6.932718753814697, "learning_rate": 3.4294487970051737e-06, "loss": 0.1738, "step": 326475 }, { "epoch": 3.21, "grad_norm": 8.379697799682617, "learning_rate": 3.429324674550925e-06, "loss": 0.0591, "step": 326500 }, { "epoch": 3.21, "grad_norm": 4.615877628326416, "learning_rate": 3.429200552096677e-06, "loss": 0.1514, "step": 326525 }, { "epoch": 3.21, "grad_norm": 14.074944496154785, "learning_rate": 3.429076429642428e-06, "loss": 0.0645, "step": 326550 }, { "epoch": 3.21, "grad_norm": 5.92446231842041, "learning_rate": 3.42895230718818e-06, "loss": 0.1671, "step": 326575 }, { "epoch": 3.21, "grad_norm": 7.751774311065674, "learning_rate": 3.4288281847339315e-06, "loss": 0.0823, "step": 326600 }, { "epoch": 3.21, "grad_norm": 5.162489414215088, "learning_rate": 3.428704062279683e-06, "loss": 0.183, "step": 326625 }, { "epoch": 3.21, "grad_norm": 9.457234382629395, "learning_rate": 3.4285799398254343e-06, "loss": 0.0682, "step": 326650 }, { "epoch": 3.21, "grad_norm": 3.8598122596740723, "learning_rate": 3.4284558173711864e-06, "loss": 0.2089, "step": 326675 }, { "epoch": 3.21, "grad_norm": 5.632236957550049, "learning_rate": 3.4283316949169376e-06, "loss": 0.0759, "step": 326700 }, { "epoch": 3.21, "grad_norm": 4.137948989868164, "learning_rate": 3.4282075724626888e-06, "loss": 0.2056, "step": 326725 }, { "epoch": 3.21, "grad_norm": 11.390714645385742, "learning_rate": 3.4280834500084404e-06, "loss": 0.0595, "step": 326750 }, { "epoch": 3.21, "grad_norm": 5.415554046630859, "learning_rate": 3.427959327554192e-06, "loss": 0.1772, "step": 326775 }, { "epoch": 3.21, "grad_norm": 6.9521660804748535, "learning_rate": 3.4278352050999437e-06, "loss": 0.0871, "step": 326800 }, { "epoch": 3.21, "grad_norm": 4.964362144470215, "learning_rate": 3.427711082645695e-06, "loss": 0.1906, "step": 326825 }, { "epoch": 3.21, "grad_norm": 9.788834571838379, "learning_rate": 3.427586960191447e-06, "loss": 0.079, "step": 326850 }, { "epoch": 3.21, "grad_norm": 3.837898015975952, "learning_rate": 3.427462837737198e-06, "loss": 0.193, "step": 326875 }, { "epoch": 3.21, "grad_norm": 8.56553840637207, "learning_rate": 3.42733871528295e-06, "loss": 0.0563, "step": 326900 }, { "epoch": 3.21, "grad_norm": 3.884941339492798, "learning_rate": 3.4272145928287014e-06, "loss": 0.1601, "step": 326925 }, { "epoch": 3.21, "grad_norm": 12.797785758972168, "learning_rate": 3.427090470374453e-06, "loss": 0.0719, "step": 326950 }, { "epoch": 3.21, "grad_norm": 4.672161102294922, "learning_rate": 3.4269663479202043e-06, "loss": 0.2032, "step": 326975 }, { "epoch": 3.22, "grad_norm": 9.67796516418457, "learning_rate": 3.426842225465956e-06, "loss": 0.0761, "step": 327000 }, { "epoch": 3.22, "grad_norm": 5.292535781860352, "learning_rate": 3.4267181030117076e-06, "loss": 0.1767, "step": 327025 }, { "epoch": 3.22, "grad_norm": 5.863797187805176, "learning_rate": 3.4265939805574588e-06, "loss": 0.0625, "step": 327050 }, { "epoch": 3.22, "grad_norm": 8.836250305175781, "learning_rate": 3.426469858103211e-06, "loss": 0.1834, "step": 327075 }, { "epoch": 3.22, "grad_norm": 10.436948776245117, "learning_rate": 3.426345735648962e-06, "loss": 0.0782, "step": 327100 }, { "epoch": 3.22, "grad_norm": 4.631341457366943, "learning_rate": 3.4262216131947137e-06, "loss": 0.126, "step": 327125 }, { "epoch": 3.22, "grad_norm": 7.157567977905273, "learning_rate": 3.4260974907404653e-06, "loss": 0.0779, "step": 327150 }, { "epoch": 3.22, "grad_norm": 7.985195159912109, "learning_rate": 3.4259783331843868e-06, "loss": 0.1996, "step": 327175 }, { "epoch": 3.22, "grad_norm": 8.408215522766113, "learning_rate": 3.4258542107301384e-06, "loss": 0.06, "step": 327200 }, { "epoch": 3.22, "grad_norm": 4.120987415313721, "learning_rate": 3.42573008827589e-06, "loss": 0.1816, "step": 327225 }, { "epoch": 3.22, "grad_norm": 17.03165626525879, "learning_rate": 3.4256059658216412e-06, "loss": 0.0961, "step": 327250 }, { "epoch": 3.22, "grad_norm": 4.615133762359619, "learning_rate": 3.4254818433673933e-06, "loss": 0.1437, "step": 327275 }, { "epoch": 3.22, "grad_norm": 36.58427810668945, "learning_rate": 3.4253577209131445e-06, "loss": 0.0672, "step": 327300 }, { "epoch": 3.22, "grad_norm": 6.335586071014404, "learning_rate": 3.4252335984588957e-06, "loss": 0.1666, "step": 327325 }, { "epoch": 3.22, "grad_norm": 10.756623268127441, "learning_rate": 3.4251094760046474e-06, "loss": 0.0733, "step": 327350 }, { "epoch": 3.22, "grad_norm": 5.0131964683532715, "learning_rate": 3.424985353550399e-06, "loss": 0.1724, "step": 327375 }, { "epoch": 3.22, "grad_norm": 4.7839789390563965, "learning_rate": 3.4248612310961506e-06, "loss": 0.0587, "step": 327400 }, { "epoch": 3.22, "grad_norm": 5.481651782989502, "learning_rate": 3.424737108641902e-06, "loss": 0.1759, "step": 327425 }, { "epoch": 3.22, "grad_norm": 3.8460309505462646, "learning_rate": 3.424612986187654e-06, "loss": 0.0848, "step": 327450 }, { "epoch": 3.22, "grad_norm": 3.7921981811523438, "learning_rate": 3.424488863733405e-06, "loss": 0.2231, "step": 327475 }, { "epoch": 3.22, "grad_norm": 7.545849323272705, "learning_rate": 3.4243647412791563e-06, "loss": 0.0698, "step": 327500 }, { "epoch": 3.22, "grad_norm": 7.092657566070557, "learning_rate": 3.4242406188249084e-06, "loss": 0.1662, "step": 327525 }, { "epoch": 3.22, "grad_norm": 10.21202564239502, "learning_rate": 3.4241164963706596e-06, "loss": 0.0745, "step": 327550 }, { "epoch": 3.22, "grad_norm": 5.468868732452393, "learning_rate": 3.4239923739164112e-06, "loss": 0.214, "step": 327575 }, { "epoch": 3.22, "grad_norm": 6.066534519195557, "learning_rate": 3.423868251462163e-06, "loss": 0.0552, "step": 327600 }, { "epoch": 3.22, "grad_norm": 12.654324531555176, "learning_rate": 3.4237441290079145e-06, "loss": 0.1447, "step": 327625 }, { "epoch": 3.22, "grad_norm": 4.091323375701904, "learning_rate": 3.4236200065536657e-06, "loss": 0.0625, "step": 327650 }, { "epoch": 3.22, "grad_norm": 5.420785903930664, "learning_rate": 3.4234958840994178e-06, "loss": 0.1869, "step": 327675 }, { "epoch": 3.22, "grad_norm": 11.09914493560791, "learning_rate": 3.423371761645169e-06, "loss": 0.0657, "step": 327700 }, { "epoch": 3.22, "grad_norm": 4.435754776000977, "learning_rate": 3.42324763919092e-06, "loss": 0.1867, "step": 327725 }, { "epoch": 3.22, "grad_norm": 9.514015197753906, "learning_rate": 3.4231235167366722e-06, "loss": 0.0863, "step": 327750 }, { "epoch": 3.22, "grad_norm": 3.869293212890625, "learning_rate": 3.4229993942824235e-06, "loss": 0.1705, "step": 327775 }, { "epoch": 3.22, "grad_norm": 9.980547904968262, "learning_rate": 3.422875271828175e-06, "loss": 0.066, "step": 327800 }, { "epoch": 3.22, "grad_norm": 5.21729040145874, "learning_rate": 3.4227511493739267e-06, "loss": 0.1623, "step": 327825 }, { "epoch": 3.22, "grad_norm": 12.56412124633789, "learning_rate": 3.4226270269196784e-06, "loss": 0.0658, "step": 327850 }, { "epoch": 3.22, "grad_norm": 5.15321683883667, "learning_rate": 3.4225029044654296e-06, "loss": 0.1746, "step": 327875 }, { "epoch": 3.22, "grad_norm": 10.854870796203613, "learning_rate": 3.4223787820111816e-06, "loss": 0.1077, "step": 327900 }, { "epoch": 3.22, "grad_norm": 5.0576581954956055, "learning_rate": 3.422254659556933e-06, "loss": 0.1542, "step": 327925 }, { "epoch": 3.22, "grad_norm": 13.443319320678711, "learning_rate": 3.422130537102684e-06, "loss": 0.0901, "step": 327950 }, { "epoch": 3.22, "grad_norm": 5.295966148376465, "learning_rate": 3.422006414648436e-06, "loss": 0.2049, "step": 327975 }, { "epoch": 3.22, "grad_norm": 9.218289375305176, "learning_rate": 3.4218822921941873e-06, "loss": 0.077, "step": 328000 }, { "epoch": 3.23, "grad_norm": 7.296899795532227, "learning_rate": 3.421758169739939e-06, "loss": 0.163, "step": 328025 }, { "epoch": 3.23, "grad_norm": 10.066600799560547, "learning_rate": 3.4216340472856906e-06, "loss": 0.0841, "step": 328050 }, { "epoch": 3.23, "grad_norm": 5.509835243225098, "learning_rate": 3.4215099248314422e-06, "loss": 0.177, "step": 328075 }, { "epoch": 3.23, "grad_norm": 5.244749069213867, "learning_rate": 3.4213858023771934e-06, "loss": 0.0517, "step": 328100 }, { "epoch": 3.23, "grad_norm": 5.350407123565674, "learning_rate": 3.4212616799229455e-06, "loss": 0.1569, "step": 328125 }, { "epoch": 3.23, "grad_norm": 8.878056526184082, "learning_rate": 3.4211375574686967e-06, "loss": 0.0632, "step": 328150 }, { "epoch": 3.23, "grad_norm": 4.579278945922852, "learning_rate": 3.421013435014448e-06, "loss": 0.181, "step": 328175 }, { "epoch": 3.23, "grad_norm": 6.210787773132324, "learning_rate": 3.4208893125601996e-06, "loss": 0.0671, "step": 328200 }, { "epoch": 3.23, "grad_norm": 4.472462177276611, "learning_rate": 3.420765190105951e-06, "loss": 0.1577, "step": 328225 }, { "epoch": 3.23, "grad_norm": 11.516626358032227, "learning_rate": 3.420641067651703e-06, "loss": 0.0722, "step": 328250 }, { "epoch": 3.23, "grad_norm": 4.389156818389893, "learning_rate": 3.420516945197454e-06, "loss": 0.1908, "step": 328275 }, { "epoch": 3.23, "grad_norm": 10.837924003601074, "learning_rate": 3.420392822743206e-06, "loss": 0.0543, "step": 328300 }, { "epoch": 3.23, "grad_norm": 6.455386161804199, "learning_rate": 3.4202687002889573e-06, "loss": 0.2016, "step": 328325 }, { "epoch": 3.23, "grad_norm": 7.794097423553467, "learning_rate": 3.4201445778347085e-06, "loss": 0.0513, "step": 328350 }, { "epoch": 3.23, "grad_norm": 4.6923627853393555, "learning_rate": 3.4200204553804606e-06, "loss": 0.2154, "step": 328375 }, { "epoch": 3.23, "grad_norm": 3.8559582233428955, "learning_rate": 3.4198963329262118e-06, "loss": 0.0689, "step": 328400 }, { "epoch": 3.23, "grad_norm": 3.5730602741241455, "learning_rate": 3.4197722104719634e-06, "loss": 0.1875, "step": 328425 }, { "epoch": 3.23, "grad_norm": 14.441152572631836, "learning_rate": 3.419648088017715e-06, "loss": 0.0755, "step": 328450 }, { "epoch": 3.23, "grad_norm": 8.762678146362305, "learning_rate": 3.4195239655634667e-06, "loss": 0.1619, "step": 328475 }, { "epoch": 3.23, "grad_norm": 6.490013122558594, "learning_rate": 3.419399843109218e-06, "loss": 0.0794, "step": 328500 }, { "epoch": 3.23, "grad_norm": 5.704248905181885, "learning_rate": 3.41927572065497e-06, "loss": 0.1847, "step": 328525 }, { "epoch": 3.23, "grad_norm": 9.229151725769043, "learning_rate": 3.419151598200721e-06, "loss": 0.0576, "step": 328550 }, { "epoch": 3.23, "grad_norm": 6.449161052703857, "learning_rate": 3.4190274757464724e-06, "loss": 0.1675, "step": 328575 }, { "epoch": 3.23, "grad_norm": 6.49468994140625, "learning_rate": 3.4189033532922244e-06, "loss": 0.0612, "step": 328600 }, { "epoch": 3.23, "grad_norm": 4.918094635009766, "learning_rate": 3.4187792308379757e-06, "loss": 0.1757, "step": 328625 }, { "epoch": 3.23, "grad_norm": 4.487167835235596, "learning_rate": 3.4186551083837273e-06, "loss": 0.0673, "step": 328650 }, { "epoch": 3.23, "grad_norm": 4.505337238311768, "learning_rate": 3.418530985929479e-06, "loss": 0.2161, "step": 328675 }, { "epoch": 3.23, "grad_norm": 9.978933334350586, "learning_rate": 3.4184068634752306e-06, "loss": 0.0874, "step": 328700 }, { "epoch": 3.23, "grad_norm": 6.258726596832275, "learning_rate": 3.4182827410209818e-06, "loss": 0.1894, "step": 328725 }, { "epoch": 3.23, "grad_norm": 8.556131362915039, "learning_rate": 3.418158618566734e-06, "loss": 0.0776, "step": 328750 }, { "epoch": 3.23, "grad_norm": 3.804030656814575, "learning_rate": 3.418034496112485e-06, "loss": 0.1676, "step": 328775 }, { "epoch": 3.23, "grad_norm": 7.888642311096191, "learning_rate": 3.4179103736582363e-06, "loss": 0.0689, "step": 328800 }, { "epoch": 3.23, "grad_norm": 5.612457752227783, "learning_rate": 3.4177862512039883e-06, "loss": 0.1638, "step": 328825 }, { "epoch": 3.23, "grad_norm": 9.725746154785156, "learning_rate": 3.4176621287497395e-06, "loss": 0.0857, "step": 328850 }, { "epoch": 3.23, "grad_norm": 7.946446418762207, "learning_rate": 3.417538006295491e-06, "loss": 0.2015, "step": 328875 }, { "epoch": 3.23, "grad_norm": 9.27942943572998, "learning_rate": 3.417413883841243e-06, "loss": 0.0773, "step": 328900 }, { "epoch": 3.23, "grad_norm": 7.0100836753845215, "learning_rate": 3.4172897613869944e-06, "loss": 0.1638, "step": 328925 }, { "epoch": 3.23, "grad_norm": 9.676787376403809, "learning_rate": 3.4171656389327456e-06, "loss": 0.0712, "step": 328950 }, { "epoch": 3.23, "grad_norm": 5.420897006988525, "learning_rate": 3.4170415164784977e-06, "loss": 0.1618, "step": 328975 }, { "epoch": 3.23, "grad_norm": 2.8571488857269287, "learning_rate": 3.416917394024249e-06, "loss": 0.0768, "step": 329000 }, { "epoch": 3.24, "grad_norm": 4.33789587020874, "learning_rate": 3.41679327157e-06, "loss": 0.1883, "step": 329025 }, { "epoch": 3.24, "grad_norm": 11.292352676391602, "learning_rate": 3.4166691491157518e-06, "loss": 0.0748, "step": 329050 }, { "epoch": 3.24, "grad_norm": 3.888814926147461, "learning_rate": 3.4165450266615034e-06, "loss": 0.1566, "step": 329075 }, { "epoch": 3.24, "grad_norm": 1.873477578163147, "learning_rate": 3.416420904207255e-06, "loss": 0.0801, "step": 329100 }, { "epoch": 3.24, "grad_norm": 9.649394989013672, "learning_rate": 3.4162967817530062e-06, "loss": 0.1947, "step": 329125 }, { "epoch": 3.24, "grad_norm": 10.883256912231445, "learning_rate": 3.4161726592987583e-06, "loss": 0.0723, "step": 329150 }, { "epoch": 3.24, "grad_norm": 5.964827537536621, "learning_rate": 3.4160485368445095e-06, "loss": 0.1438, "step": 329175 }, { "epoch": 3.24, "grad_norm": 12.629916191101074, "learning_rate": 3.4159244143902607e-06, "loss": 0.0713, "step": 329200 }, { "epoch": 3.24, "grad_norm": 2.7955267429351807, "learning_rate": 3.4158002919360128e-06, "loss": 0.144, "step": 329225 }, { "epoch": 3.24, "grad_norm": 4.1131062507629395, "learning_rate": 3.415676169481764e-06, "loss": 0.0638, "step": 329250 }, { "epoch": 3.24, "grad_norm": 6.9221930503845215, "learning_rate": 3.4155520470275156e-06, "loss": 0.1452, "step": 329275 }, { "epoch": 3.24, "grad_norm": 3.1779420375823975, "learning_rate": 3.4154279245732673e-06, "loss": 0.063, "step": 329300 }, { "epoch": 3.24, "grad_norm": 4.589847087860107, "learning_rate": 3.415303802119019e-06, "loss": 0.1606, "step": 329325 }, { "epoch": 3.24, "grad_norm": 8.612954139709473, "learning_rate": 3.41517967966477e-06, "loss": 0.0716, "step": 329350 }, { "epoch": 3.24, "grad_norm": 4.193745136260986, "learning_rate": 3.415060522108692e-06, "loss": 0.1736, "step": 329375 }, { "epoch": 3.24, "grad_norm": 10.854119300842285, "learning_rate": 3.414936399654443e-06, "loss": 0.0971, "step": 329400 }, { "epoch": 3.24, "grad_norm": 4.184703826904297, "learning_rate": 3.4148122772001952e-06, "loss": 0.1364, "step": 329425 }, { "epoch": 3.24, "grad_norm": 4.438168525695801, "learning_rate": 3.4146881547459465e-06, "loss": 0.0638, "step": 329450 }, { "epoch": 3.24, "grad_norm": 4.70329475402832, "learning_rate": 3.4145640322916977e-06, "loss": 0.1454, "step": 329475 }, { "epoch": 3.24, "grad_norm": 12.393953323364258, "learning_rate": 3.4144399098374497e-06, "loss": 0.0697, "step": 329500 }, { "epoch": 3.24, "grad_norm": 6.401658058166504, "learning_rate": 3.414315787383201e-06, "loss": 0.1861, "step": 329525 }, { "epoch": 3.24, "grad_norm": 7.734104633331299, "learning_rate": 3.4141916649289526e-06, "loss": 0.0647, "step": 329550 }, { "epoch": 3.24, "grad_norm": 4.619616508483887, "learning_rate": 3.414067542474704e-06, "loss": 0.1757, "step": 329575 }, { "epoch": 3.24, "grad_norm": 3.347536325454712, "learning_rate": 3.413943420020456e-06, "loss": 0.0808, "step": 329600 }, { "epoch": 3.24, "grad_norm": 6.633002758026123, "learning_rate": 3.413819297566207e-06, "loss": 0.1664, "step": 329625 }, { "epoch": 3.24, "grad_norm": 9.045726776123047, "learning_rate": 3.4136951751119583e-06, "loss": 0.0554, "step": 329650 }, { "epoch": 3.24, "grad_norm": 4.734727382659912, "learning_rate": 3.4135710526577103e-06, "loss": 0.1892, "step": 329675 }, { "epoch": 3.24, "grad_norm": 1.6763120889663696, "learning_rate": 3.4134469302034615e-06, "loss": 0.0836, "step": 329700 }, { "epoch": 3.24, "grad_norm": 4.621343612670898, "learning_rate": 3.413322807749213e-06, "loss": 0.1863, "step": 329725 }, { "epoch": 3.24, "grad_norm": 8.326834678649902, "learning_rate": 3.413198685294965e-06, "loss": 0.0592, "step": 329750 }, { "epoch": 3.24, "grad_norm": 6.435505390167236, "learning_rate": 3.4130745628407164e-06, "loss": 0.1699, "step": 329775 }, { "epoch": 3.24, "grad_norm": 16.339950561523438, "learning_rate": 3.4129504403864677e-06, "loss": 0.0668, "step": 329800 }, { "epoch": 3.24, "grad_norm": 4.4126129150390625, "learning_rate": 3.4128263179322197e-06, "loss": 0.1665, "step": 329825 }, { "epoch": 3.24, "grad_norm": 14.423070907592773, "learning_rate": 3.412702195477971e-06, "loss": 0.0974, "step": 329850 }, { "epoch": 3.24, "grad_norm": 3.75590443611145, "learning_rate": 3.412578073023722e-06, "loss": 0.1939, "step": 329875 }, { "epoch": 3.24, "grad_norm": 6.153167724609375, "learning_rate": 3.412453950569474e-06, "loss": 0.0504, "step": 329900 }, { "epoch": 3.24, "grad_norm": 6.977677822113037, "learning_rate": 3.4123298281152254e-06, "loss": 0.1864, "step": 329925 }, { "epoch": 3.24, "grad_norm": 8.959493637084961, "learning_rate": 3.412205705660977e-06, "loss": 0.0641, "step": 329950 }, { "epoch": 3.24, "grad_norm": 3.7212250232696533, "learning_rate": 3.4120815832067287e-06, "loss": 0.1417, "step": 329975 }, { "epoch": 3.24, "grad_norm": 4.67629337310791, "learning_rate": 3.4119574607524803e-06, "loss": 0.0731, "step": 330000 }, { "epoch": 3.24, "grad_norm": 5.606770038604736, "learning_rate": 3.4118333382982315e-06, "loss": 0.168, "step": 330025 }, { "epoch": 3.25, "grad_norm": 12.297465324401855, "learning_rate": 3.4117092158439836e-06, "loss": 0.0969, "step": 330050 }, { "epoch": 3.25, "grad_norm": 1.6161011457443237, "learning_rate": 3.411585093389735e-06, "loss": 0.1671, "step": 330075 }, { "epoch": 3.25, "grad_norm": 5.306107044219971, "learning_rate": 3.4114609709354864e-06, "loss": 0.0577, "step": 330100 }, { "epoch": 3.25, "grad_norm": 3.8346688747406006, "learning_rate": 3.411336848481238e-06, "loss": 0.1744, "step": 330125 }, { "epoch": 3.25, "grad_norm": 7.495055675506592, "learning_rate": 3.4112127260269897e-06, "loss": 0.061, "step": 330150 }, { "epoch": 3.25, "grad_norm": 5.451707363128662, "learning_rate": 3.411088603572741e-06, "loss": 0.1849, "step": 330175 }, { "epoch": 3.25, "grad_norm": 5.2582597732543945, "learning_rate": 3.410964481118493e-06, "loss": 0.0847, "step": 330200 }, { "epoch": 3.25, "grad_norm": 3.633091449737549, "learning_rate": 3.410840358664244e-06, "loss": 0.1796, "step": 330225 }, { "epoch": 3.25, "grad_norm": 8.218047142028809, "learning_rate": 3.4107162362099954e-06, "loss": 0.0726, "step": 330250 }, { "epoch": 3.25, "grad_norm": 5.627495765686035, "learning_rate": 3.4105921137557474e-06, "loss": 0.1515, "step": 330275 }, { "epoch": 3.25, "grad_norm": 5.063467502593994, "learning_rate": 3.4104679913014987e-06, "loss": 0.061, "step": 330300 }, { "epoch": 3.25, "grad_norm": 5.135143756866455, "learning_rate": 3.4103438688472503e-06, "loss": 0.2005, "step": 330325 }, { "epoch": 3.25, "grad_norm": 8.535012245178223, "learning_rate": 3.410219746393002e-06, "loss": 0.0651, "step": 330350 }, { "epoch": 3.25, "grad_norm": 4.987768173217773, "learning_rate": 3.4100956239387536e-06, "loss": 0.1735, "step": 330375 }, { "epoch": 3.25, "grad_norm": 13.479971885681152, "learning_rate": 3.4099715014845048e-06, "loss": 0.0819, "step": 330400 }, { "epoch": 3.25, "grad_norm": 2.5883851051330566, "learning_rate": 3.409847379030257e-06, "loss": 0.1563, "step": 330425 }, { "epoch": 3.25, "grad_norm": 9.751045227050781, "learning_rate": 3.409723256576008e-06, "loss": 0.0625, "step": 330450 }, { "epoch": 3.25, "grad_norm": 4.204762935638428, "learning_rate": 3.4095991341217593e-06, "loss": 0.1921, "step": 330475 }, { "epoch": 3.25, "grad_norm": 6.1939311027526855, "learning_rate": 3.409475011667511e-06, "loss": 0.0757, "step": 330500 }, { "epoch": 3.25, "grad_norm": 5.346383094787598, "learning_rate": 3.4093508892132625e-06, "loss": 0.1877, "step": 330525 }, { "epoch": 3.25, "grad_norm": 10.301251411437988, "learning_rate": 3.409226766759014e-06, "loss": 0.0768, "step": 330550 }, { "epoch": 3.25, "grad_norm": 3.8387248516082764, "learning_rate": 3.4091026443047654e-06, "loss": 0.1428, "step": 330575 }, { "epoch": 3.25, "grad_norm": 3.387678384780884, "learning_rate": 3.4089785218505174e-06, "loss": 0.0727, "step": 330600 }, { "epoch": 3.25, "grad_norm": 4.9469828605651855, "learning_rate": 3.4088543993962686e-06, "loss": 0.1925, "step": 330625 }, { "epoch": 3.25, "grad_norm": 4.107479095458984, "learning_rate": 3.40873027694202e-06, "loss": 0.068, "step": 330650 }, { "epoch": 3.25, "grad_norm": 5.396246910095215, "learning_rate": 3.408606154487772e-06, "loss": 0.1699, "step": 330675 }, { "epoch": 3.25, "grad_norm": 9.98751163482666, "learning_rate": 3.408482032033523e-06, "loss": 0.083, "step": 330700 }, { "epoch": 3.25, "grad_norm": 4.562262535095215, "learning_rate": 3.4083579095792748e-06, "loss": 0.183, "step": 330725 }, { "epoch": 3.25, "grad_norm": 7.4410576820373535, "learning_rate": 3.4082337871250264e-06, "loss": 0.0826, "step": 330750 }, { "epoch": 3.25, "grad_norm": 13.010295867919922, "learning_rate": 3.408109664670778e-06, "loss": 0.2114, "step": 330775 }, { "epoch": 3.25, "grad_norm": 16.848838806152344, "learning_rate": 3.4079855422165292e-06, "loss": 0.0881, "step": 330800 }, { "epoch": 3.25, "grad_norm": 5.315726280212402, "learning_rate": 3.4078614197622813e-06, "loss": 0.1684, "step": 330825 }, { "epoch": 3.25, "grad_norm": 8.048569679260254, "learning_rate": 3.4077372973080325e-06, "loss": 0.0712, "step": 330850 }, { "epoch": 3.25, "grad_norm": 3.7777621746063232, "learning_rate": 3.4076131748537837e-06, "loss": 0.1552, "step": 330875 }, { "epoch": 3.25, "grad_norm": 11.695185661315918, "learning_rate": 3.4074890523995358e-06, "loss": 0.0829, "step": 330900 }, { "epoch": 3.25, "grad_norm": 5.235741138458252, "learning_rate": 3.407364929945287e-06, "loss": 0.1973, "step": 330925 }, { "epoch": 3.25, "grad_norm": 11.921926498413086, "learning_rate": 3.4072408074910386e-06, "loss": 0.0821, "step": 330950 }, { "epoch": 3.25, "grad_norm": 4.42051362991333, "learning_rate": 3.4071166850367903e-06, "loss": 0.1786, "step": 330975 }, { "epoch": 3.25, "grad_norm": 6.174139022827148, "learning_rate": 3.406992562582542e-06, "loss": 0.0586, "step": 331000 }, { "epoch": 3.25, "grad_norm": 25.643177032470703, "learning_rate": 3.406868440128293e-06, "loss": 0.1965, "step": 331025 }, { "epoch": 3.25, "grad_norm": 10.034172058105469, "learning_rate": 3.406744317674045e-06, "loss": 0.0592, "step": 331050 }, { "epoch": 3.26, "grad_norm": 5.002789497375488, "learning_rate": 3.4066201952197964e-06, "loss": 0.2199, "step": 331075 }, { "epoch": 3.26, "grad_norm": 2.5195512771606445, "learning_rate": 3.4064960727655476e-06, "loss": 0.0548, "step": 331100 }, { "epoch": 3.26, "grad_norm": 4.659219741821289, "learning_rate": 3.4063719503112997e-06, "loss": 0.1765, "step": 331125 }, { "epoch": 3.26, "grad_norm": 7.6023783683776855, "learning_rate": 3.406247827857051e-06, "loss": 0.0801, "step": 331150 }, { "epoch": 3.26, "grad_norm": 6.561711311340332, "learning_rate": 3.4061237054028025e-06, "loss": 0.158, "step": 331175 }, { "epoch": 3.26, "grad_norm": 8.96542739868164, "learning_rate": 3.405999582948554e-06, "loss": 0.0538, "step": 331200 }, { "epoch": 3.26, "grad_norm": 2.7788760662078857, "learning_rate": 3.4058754604943058e-06, "loss": 0.1528, "step": 331225 }, { "epoch": 3.26, "grad_norm": 4.755132675170898, "learning_rate": 3.405751338040057e-06, "loss": 0.0735, "step": 331250 }, { "epoch": 3.26, "grad_norm": 4.327905654907227, "learning_rate": 3.405627215585809e-06, "loss": 0.1877, "step": 331275 }, { "epoch": 3.26, "grad_norm": 4.678091526031494, "learning_rate": 3.4055030931315602e-06, "loss": 0.0738, "step": 331300 }, { "epoch": 3.26, "grad_norm": 4.017287254333496, "learning_rate": 3.4053789706773115e-06, "loss": 0.175, "step": 331325 }, { "epoch": 3.26, "grad_norm": 10.903921127319336, "learning_rate": 3.405254848223063e-06, "loss": 0.0831, "step": 331350 }, { "epoch": 3.26, "grad_norm": 4.363287448883057, "learning_rate": 3.4051307257688147e-06, "loss": 0.1682, "step": 331375 }, { "epoch": 3.26, "grad_norm": 10.508602142333984, "learning_rate": 3.4050066033145664e-06, "loss": 0.0721, "step": 331400 }, { "epoch": 3.26, "grad_norm": 5.501246452331543, "learning_rate": 3.4048824808603176e-06, "loss": 0.1798, "step": 331425 }, { "epoch": 3.26, "grad_norm": 5.451865196228027, "learning_rate": 3.4047583584060696e-06, "loss": 0.0885, "step": 331450 }, { "epoch": 3.26, "grad_norm": 4.730838298797607, "learning_rate": 3.404634235951821e-06, "loss": 0.1736, "step": 331475 }, { "epoch": 3.26, "grad_norm": 5.113295078277588, "learning_rate": 3.404510113497572e-06, "loss": 0.0715, "step": 331500 }, { "epoch": 3.26, "grad_norm": 3.181814432144165, "learning_rate": 3.404385991043324e-06, "loss": 0.2101, "step": 331525 }, { "epoch": 3.26, "grad_norm": 9.341031074523926, "learning_rate": 3.4042618685890753e-06, "loss": 0.08, "step": 331550 }, { "epoch": 3.26, "grad_norm": 6.081996917724609, "learning_rate": 3.404137746134827e-06, "loss": 0.1845, "step": 331575 }, { "epoch": 3.26, "grad_norm": 7.374946117401123, "learning_rate": 3.4040136236805786e-06, "loss": 0.0915, "step": 331600 }, { "epoch": 3.26, "grad_norm": 4.294785976409912, "learning_rate": 3.4038895012263302e-06, "loss": 0.1366, "step": 331625 }, { "epoch": 3.26, "grad_norm": 11.368206024169922, "learning_rate": 3.4037653787720814e-06, "loss": 0.0791, "step": 331650 }, { "epoch": 3.26, "grad_norm": 6.3563232421875, "learning_rate": 3.4036412563178335e-06, "loss": 0.1819, "step": 331675 }, { "epoch": 3.26, "grad_norm": 5.108701705932617, "learning_rate": 3.4035171338635847e-06, "loss": 0.0683, "step": 331700 }, { "epoch": 3.26, "grad_norm": 5.548494338989258, "learning_rate": 3.403393011409336e-06, "loss": 0.2082, "step": 331725 }, { "epoch": 3.26, "grad_norm": 4.801632404327393, "learning_rate": 3.403268888955088e-06, "loss": 0.071, "step": 331750 }, { "epoch": 3.26, "grad_norm": 7.8986711502075195, "learning_rate": 3.403144766500839e-06, "loss": 0.1648, "step": 331775 }, { "epoch": 3.26, "grad_norm": 7.288205623626709, "learning_rate": 3.403020644046591e-06, "loss": 0.0841, "step": 331800 }, { "epoch": 3.26, "grad_norm": 2.9068403244018555, "learning_rate": 3.4029014864905123e-06, "loss": 0.1831, "step": 331825 }, { "epoch": 3.26, "grad_norm": 6.266060829162598, "learning_rate": 3.402777364036264e-06, "loss": 0.0833, "step": 331850 }, { "epoch": 3.26, "grad_norm": 5.352452754974365, "learning_rate": 3.402653241582015e-06, "loss": 0.1491, "step": 331875 }, { "epoch": 3.26, "grad_norm": 10.077751159667969, "learning_rate": 3.402529119127767e-06, "loss": 0.0772, "step": 331900 }, { "epoch": 3.26, "grad_norm": 5.23598575592041, "learning_rate": 3.4024049966735184e-06, "loss": 0.2186, "step": 331925 }, { "epoch": 3.26, "grad_norm": 14.059643745422363, "learning_rate": 3.4022808742192696e-06, "loss": 0.0586, "step": 331950 }, { "epoch": 3.26, "grad_norm": 9.856768608093262, "learning_rate": 3.4021567517650217e-06, "loss": 0.1871, "step": 331975 }, { "epoch": 3.26, "grad_norm": 6.462353229522705, "learning_rate": 3.402032629310773e-06, "loss": 0.0776, "step": 332000 }, { "epoch": 3.26, "grad_norm": 4.888151168823242, "learning_rate": 3.4019085068565245e-06, "loss": 0.1268, "step": 332025 }, { "epoch": 3.26, "grad_norm": 8.602148056030273, "learning_rate": 3.401784384402276e-06, "loss": 0.0783, "step": 332050 }, { "epoch": 3.27, "grad_norm": 4.138673782348633, "learning_rate": 3.4016602619480278e-06, "loss": 0.1779, "step": 332075 }, { "epoch": 3.27, "grad_norm": 4.638534069061279, "learning_rate": 3.401536139493779e-06, "loss": 0.0661, "step": 332100 }, { "epoch": 3.27, "grad_norm": 13.217716217041016, "learning_rate": 3.401412017039531e-06, "loss": 0.2051, "step": 332125 }, { "epoch": 3.27, "grad_norm": 8.044610977172852, "learning_rate": 3.4012878945852823e-06, "loss": 0.0595, "step": 332150 }, { "epoch": 3.27, "grad_norm": 6.158946514129639, "learning_rate": 3.4011637721310335e-06, "loss": 0.1499, "step": 332175 }, { "epoch": 3.27, "grad_norm": 5.02610445022583, "learning_rate": 3.4010396496767855e-06, "loss": 0.0656, "step": 332200 }, { "epoch": 3.27, "grad_norm": 5.122600078582764, "learning_rate": 3.4009155272225367e-06, "loss": 0.2048, "step": 332225 }, { "epoch": 3.27, "grad_norm": 11.67148494720459, "learning_rate": 3.4007914047682884e-06, "loss": 0.0611, "step": 332250 }, { "epoch": 3.27, "grad_norm": 4.2703447341918945, "learning_rate": 3.40066728231404e-06, "loss": 0.1845, "step": 332275 }, { "epoch": 3.27, "grad_norm": 8.674220085144043, "learning_rate": 3.4005431598597917e-06, "loss": 0.0936, "step": 332300 }, { "epoch": 3.27, "grad_norm": 5.759368419647217, "learning_rate": 3.400419037405543e-06, "loss": 0.2232, "step": 332325 }, { "epoch": 3.27, "grad_norm": 3.2900588512420654, "learning_rate": 3.400294914951295e-06, "loss": 0.0641, "step": 332350 }, { "epoch": 3.27, "grad_norm": 4.760621547698975, "learning_rate": 3.400170792497046e-06, "loss": 0.1519, "step": 332375 }, { "epoch": 3.27, "grad_norm": 11.033550262451172, "learning_rate": 3.4000466700427973e-06, "loss": 0.0605, "step": 332400 }, { "epoch": 3.27, "grad_norm": 5.27181339263916, "learning_rate": 3.3999225475885494e-06, "loss": 0.2281, "step": 332425 }, { "epoch": 3.27, "grad_norm": 11.843724250793457, "learning_rate": 3.3997984251343006e-06, "loss": 0.0841, "step": 332450 }, { "epoch": 3.27, "grad_norm": 3.627708911895752, "learning_rate": 3.3996743026800522e-06, "loss": 0.1779, "step": 332475 }, { "epoch": 3.27, "grad_norm": 8.268097877502441, "learning_rate": 3.399550180225804e-06, "loss": 0.0778, "step": 332500 }, { "epoch": 3.27, "grad_norm": 3.581155776977539, "learning_rate": 3.3994260577715555e-06, "loss": 0.1754, "step": 332525 }, { "epoch": 3.27, "grad_norm": 5.943260669708252, "learning_rate": 3.3993019353173067e-06, "loss": 0.0464, "step": 332550 }, { "epoch": 3.27, "grad_norm": 4.433899879455566, "learning_rate": 3.399177812863059e-06, "loss": 0.1578, "step": 332575 }, { "epoch": 3.27, "grad_norm": 10.921414375305176, "learning_rate": 3.39905369040881e-06, "loss": 0.069, "step": 332600 }, { "epoch": 3.27, "grad_norm": 4.587649345397949, "learning_rate": 3.3989295679545612e-06, "loss": 0.1764, "step": 332625 }, { "epoch": 3.27, "grad_norm": 13.3612060546875, "learning_rate": 3.3988054455003133e-06, "loss": 0.064, "step": 332650 }, { "epoch": 3.27, "grad_norm": 5.249680519104004, "learning_rate": 3.3986813230460645e-06, "loss": 0.1905, "step": 332675 }, { "epoch": 3.27, "grad_norm": 0.6706933975219727, "learning_rate": 3.398557200591816e-06, "loss": 0.0718, "step": 332700 }, { "epoch": 3.27, "grad_norm": 6.508938312530518, "learning_rate": 3.3984330781375678e-06, "loss": 0.156, "step": 332725 }, { "epoch": 3.27, "grad_norm": 10.115640640258789, "learning_rate": 3.3983089556833194e-06, "loss": 0.0813, "step": 332750 }, { "epoch": 3.27, "grad_norm": 4.541810512542725, "learning_rate": 3.3981848332290706e-06, "loss": 0.201, "step": 332775 }, { "epoch": 3.27, "grad_norm": 8.23853588104248, "learning_rate": 3.3980607107748222e-06, "loss": 0.0856, "step": 332800 }, { "epoch": 3.27, "grad_norm": 4.1070427894592285, "learning_rate": 3.397936588320574e-06, "loss": 0.1767, "step": 332825 }, { "epoch": 3.27, "grad_norm": 7.158209800720215, "learning_rate": 3.3978124658663255e-06, "loss": 0.0589, "step": 332850 }, { "epoch": 3.27, "grad_norm": 3.535700559616089, "learning_rate": 3.3976883434120767e-06, "loss": 0.1602, "step": 332875 }, { "epoch": 3.27, "grad_norm": 9.244157791137695, "learning_rate": 3.3975642209578288e-06, "loss": 0.0625, "step": 332900 }, { "epoch": 3.27, "grad_norm": 4.758669853210449, "learning_rate": 3.39744009850358e-06, "loss": 0.1774, "step": 332925 }, { "epoch": 3.27, "grad_norm": 7.4459099769592285, "learning_rate": 3.397315976049331e-06, "loss": 0.0664, "step": 332950 }, { "epoch": 3.27, "grad_norm": 4.634615898132324, "learning_rate": 3.3971918535950833e-06, "loss": 0.1838, "step": 332975 }, { "epoch": 3.27, "grad_norm": 11.115948677062988, "learning_rate": 3.3970677311408345e-06, "loss": 0.0755, "step": 333000 }, { "epoch": 3.27, "grad_norm": 6.010265350341797, "learning_rate": 3.396943608686586e-06, "loss": 0.2194, "step": 333025 }, { "epoch": 3.27, "grad_norm": 8.283760070800781, "learning_rate": 3.3968194862323377e-06, "loss": 0.0766, "step": 333050 }, { "epoch": 3.27, "grad_norm": 5.268774032592773, "learning_rate": 3.3966953637780894e-06, "loss": 0.1465, "step": 333075 }, { "epoch": 3.28, "grad_norm": 7.816094398498535, "learning_rate": 3.3965712413238406e-06, "loss": 0.0708, "step": 333100 }, { "epoch": 3.28, "grad_norm": 3.903827428817749, "learning_rate": 3.3964471188695926e-06, "loss": 0.1952, "step": 333125 }, { "epoch": 3.28, "grad_norm": 8.272769927978516, "learning_rate": 3.396322996415344e-06, "loss": 0.0631, "step": 333150 }, { "epoch": 3.28, "grad_norm": 3.7910077571868896, "learning_rate": 3.396198873961095e-06, "loss": 0.179, "step": 333175 }, { "epoch": 3.28, "grad_norm": 8.519214630126953, "learning_rate": 3.396074751506847e-06, "loss": 0.066, "step": 333200 }, { "epoch": 3.28, "grad_norm": 3.644491195678711, "learning_rate": 3.3959506290525983e-06, "loss": 0.1715, "step": 333225 }, { "epoch": 3.28, "grad_norm": 11.112944602966309, "learning_rate": 3.39582650659835e-06, "loss": 0.0741, "step": 333250 }, { "epoch": 3.28, "grad_norm": 3.798354148864746, "learning_rate": 3.3957023841441016e-06, "loss": 0.1466, "step": 333275 }, { "epoch": 3.28, "grad_norm": 5.858020782470703, "learning_rate": 3.3955782616898532e-06, "loss": 0.0689, "step": 333300 }, { "epoch": 3.28, "grad_norm": 3.5777435302734375, "learning_rate": 3.3954541392356045e-06, "loss": 0.2098, "step": 333325 }, { "epoch": 3.28, "grad_norm": 10.787718772888184, "learning_rate": 3.3953300167813565e-06, "loss": 0.0938, "step": 333350 }, { "epoch": 3.28, "grad_norm": 6.3131842613220215, "learning_rate": 3.3952058943271077e-06, "loss": 0.1662, "step": 333375 }, { "epoch": 3.28, "grad_norm": 7.249447822570801, "learning_rate": 3.395081771872859e-06, "loss": 0.0638, "step": 333400 }, { "epoch": 3.28, "grad_norm": 4.615776062011719, "learning_rate": 3.394957649418611e-06, "loss": 0.1443, "step": 333425 }, { "epoch": 3.28, "grad_norm": 8.952034950256348, "learning_rate": 3.394833526964362e-06, "loss": 0.0878, "step": 333450 }, { "epoch": 3.28, "grad_norm": 4.384696006774902, "learning_rate": 3.394709404510114e-06, "loss": 0.2026, "step": 333475 }, { "epoch": 3.28, "grad_norm": 8.886404037475586, "learning_rate": 3.3945852820558655e-06, "loss": 0.0578, "step": 333500 }, { "epoch": 3.28, "grad_norm": 3.8297152519226074, "learning_rate": 3.394461159601617e-06, "loss": 0.1918, "step": 333525 }, { "epoch": 3.28, "grad_norm": 8.140973091125488, "learning_rate": 3.3943370371473683e-06, "loss": 0.0684, "step": 333550 }, { "epoch": 3.28, "grad_norm": 7.526733875274658, "learning_rate": 3.3942129146931204e-06, "loss": 0.1461, "step": 333575 }, { "epoch": 3.28, "grad_norm": 10.203938484191895, "learning_rate": 3.3940887922388716e-06, "loss": 0.0493, "step": 333600 }, { "epoch": 3.28, "grad_norm": 3.6030726432800293, "learning_rate": 3.393964669784623e-06, "loss": 0.222, "step": 333625 }, { "epoch": 3.28, "grad_norm": 10.40263557434082, "learning_rate": 3.3938405473303744e-06, "loss": 0.0614, "step": 333650 }, { "epoch": 3.28, "grad_norm": 5.62547492980957, "learning_rate": 3.393716424876126e-06, "loss": 0.1892, "step": 333675 }, { "epoch": 3.28, "grad_norm": 7.121222972869873, "learning_rate": 3.3935923024218777e-06, "loss": 0.1004, "step": 333700 }, { "epoch": 3.28, "grad_norm": 6.976045608520508, "learning_rate": 3.393468179967629e-06, "loss": 0.2016, "step": 333725 }, { "epoch": 3.28, "grad_norm": 6.947322845458984, "learning_rate": 3.393344057513381e-06, "loss": 0.0671, "step": 333750 }, { "epoch": 3.28, "grad_norm": 4.221195697784424, "learning_rate": 3.393219935059132e-06, "loss": 0.1773, "step": 333775 }, { "epoch": 3.28, "grad_norm": 7.690750598907471, "learning_rate": 3.3930958126048834e-06, "loss": 0.0838, "step": 333800 }, { "epoch": 3.28, "grad_norm": 7.406391620635986, "learning_rate": 3.3929716901506355e-06, "loss": 0.1817, "step": 333825 }, { "epoch": 3.28, "grad_norm": 3.30594801902771, "learning_rate": 3.3928475676963867e-06, "loss": 0.0734, "step": 333850 }, { "epoch": 3.28, "grad_norm": 6.18604850769043, "learning_rate": 3.3927234452421383e-06, "loss": 0.1862, "step": 333875 }, { "epoch": 3.28, "grad_norm": 4.784651279449463, "learning_rate": 3.39259932278789e-06, "loss": 0.0629, "step": 333900 }, { "epoch": 3.28, "grad_norm": 5.222762107849121, "learning_rate": 3.3924752003336416e-06, "loss": 0.1697, "step": 333925 }, { "epoch": 3.28, "grad_norm": 5.761200428009033, "learning_rate": 3.3923510778793928e-06, "loss": 0.0824, "step": 333950 }, { "epoch": 3.28, "grad_norm": 5.2152276039123535, "learning_rate": 3.392226955425145e-06, "loss": 0.2013, "step": 333975 }, { "epoch": 3.28, "grad_norm": 12.167186737060547, "learning_rate": 3.392102832970896e-06, "loss": 0.0767, "step": 334000 }, { "epoch": 3.28, "grad_norm": 5.458395004272461, "learning_rate": 3.3919787105166473e-06, "loss": 0.1673, "step": 334025 }, { "epoch": 3.28, "grad_norm": 12.58460521697998, "learning_rate": 3.3918545880623993e-06, "loss": 0.0836, "step": 334050 }, { "epoch": 3.28, "grad_norm": 3.884896755218506, "learning_rate": 3.3917304656081505e-06, "loss": 0.2218, "step": 334075 }, { "epoch": 3.28, "grad_norm": 8.313478469848633, "learning_rate": 3.391606343153902e-06, "loss": 0.0786, "step": 334100 }, { "epoch": 3.29, "grad_norm": 7.1268839836120605, "learning_rate": 3.391482220699654e-06, "loss": 0.1775, "step": 334125 }, { "epoch": 3.29, "grad_norm": 4.055566787719727, "learning_rate": 3.3913580982454054e-06, "loss": 0.0712, "step": 334150 }, { "epoch": 3.29, "grad_norm": 4.8288679122924805, "learning_rate": 3.3912389406893265e-06, "loss": 0.1857, "step": 334175 }, { "epoch": 3.29, "grad_norm": 7.527451038360596, "learning_rate": 3.3911148182350785e-06, "loss": 0.0677, "step": 334200 }, { "epoch": 3.29, "grad_norm": 6.50435209274292, "learning_rate": 3.3909906957808297e-06, "loss": 0.16, "step": 334225 }, { "epoch": 3.29, "grad_norm": 12.229607582092285, "learning_rate": 3.390866573326581e-06, "loss": 0.0665, "step": 334250 }, { "epoch": 3.29, "grad_norm": 3.428619861602783, "learning_rate": 3.390742450872333e-06, "loss": 0.1881, "step": 334275 }, { "epoch": 3.29, "grad_norm": 10.003571510314941, "learning_rate": 3.3906183284180842e-06, "loss": 0.0845, "step": 334300 }, { "epoch": 3.29, "grad_norm": 6.195563793182373, "learning_rate": 3.390494205963836e-06, "loss": 0.2057, "step": 334325 }, { "epoch": 3.29, "grad_norm": 5.802414417266846, "learning_rate": 3.3903700835095875e-06, "loss": 0.0638, "step": 334350 }, { "epoch": 3.29, "grad_norm": 4.666550159454346, "learning_rate": 3.390245961055339e-06, "loss": 0.1843, "step": 334375 }, { "epoch": 3.29, "grad_norm": 5.164852619171143, "learning_rate": 3.3901218386010903e-06, "loss": 0.0628, "step": 334400 }, { "epoch": 3.29, "grad_norm": 5.9368977546691895, "learning_rate": 3.3899977161468424e-06, "loss": 0.1726, "step": 334425 }, { "epoch": 3.29, "grad_norm": 9.592652320861816, "learning_rate": 3.3898735936925936e-06, "loss": 0.0802, "step": 334450 }, { "epoch": 3.29, "grad_norm": 4.148362159729004, "learning_rate": 3.389749471238345e-06, "loss": 0.1837, "step": 334475 }, { "epoch": 3.29, "grad_norm": 0.8286067843437195, "learning_rate": 3.389625348784097e-06, "loss": 0.0698, "step": 334500 }, { "epoch": 3.29, "grad_norm": 3.8014628887176514, "learning_rate": 3.389501226329848e-06, "loss": 0.1883, "step": 334525 }, { "epoch": 3.29, "grad_norm": 4.593335151672363, "learning_rate": 3.3893771038755997e-06, "loss": 0.0696, "step": 334550 }, { "epoch": 3.29, "grad_norm": 7.949334144592285, "learning_rate": 3.3892529814213514e-06, "loss": 0.1855, "step": 334575 }, { "epoch": 3.29, "grad_norm": 2.3567941188812256, "learning_rate": 3.389128858967103e-06, "loss": 0.0688, "step": 334600 }, { "epoch": 3.29, "grad_norm": 4.722928524017334, "learning_rate": 3.389004736512854e-06, "loss": 0.1594, "step": 334625 }, { "epoch": 3.29, "grad_norm": 12.719712257385254, "learning_rate": 3.3888806140586063e-06, "loss": 0.0739, "step": 334650 }, { "epoch": 3.29, "grad_norm": 6.360005855560303, "learning_rate": 3.3887564916043575e-06, "loss": 0.1449, "step": 334675 }, { "epoch": 3.29, "grad_norm": 5.581308364868164, "learning_rate": 3.3886323691501087e-06, "loss": 0.0611, "step": 334700 }, { "epoch": 3.29, "grad_norm": 5.922702312469482, "learning_rate": 3.3885082466958607e-06, "loss": 0.1719, "step": 334725 }, { "epoch": 3.29, "grad_norm": 9.041354179382324, "learning_rate": 3.388384124241612e-06, "loss": 0.0844, "step": 334750 }, { "epoch": 3.29, "grad_norm": 4.9923505783081055, "learning_rate": 3.3882600017873636e-06, "loss": 0.1578, "step": 334775 }, { "epoch": 3.29, "grad_norm": 7.536735534667969, "learning_rate": 3.3881358793331152e-06, "loss": 0.0705, "step": 334800 }, { "epoch": 3.29, "grad_norm": 5.294032096862793, "learning_rate": 3.388011756878867e-06, "loss": 0.1712, "step": 334825 }, { "epoch": 3.29, "grad_norm": 5.959502696990967, "learning_rate": 3.387887634424618e-06, "loss": 0.0449, "step": 334850 }, { "epoch": 3.29, "grad_norm": 4.3059539794921875, "learning_rate": 3.38776351197037e-06, "loss": 0.1554, "step": 334875 }, { "epoch": 3.29, "grad_norm": 7.434376239776611, "learning_rate": 3.3876393895161213e-06, "loss": 0.0634, "step": 334900 }, { "epoch": 3.29, "grad_norm": 5.226577281951904, "learning_rate": 3.3875152670618726e-06, "loss": 0.1788, "step": 334925 }, { "epoch": 3.29, "grad_norm": 19.13877296447754, "learning_rate": 3.3873911446076246e-06, "loss": 0.0733, "step": 334950 }, { "epoch": 3.29, "grad_norm": 4.671113014221191, "learning_rate": 3.387267022153376e-06, "loss": 0.1727, "step": 334975 }, { "epoch": 3.29, "grad_norm": 11.831302642822266, "learning_rate": 3.3871428996991275e-06, "loss": 0.0693, "step": 335000 }, { "epoch": 3.29, "grad_norm": 4.036879539489746, "learning_rate": 3.3870187772448787e-06, "loss": 0.182, "step": 335025 }, { "epoch": 3.29, "grad_norm": 10.283425331115723, "learning_rate": 3.3868946547906307e-06, "loss": 0.0729, "step": 335050 }, { "epoch": 3.29, "grad_norm": 5.431056499481201, "learning_rate": 3.386770532336382e-06, "loss": 0.1789, "step": 335075 }, { "epoch": 3.29, "grad_norm": 3.5156402587890625, "learning_rate": 3.386646409882133e-06, "loss": 0.062, "step": 335100 }, { "epoch": 3.3, "grad_norm": 4.896014213562012, "learning_rate": 3.386522287427885e-06, "loss": 0.1355, "step": 335125 }, { "epoch": 3.3, "grad_norm": 4.720084190368652, "learning_rate": 3.3863981649736364e-06, "loss": 0.0746, "step": 335150 }, { "epoch": 3.3, "grad_norm": 4.471277713775635, "learning_rate": 3.386274042519388e-06, "loss": 0.1416, "step": 335175 }, { "epoch": 3.3, "grad_norm": 9.869226455688477, "learning_rate": 3.3861499200651397e-06, "loss": 0.0642, "step": 335200 }, { "epoch": 3.3, "grad_norm": 4.4742865562438965, "learning_rate": 3.3860257976108913e-06, "loss": 0.1883, "step": 335225 }, { "epoch": 3.3, "grad_norm": 7.960664749145508, "learning_rate": 3.3859016751566425e-06, "loss": 0.0818, "step": 335250 }, { "epoch": 3.3, "grad_norm": 8.135560989379883, "learning_rate": 3.3857775527023946e-06, "loss": 0.1822, "step": 335275 }, { "epoch": 3.3, "grad_norm": 0.8147193789482117, "learning_rate": 3.385653430248146e-06, "loss": 0.0511, "step": 335300 }, { "epoch": 3.3, "grad_norm": 7.321844100952148, "learning_rate": 3.385529307793897e-06, "loss": 0.18, "step": 335325 }, { "epoch": 3.3, "grad_norm": 6.385264873504639, "learning_rate": 3.385405185339649e-06, "loss": 0.0657, "step": 335350 }, { "epoch": 3.3, "grad_norm": 4.3110032081604, "learning_rate": 3.3852810628854003e-06, "loss": 0.1846, "step": 335375 }, { "epoch": 3.3, "grad_norm": 5.5603203773498535, "learning_rate": 3.385156940431152e-06, "loss": 0.0728, "step": 335400 }, { "epoch": 3.3, "grad_norm": 4.594220161437988, "learning_rate": 3.3850328179769036e-06, "loss": 0.1839, "step": 335425 }, { "epoch": 3.3, "grad_norm": 13.074259757995605, "learning_rate": 3.384908695522655e-06, "loss": 0.069, "step": 335450 }, { "epoch": 3.3, "grad_norm": 6.5682454109191895, "learning_rate": 3.3847845730684064e-06, "loss": 0.2123, "step": 335475 }, { "epoch": 3.3, "grad_norm": 5.337988376617432, "learning_rate": 3.3846604506141585e-06, "loss": 0.0561, "step": 335500 }, { "epoch": 3.3, "grad_norm": 3.949972152709961, "learning_rate": 3.3845363281599097e-06, "loss": 0.132, "step": 335525 }, { "epoch": 3.3, "grad_norm": 4.729210376739502, "learning_rate": 3.384412205705661e-06, "loss": 0.06, "step": 335550 }, { "epoch": 3.3, "grad_norm": 5.124168395996094, "learning_rate": 3.384288083251413e-06, "loss": 0.2038, "step": 335575 }, { "epoch": 3.3, "grad_norm": 12.330876350402832, "learning_rate": 3.384163960797164e-06, "loss": 0.0607, "step": 335600 }, { "epoch": 3.3, "grad_norm": 4.168741226196289, "learning_rate": 3.384039838342916e-06, "loss": 0.185, "step": 335625 }, { "epoch": 3.3, "grad_norm": 9.050237655639648, "learning_rate": 3.3839157158886674e-06, "loss": 0.0503, "step": 335650 }, { "epoch": 3.3, "grad_norm": 3.7658727169036865, "learning_rate": 3.383791593434419e-06, "loss": 0.1716, "step": 335675 }, { "epoch": 3.3, "grad_norm": 9.345342636108398, "learning_rate": 3.3836674709801703e-06, "loss": 0.0626, "step": 335700 }, { "epoch": 3.3, "grad_norm": 4.437014579772949, "learning_rate": 3.3835433485259223e-06, "loss": 0.191, "step": 335725 }, { "epoch": 3.3, "grad_norm": 11.371648788452148, "learning_rate": 3.3834192260716735e-06, "loss": 0.072, "step": 335750 }, { "epoch": 3.3, "grad_norm": 6.151017665863037, "learning_rate": 3.383295103617425e-06, "loss": 0.1643, "step": 335775 }, { "epoch": 3.3, "grad_norm": 6.352016925811768, "learning_rate": 3.383170981163177e-06, "loss": 0.0738, "step": 335800 }, { "epoch": 3.3, "grad_norm": 5.1291303634643555, "learning_rate": 3.3830468587089284e-06, "loss": 0.1742, "step": 335825 }, { "epoch": 3.3, "grad_norm": 13.682634353637695, "learning_rate": 3.3829227362546797e-06, "loss": 0.0801, "step": 335850 }, { "epoch": 3.3, "grad_norm": 3.859572172164917, "learning_rate": 3.382798613800431e-06, "loss": 0.1743, "step": 335875 }, { "epoch": 3.3, "grad_norm": 2.6760666370391846, "learning_rate": 3.382674491346183e-06, "loss": 0.0414, "step": 335900 }, { "epoch": 3.3, "grad_norm": 5.701512813568115, "learning_rate": 3.382550368891934e-06, "loss": 0.1357, "step": 335925 }, { "epoch": 3.3, "grad_norm": 4.870316505432129, "learning_rate": 3.3824262464376858e-06, "loss": 0.0745, "step": 335950 }, { "epoch": 3.3, "grad_norm": 4.236773490905762, "learning_rate": 3.3823021239834374e-06, "loss": 0.1624, "step": 335975 }, { "epoch": 3.3, "grad_norm": 4.76127290725708, "learning_rate": 3.382178001529189e-06, "loss": 0.0499, "step": 336000 }, { "epoch": 3.3, "grad_norm": 3.561911106109619, "learning_rate": 3.3820538790749403e-06, "loss": 0.2013, "step": 336025 }, { "epoch": 3.3, "grad_norm": 6.630865573883057, "learning_rate": 3.3819297566206923e-06, "loss": 0.0683, "step": 336050 }, { "epoch": 3.3, "grad_norm": 4.020751476287842, "learning_rate": 3.3818056341664435e-06, "loss": 0.1835, "step": 336075 }, { "epoch": 3.3, "grad_norm": 8.816449165344238, "learning_rate": 3.3816815117121947e-06, "loss": 0.063, "step": 336100 }, { "epoch": 3.3, "grad_norm": 4.047941207885742, "learning_rate": 3.381557389257947e-06, "loss": 0.191, "step": 336125 }, { "epoch": 3.31, "grad_norm": 5.942721843719482, "learning_rate": 3.381433266803698e-06, "loss": 0.0635, "step": 336150 }, { "epoch": 3.31, "grad_norm": 3.999882221221924, "learning_rate": 3.3813091443494496e-06, "loss": 0.1624, "step": 336175 }, { "epoch": 3.31, "grad_norm": 9.488557815551758, "learning_rate": 3.3811850218952013e-06, "loss": 0.0681, "step": 336200 }, { "epoch": 3.31, "grad_norm": 6.138167381286621, "learning_rate": 3.381060899440953e-06, "loss": 0.1795, "step": 336225 }, { "epoch": 3.31, "grad_norm": 5.073934555053711, "learning_rate": 3.380936776986704e-06, "loss": 0.1, "step": 336250 }, { "epoch": 3.31, "grad_norm": 6.320913791656494, "learning_rate": 3.380812654532456e-06, "loss": 0.1948, "step": 336275 }, { "epoch": 3.31, "grad_norm": 6.3396477699279785, "learning_rate": 3.3806885320782074e-06, "loss": 0.0605, "step": 336300 }, { "epoch": 3.31, "grad_norm": 3.840993881225586, "learning_rate": 3.3805644096239586e-06, "loss": 0.1653, "step": 336325 }, { "epoch": 3.31, "grad_norm": 2.03429913520813, "learning_rate": 3.3804402871697107e-06, "loss": 0.0645, "step": 336350 }, { "epoch": 3.31, "grad_norm": 6.39601469039917, "learning_rate": 3.380316164715462e-06, "loss": 0.1676, "step": 336375 }, { "epoch": 3.31, "grad_norm": 5.202895641326904, "learning_rate": 3.3801920422612135e-06, "loss": 0.0638, "step": 336400 }, { "epoch": 3.31, "grad_norm": 5.455444812774658, "learning_rate": 3.380072884705135e-06, "loss": 0.1681, "step": 336425 }, { "epoch": 3.31, "grad_norm": 7.272479057312012, "learning_rate": 3.3799487622508866e-06, "loss": 0.0885, "step": 336450 }, { "epoch": 3.31, "grad_norm": 4.619099140167236, "learning_rate": 3.379824639796638e-06, "loss": 0.1477, "step": 336475 }, { "epoch": 3.31, "grad_norm": 7.413942337036133, "learning_rate": 3.37970051734239e-06, "loss": 0.07, "step": 336500 }, { "epoch": 3.31, "grad_norm": 4.321178436279297, "learning_rate": 3.379576394888141e-06, "loss": 0.1719, "step": 336525 }, { "epoch": 3.31, "grad_norm": 8.499951362609863, "learning_rate": 3.3794522724338923e-06, "loss": 0.0947, "step": 336550 }, { "epoch": 3.31, "grad_norm": 4.392714023590088, "learning_rate": 3.3793281499796443e-06, "loss": 0.1781, "step": 336575 }, { "epoch": 3.31, "grad_norm": 9.94898509979248, "learning_rate": 3.3792040275253956e-06, "loss": 0.0717, "step": 336600 }, { "epoch": 3.31, "grad_norm": 4.996943950653076, "learning_rate": 3.379079905071147e-06, "loss": 0.1492, "step": 336625 }, { "epoch": 3.31, "grad_norm": 8.784162521362305, "learning_rate": 3.378955782616899e-06, "loss": 0.0578, "step": 336650 }, { "epoch": 3.31, "grad_norm": 6.032190322875977, "learning_rate": 3.3788316601626505e-06, "loss": 0.1605, "step": 336675 }, { "epoch": 3.31, "grad_norm": 4.747697353363037, "learning_rate": 3.3787075377084017e-06, "loss": 0.0788, "step": 336700 }, { "epoch": 3.31, "grad_norm": 2.883272886276245, "learning_rate": 3.3785834152541537e-06, "loss": 0.1586, "step": 336725 }, { "epoch": 3.31, "grad_norm": 3.451639413833618, "learning_rate": 3.378459292799905e-06, "loss": 0.0576, "step": 336750 }, { "epoch": 3.31, "grad_norm": 4.564689636230469, "learning_rate": 3.378335170345656e-06, "loss": 0.1904, "step": 336775 }, { "epoch": 3.31, "grad_norm": 12.99045467376709, "learning_rate": 3.3782110478914082e-06, "loss": 0.051, "step": 336800 }, { "epoch": 3.31, "grad_norm": 4.629150390625, "learning_rate": 3.3780869254371594e-06, "loss": 0.2037, "step": 336825 }, { "epoch": 3.31, "grad_norm": 5.9517292976379395, "learning_rate": 3.377962802982911e-06, "loss": 0.0641, "step": 336850 }, { "epoch": 3.31, "grad_norm": 4.593327045440674, "learning_rate": 3.3778386805286627e-06, "loss": 0.1866, "step": 336875 }, { "epoch": 3.31, "grad_norm": 9.29897689819336, "learning_rate": 3.3777145580744143e-06, "loss": 0.0702, "step": 336900 }, { "epoch": 3.31, "grad_norm": 5.622104644775391, "learning_rate": 3.3775904356201655e-06, "loss": 0.184, "step": 336925 }, { "epoch": 3.31, "grad_norm": 11.06243896484375, "learning_rate": 3.3774663131659176e-06, "loss": 0.0696, "step": 336950 }, { "epoch": 3.31, "grad_norm": 12.349837303161621, "learning_rate": 3.377342190711669e-06, "loss": 0.1784, "step": 336975 }, { "epoch": 3.31, "grad_norm": 5.647995471954346, "learning_rate": 3.37721806825742e-06, "loss": 0.0568, "step": 337000 }, { "epoch": 3.31, "grad_norm": 5.060055732727051, "learning_rate": 3.377093945803172e-06, "loss": 0.2034, "step": 337025 }, { "epoch": 3.31, "grad_norm": 11.509166717529297, "learning_rate": 3.3769698233489233e-06, "loss": 0.0756, "step": 337050 }, { "epoch": 3.31, "grad_norm": 4.708572864532471, "learning_rate": 3.376845700894675e-06, "loss": 0.1805, "step": 337075 }, { "epoch": 3.31, "grad_norm": 4.197775363922119, "learning_rate": 3.3767215784404266e-06, "loss": 0.0636, "step": 337100 }, { "epoch": 3.31, "grad_norm": 5.737817764282227, "learning_rate": 3.376597455986178e-06, "loss": 0.1979, "step": 337125 }, { "epoch": 3.31, "grad_norm": 8.388619422912598, "learning_rate": 3.3764733335319294e-06, "loss": 0.0715, "step": 337150 }, { "epoch": 3.32, "grad_norm": 3.32373046875, "learning_rate": 3.3763492110776815e-06, "loss": 0.198, "step": 337175 }, { "epoch": 3.32, "grad_norm": 4.3088250160217285, "learning_rate": 3.3762250886234327e-06, "loss": 0.0578, "step": 337200 }, { "epoch": 3.32, "grad_norm": 4.983644008636475, "learning_rate": 3.376100966169184e-06, "loss": 0.1895, "step": 337225 }, { "epoch": 3.32, "grad_norm": 8.64224910736084, "learning_rate": 3.375976843714936e-06, "loss": 0.0746, "step": 337250 }, { "epoch": 3.32, "grad_norm": 4.692904949188232, "learning_rate": 3.375852721260687e-06, "loss": 0.1649, "step": 337275 }, { "epoch": 3.32, "grad_norm": 4.827521324157715, "learning_rate": 3.375728598806439e-06, "loss": 0.0662, "step": 337300 }, { "epoch": 3.32, "grad_norm": 5.171250343322754, "learning_rate": 3.37560447635219e-06, "loss": 0.1711, "step": 337325 }, { "epoch": 3.32, "grad_norm": 11.409253120422363, "learning_rate": 3.375480353897942e-06, "loss": 0.0794, "step": 337350 }, { "epoch": 3.32, "grad_norm": 4.361122131347656, "learning_rate": 3.3753562314436933e-06, "loss": 0.1545, "step": 337375 }, { "epoch": 3.32, "grad_norm": 10.17105770111084, "learning_rate": 3.3752321089894445e-06, "loss": 0.081, "step": 337400 }, { "epoch": 3.32, "grad_norm": 5.639451503753662, "learning_rate": 3.3751079865351965e-06, "loss": 0.2544, "step": 337425 }, { "epoch": 3.32, "grad_norm": 14.077750205993652, "learning_rate": 3.3749838640809478e-06, "loss": 0.0823, "step": 337450 }, { "epoch": 3.32, "grad_norm": 5.083176612854004, "learning_rate": 3.3748597416266994e-06, "loss": 0.1613, "step": 337475 }, { "epoch": 3.32, "grad_norm": 2.252720355987549, "learning_rate": 3.374735619172451e-06, "loss": 0.08, "step": 337500 }, { "epoch": 3.32, "grad_norm": 2.9971790313720703, "learning_rate": 3.3746114967182027e-06, "loss": 0.1988, "step": 337525 }, { "epoch": 3.32, "grad_norm": 8.281357765197754, "learning_rate": 3.374487374263954e-06, "loss": 0.0562, "step": 337550 }, { "epoch": 3.32, "grad_norm": 3.2505531311035156, "learning_rate": 3.374363251809706e-06, "loss": 0.2263, "step": 337575 }, { "epoch": 3.32, "grad_norm": 7.297093868255615, "learning_rate": 3.374239129355457e-06, "loss": 0.0741, "step": 337600 }, { "epoch": 3.32, "grad_norm": 5.474214553833008, "learning_rate": 3.3741150069012084e-06, "loss": 0.1618, "step": 337625 }, { "epoch": 3.32, "grad_norm": 12.984756469726562, "learning_rate": 3.3739908844469604e-06, "loss": 0.0747, "step": 337650 }, { "epoch": 3.32, "grad_norm": 4.096920967102051, "learning_rate": 3.3738667619927116e-06, "loss": 0.2001, "step": 337675 }, { "epoch": 3.32, "grad_norm": 10.528204917907715, "learning_rate": 3.3737426395384633e-06, "loss": 0.0842, "step": 337700 }, { "epoch": 3.32, "grad_norm": 4.891407012939453, "learning_rate": 3.373618517084215e-06, "loss": 0.2067, "step": 337725 }, { "epoch": 3.32, "grad_norm": 11.26734447479248, "learning_rate": 3.3734943946299665e-06, "loss": 0.0737, "step": 337750 }, { "epoch": 3.32, "grad_norm": 3.4764246940612793, "learning_rate": 3.3733702721757177e-06, "loss": 0.1741, "step": 337775 }, { "epoch": 3.32, "grad_norm": 6.604677677154541, "learning_rate": 3.37324614972147e-06, "loss": 0.0644, "step": 337800 }, { "epoch": 3.32, "grad_norm": 3.6526248455047607, "learning_rate": 3.373122027267221e-06, "loss": 0.1728, "step": 337825 }, { "epoch": 3.32, "grad_norm": 12.153202056884766, "learning_rate": 3.3729979048129722e-06, "loss": 0.0624, "step": 337850 }, { "epoch": 3.32, "grad_norm": 4.506582260131836, "learning_rate": 3.3728737823587243e-06, "loss": 0.1832, "step": 337875 }, { "epoch": 3.32, "grad_norm": 7.561613082885742, "learning_rate": 3.3727496599044755e-06, "loss": 0.0688, "step": 337900 }, { "epoch": 3.32, "grad_norm": 4.145701885223389, "learning_rate": 3.372625537450227e-06, "loss": 0.1576, "step": 337925 }, { "epoch": 3.32, "grad_norm": 13.288676261901855, "learning_rate": 3.3725014149959788e-06, "loss": 0.0635, "step": 337950 }, { "epoch": 3.32, "grad_norm": 3.191549062728882, "learning_rate": 3.3723772925417304e-06, "loss": 0.1771, "step": 337975 }, { "epoch": 3.32, "grad_norm": 9.120455741882324, "learning_rate": 3.3722531700874816e-06, "loss": 0.0711, "step": 338000 }, { "epoch": 3.32, "grad_norm": 4.832174301147461, "learning_rate": 3.3721290476332337e-06, "loss": 0.1526, "step": 338025 }, { "epoch": 3.32, "grad_norm": 5.750442028045654, "learning_rate": 3.372004925178985e-06, "loss": 0.0675, "step": 338050 }, { "epoch": 3.32, "grad_norm": 7.2752203941345215, "learning_rate": 3.371880802724736e-06, "loss": 0.1878, "step": 338075 }, { "epoch": 3.32, "grad_norm": 5.712705135345459, "learning_rate": 3.371756680270488e-06, "loss": 0.0818, "step": 338100 }, { "epoch": 3.32, "grad_norm": 3.4396398067474365, "learning_rate": 3.3716325578162394e-06, "loss": 0.1848, "step": 338125 }, { "epoch": 3.32, "grad_norm": 13.974980354309082, "learning_rate": 3.371508435361991e-06, "loss": 0.0818, "step": 338150 }, { "epoch": 3.32, "grad_norm": 6.076005935668945, "learning_rate": 3.371384312907742e-06, "loss": 0.2069, "step": 338175 }, { "epoch": 3.33, "grad_norm": 12.368499755859375, "learning_rate": 3.3712601904534943e-06, "loss": 0.0799, "step": 338200 }, { "epoch": 3.33, "grad_norm": 6.612626552581787, "learning_rate": 3.3711360679992455e-06, "loss": 0.2127, "step": 338225 }, { "epoch": 3.33, "grad_norm": 6.1010260581970215, "learning_rate": 3.3710119455449967e-06, "loss": 0.0749, "step": 338250 }, { "epoch": 3.33, "grad_norm": 5.291478157043457, "learning_rate": 3.3708878230907487e-06, "loss": 0.1619, "step": 338275 }, { "epoch": 3.33, "grad_norm": 11.022356033325195, "learning_rate": 3.3707637006365e-06, "loss": 0.0894, "step": 338300 }, { "epoch": 3.33, "grad_norm": 3.560532331466675, "learning_rate": 3.3706395781822516e-06, "loss": 0.1608, "step": 338325 }, { "epoch": 3.33, "grad_norm": 12.4937162399292, "learning_rate": 3.3705154557280032e-06, "loss": 0.0562, "step": 338350 }, { "epoch": 3.33, "grad_norm": 3.34029221534729, "learning_rate": 3.370391333273755e-06, "loss": 0.1549, "step": 338375 }, { "epoch": 3.33, "grad_norm": 6.1164069175720215, "learning_rate": 3.370267210819506e-06, "loss": 0.0847, "step": 338400 }, { "epoch": 3.33, "grad_norm": 5.704058647155762, "learning_rate": 3.370143088365258e-06, "loss": 0.1679, "step": 338425 }, { "epoch": 3.33, "grad_norm": 8.470385551452637, "learning_rate": 3.3700189659110093e-06, "loss": 0.0598, "step": 338450 }, { "epoch": 3.33, "grad_norm": 5.192416667938232, "learning_rate": 3.3698948434567606e-06, "loss": 0.1495, "step": 338475 }, { "epoch": 3.33, "grad_norm": 6.957571506500244, "learning_rate": 3.3697707210025126e-06, "loss": 0.0803, "step": 338500 }, { "epoch": 3.33, "grad_norm": 4.144990921020508, "learning_rate": 3.3696515634464336e-06, "loss": 0.1853, "step": 338525 }, { "epoch": 3.33, "grad_norm": 10.12633228302002, "learning_rate": 3.3695274409921857e-06, "loss": 0.0787, "step": 338550 }, { "epoch": 3.33, "grad_norm": 4.0560197830200195, "learning_rate": 3.369403318537937e-06, "loss": 0.1646, "step": 338575 }, { "epoch": 3.33, "grad_norm": 12.938091278076172, "learning_rate": 3.3692791960836885e-06, "loss": 0.079, "step": 338600 }, { "epoch": 3.33, "grad_norm": 5.101025581359863, "learning_rate": 3.36915507362944e-06, "loss": 0.1586, "step": 338625 }, { "epoch": 3.33, "grad_norm": 6.666966438293457, "learning_rate": 3.369030951175192e-06, "loss": 0.068, "step": 338650 }, { "epoch": 3.33, "grad_norm": 5.128100872039795, "learning_rate": 3.368906828720943e-06, "loss": 0.203, "step": 338675 }, { "epoch": 3.33, "grad_norm": 7.302223205566406, "learning_rate": 3.368782706266695e-06, "loss": 0.0718, "step": 338700 }, { "epoch": 3.33, "grad_norm": 3.7238500118255615, "learning_rate": 3.3686585838124463e-06, "loss": 0.1687, "step": 338725 }, { "epoch": 3.33, "grad_norm": 9.764643669128418, "learning_rate": 3.368534461358198e-06, "loss": 0.0555, "step": 338750 }, { "epoch": 3.33, "grad_norm": 3.56238055229187, "learning_rate": 3.368410338903949e-06, "loss": 0.216, "step": 338775 }, { "epoch": 3.33, "grad_norm": 2.931966781616211, "learning_rate": 3.368286216449701e-06, "loss": 0.0831, "step": 338800 }, { "epoch": 3.33, "grad_norm": 4.277218818664551, "learning_rate": 3.3681620939954524e-06, "loss": 0.1471, "step": 338825 }, { "epoch": 3.33, "grad_norm": 5.968966484069824, "learning_rate": 3.3680379715412036e-06, "loss": 0.0931, "step": 338850 }, { "epoch": 3.33, "grad_norm": 2.1151816844940186, "learning_rate": 3.3679138490869557e-06, "loss": 0.1779, "step": 338875 }, { "epoch": 3.33, "grad_norm": 6.441847801208496, "learning_rate": 3.367789726632707e-06, "loss": 0.0646, "step": 338900 }, { "epoch": 3.33, "grad_norm": 4.079080104827881, "learning_rate": 3.3676656041784585e-06, "loss": 0.2281, "step": 338925 }, { "epoch": 3.33, "grad_norm": 8.430957794189453, "learning_rate": 3.36754148172421e-06, "loss": 0.0605, "step": 338950 }, { "epoch": 3.33, "grad_norm": 5.1998772621154785, "learning_rate": 3.367417359269962e-06, "loss": 0.1643, "step": 338975 }, { "epoch": 3.33, "grad_norm": 4.598179340362549, "learning_rate": 3.367293236815713e-06, "loss": 0.062, "step": 339000 }, { "epoch": 3.33, "grad_norm": 4.016729354858398, "learning_rate": 3.367169114361465e-06, "loss": 0.1577, "step": 339025 }, { "epoch": 3.33, "grad_norm": 7.60959529876709, "learning_rate": 3.3670449919072163e-06, "loss": 0.0545, "step": 339050 }, { "epoch": 3.33, "grad_norm": 5.213680267333984, "learning_rate": 3.3669208694529675e-06, "loss": 0.1992, "step": 339075 }, { "epoch": 3.33, "grad_norm": 5.828026294708252, "learning_rate": 3.3667967469987196e-06, "loss": 0.0518, "step": 339100 }, { "epoch": 3.33, "grad_norm": 3.4267237186431885, "learning_rate": 3.3666726245444708e-06, "loss": 0.165, "step": 339125 }, { "epoch": 3.33, "grad_norm": 12.328907012939453, "learning_rate": 3.3665485020902224e-06, "loss": 0.1002, "step": 339150 }, { "epoch": 3.33, "grad_norm": 4.668949604034424, "learning_rate": 3.366424379635974e-06, "loss": 0.2092, "step": 339175 }, { "epoch": 3.34, "grad_norm": 8.584037780761719, "learning_rate": 3.3663002571817257e-06, "loss": 0.0705, "step": 339200 }, { "epoch": 3.34, "grad_norm": 5.008962631225586, "learning_rate": 3.366176134727477e-06, "loss": 0.1547, "step": 339225 }, { "epoch": 3.34, "grad_norm": 6.2552714347839355, "learning_rate": 3.366052012273229e-06, "loss": 0.0737, "step": 339250 }, { "epoch": 3.34, "grad_norm": 4.522035121917725, "learning_rate": 3.36592788981898e-06, "loss": 0.1847, "step": 339275 }, { "epoch": 3.34, "grad_norm": 13.720488548278809, "learning_rate": 3.3658037673647314e-06, "loss": 0.1009, "step": 339300 }, { "epoch": 3.34, "grad_norm": 4.724287986755371, "learning_rate": 3.3656796449104834e-06, "loss": 0.185, "step": 339325 }, { "epoch": 3.34, "grad_norm": 6.786708354949951, "learning_rate": 3.3655555224562346e-06, "loss": 0.0629, "step": 339350 }, { "epoch": 3.34, "grad_norm": 4.525421142578125, "learning_rate": 3.3654314000019863e-06, "loss": 0.2051, "step": 339375 }, { "epoch": 3.34, "grad_norm": 5.807633876800537, "learning_rate": 3.365307277547738e-06, "loss": 0.0822, "step": 339400 }, { "epoch": 3.34, "grad_norm": 4.434759616851807, "learning_rate": 3.3651831550934895e-06, "loss": 0.1636, "step": 339425 }, { "epoch": 3.34, "grad_norm": 8.93583869934082, "learning_rate": 3.3650590326392408e-06, "loss": 0.078, "step": 339450 }, { "epoch": 3.34, "grad_norm": 4.886803150177002, "learning_rate": 3.364934910184993e-06, "loss": 0.1418, "step": 339475 }, { "epoch": 3.34, "grad_norm": 9.964566230773926, "learning_rate": 3.364810787730744e-06, "loss": 0.0664, "step": 339500 }, { "epoch": 3.34, "grad_norm": 2.9103341102600098, "learning_rate": 3.3646866652764952e-06, "loss": 0.2072, "step": 339525 }, { "epoch": 3.34, "grad_norm": 10.560892105102539, "learning_rate": 3.3645625428222473e-06, "loss": 0.0972, "step": 339550 }, { "epoch": 3.34, "grad_norm": 3.975806951522827, "learning_rate": 3.3644384203679985e-06, "loss": 0.2046, "step": 339575 }, { "epoch": 3.34, "grad_norm": 10.217962265014648, "learning_rate": 3.36431429791375e-06, "loss": 0.0919, "step": 339600 }, { "epoch": 3.34, "grad_norm": 5.903943061828613, "learning_rate": 3.3641901754595013e-06, "loss": 0.1882, "step": 339625 }, { "epoch": 3.34, "grad_norm": 3.563845634460449, "learning_rate": 3.3640660530052534e-06, "loss": 0.0836, "step": 339650 }, { "epoch": 3.34, "grad_norm": 7.486373424530029, "learning_rate": 3.3639419305510046e-06, "loss": 0.2171, "step": 339675 }, { "epoch": 3.34, "grad_norm": 14.868484497070312, "learning_rate": 3.363817808096756e-06, "loss": 0.0643, "step": 339700 }, { "epoch": 3.34, "grad_norm": 5.636387348175049, "learning_rate": 3.363693685642508e-06, "loss": 0.1853, "step": 339725 }, { "epoch": 3.34, "grad_norm": 7.22939395904541, "learning_rate": 3.363569563188259e-06, "loss": 0.0782, "step": 339750 }, { "epoch": 3.34, "grad_norm": 5.466374397277832, "learning_rate": 3.3634454407340107e-06, "loss": 0.1783, "step": 339775 }, { "epoch": 3.34, "grad_norm": 10.720259666442871, "learning_rate": 3.3633213182797624e-06, "loss": 0.0807, "step": 339800 }, { "epoch": 3.34, "grad_norm": 5.374087810516357, "learning_rate": 3.363197195825514e-06, "loss": 0.1904, "step": 339825 }, { "epoch": 3.34, "grad_norm": 9.911002159118652, "learning_rate": 3.3630730733712652e-06, "loss": 0.0714, "step": 339850 }, { "epoch": 3.34, "grad_norm": 4.720535755157471, "learning_rate": 3.3629489509170173e-06, "loss": 0.1821, "step": 339875 }, { "epoch": 3.34, "grad_norm": 8.938779830932617, "learning_rate": 3.3628248284627685e-06, "loss": 0.0574, "step": 339900 }, { "epoch": 3.34, "grad_norm": 8.377283096313477, "learning_rate": 3.3627007060085197e-06, "loss": 0.1856, "step": 339925 }, { "epoch": 3.34, "grad_norm": 7.679731845855713, "learning_rate": 3.3625765835542718e-06, "loss": 0.0686, "step": 339950 }, { "epoch": 3.34, "grad_norm": 4.458683967590332, "learning_rate": 3.362452461100023e-06, "loss": 0.1602, "step": 339975 }, { "epoch": 3.34, "grad_norm": 5.2707085609436035, "learning_rate": 3.3623283386457746e-06, "loss": 0.0667, "step": 340000 }, { "epoch": 3.34, "eval_loss": 0.6330240368843079, "eval_runtime": 6144.5504, "eval_samples_per_second": 1.541, "eval_steps_per_second": 0.193, "eval_wer": 0.12108092448335801, "step": 340000 }, { "epoch": 3.34, "grad_norm": 5.6743292808532715, "learning_rate": 3.3622042161915262e-06, "loss": 0.1934, "step": 340025 }, { "epoch": 3.34, "grad_norm": 6.3823628425598145, "learning_rate": 3.362080093737278e-06, "loss": 0.0782, "step": 340050 }, { "epoch": 3.34, "grad_norm": 5.283663272857666, "learning_rate": 3.361955971283029e-06, "loss": 0.1941, "step": 340075 }, { "epoch": 3.34, "grad_norm": 7.461223125457764, "learning_rate": 3.361831848828781e-06, "loss": 0.0558, "step": 340100 }, { "epoch": 3.34, "grad_norm": 4.52401876449585, "learning_rate": 3.3617077263745324e-06, "loss": 0.1612, "step": 340125 }, { "epoch": 3.34, "grad_norm": 6.924449443817139, "learning_rate": 3.3615836039202836e-06, "loss": 0.0629, "step": 340150 }, { "epoch": 3.34, "grad_norm": 6.902011394500732, "learning_rate": 3.3614594814660356e-06, "loss": 0.1772, "step": 340175 }, { "epoch": 3.34, "grad_norm": 9.593001365661621, "learning_rate": 3.361335359011787e-06, "loss": 0.089, "step": 340200 }, { "epoch": 3.35, "grad_norm": 5.063266754150391, "learning_rate": 3.3612112365575385e-06, "loss": 0.1597, "step": 340225 }, { "epoch": 3.35, "grad_norm": 5.196486473083496, "learning_rate": 3.36108711410329e-06, "loss": 0.0668, "step": 340250 }, { "epoch": 3.35, "grad_norm": 6.231166839599609, "learning_rate": 3.3609629916490417e-06, "loss": 0.2356, "step": 340275 }, { "epoch": 3.35, "grad_norm": 8.249950408935547, "learning_rate": 3.360838869194793e-06, "loss": 0.0804, "step": 340300 }, { "epoch": 3.35, "grad_norm": 6.483425140380859, "learning_rate": 3.360714746740545e-06, "loss": 0.1707, "step": 340325 }, { "epoch": 3.35, "grad_norm": 3.5874435901641846, "learning_rate": 3.3605906242862962e-06, "loss": 0.07, "step": 340350 }, { "epoch": 3.35, "grad_norm": 6.438587665557861, "learning_rate": 3.3604665018320474e-06, "loss": 0.197, "step": 340375 }, { "epoch": 3.35, "grad_norm": 5.323225498199463, "learning_rate": 3.3603423793777995e-06, "loss": 0.0787, "step": 340400 }, { "epoch": 3.35, "grad_norm": 4.731539726257324, "learning_rate": 3.3602182569235507e-06, "loss": 0.1783, "step": 340425 }, { "epoch": 3.35, "grad_norm": 5.946048259735107, "learning_rate": 3.3600941344693023e-06, "loss": 0.0571, "step": 340450 }, { "epoch": 3.35, "grad_norm": 3.9069983959198, "learning_rate": 3.3599700120150535e-06, "loss": 0.2313, "step": 340475 }, { "epoch": 3.35, "grad_norm": 14.70448112487793, "learning_rate": 3.3598458895608056e-06, "loss": 0.0679, "step": 340500 }, { "epoch": 3.35, "grad_norm": 6.950829029083252, "learning_rate": 3.359721767106557e-06, "loss": 0.1997, "step": 340525 }, { "epoch": 3.35, "grad_norm": 8.742354393005371, "learning_rate": 3.359597644652308e-06, "loss": 0.0829, "step": 340550 }, { "epoch": 3.35, "grad_norm": 4.306331157684326, "learning_rate": 3.35947352219806e-06, "loss": 0.1842, "step": 340575 }, { "epoch": 3.35, "grad_norm": 7.6716814041137695, "learning_rate": 3.3593493997438113e-06, "loss": 0.0692, "step": 340600 }, { "epoch": 3.35, "grad_norm": 4.2584381103515625, "learning_rate": 3.359225277289563e-06, "loss": 0.1612, "step": 340625 }, { "epoch": 3.35, "grad_norm": 8.391695022583008, "learning_rate": 3.3591011548353146e-06, "loss": 0.0681, "step": 340650 }, { "epoch": 3.35, "grad_norm": 4.690446376800537, "learning_rate": 3.358977032381066e-06, "loss": 0.1717, "step": 340675 }, { "epoch": 3.35, "grad_norm": 10.065502166748047, "learning_rate": 3.3588529099268174e-06, "loss": 0.0559, "step": 340700 }, { "epoch": 3.35, "grad_norm": 5.153003692626953, "learning_rate": 3.3587287874725695e-06, "loss": 0.1863, "step": 340725 }, { "epoch": 3.35, "grad_norm": 7.021056175231934, "learning_rate": 3.3586046650183207e-06, "loss": 0.0691, "step": 340750 }, { "epoch": 3.35, "grad_norm": 4.4968037605285645, "learning_rate": 3.358480542564072e-06, "loss": 0.1872, "step": 340775 }, { "epoch": 3.35, "grad_norm": 2.2163548469543457, "learning_rate": 3.358356420109824e-06, "loss": 0.0558, "step": 340800 }, { "epoch": 3.35, "grad_norm": 4.251528263092041, "learning_rate": 3.358232297655575e-06, "loss": 0.153, "step": 340825 }, { "epoch": 3.35, "grad_norm": 3.6110644340515137, "learning_rate": 3.358108175201327e-06, "loss": 0.0584, "step": 340850 }, { "epoch": 3.35, "grad_norm": 5.434885025024414, "learning_rate": 3.3579840527470784e-06, "loss": 0.164, "step": 340875 }, { "epoch": 3.35, "grad_norm": 6.686816692352295, "learning_rate": 3.35785993029283e-06, "loss": 0.0476, "step": 340900 }, { "epoch": 3.35, "grad_norm": 6.161413192749023, "learning_rate": 3.3577358078385813e-06, "loss": 0.2025, "step": 340925 }, { "epoch": 3.35, "grad_norm": 5.237517356872559, "learning_rate": 3.3576116853843333e-06, "loss": 0.0776, "step": 340950 }, { "epoch": 3.35, "grad_norm": 7.63665246963501, "learning_rate": 3.3574875629300846e-06, "loss": 0.2155, "step": 340975 }, { "epoch": 3.35, "grad_norm": 13.557106018066406, "learning_rate": 3.3573634404758358e-06, "loss": 0.0633, "step": 341000 }, { "epoch": 3.35, "grad_norm": 5.337932586669922, "learning_rate": 3.357239318021588e-06, "loss": 0.174, "step": 341025 }, { "epoch": 3.35, "grad_norm": 10.844948768615723, "learning_rate": 3.357115195567339e-06, "loss": 0.0867, "step": 341050 }, { "epoch": 3.35, "grad_norm": 4.632619380950928, "learning_rate": 3.3569910731130907e-06, "loss": 0.2048, "step": 341075 }, { "epoch": 3.35, "grad_norm": 7.9608893394470215, "learning_rate": 3.3568669506588423e-06, "loss": 0.0906, "step": 341100 }, { "epoch": 3.35, "grad_norm": 5.13591194152832, "learning_rate": 3.356742828204594e-06, "loss": 0.148, "step": 341125 }, { "epoch": 3.35, "grad_norm": 5.665168285369873, "learning_rate": 3.356618705750345e-06, "loss": 0.0882, "step": 341150 }, { "epoch": 3.35, "grad_norm": 3.538424015045166, "learning_rate": 3.356494583296097e-06, "loss": 0.1897, "step": 341175 }, { "epoch": 3.35, "grad_norm": 12.13551139831543, "learning_rate": 3.3563704608418484e-06, "loss": 0.0998, "step": 341200 }, { "epoch": 3.35, "grad_norm": 7.808832168579102, "learning_rate": 3.3562463383875996e-06, "loss": 0.1598, "step": 341225 }, { "epoch": 3.36, "grad_norm": 9.40817928314209, "learning_rate": 3.3561222159333517e-06, "loss": 0.0948, "step": 341250 }, { "epoch": 3.36, "grad_norm": 7.58582067489624, "learning_rate": 3.355998093479103e-06, "loss": 0.1645, "step": 341275 }, { "epoch": 3.36, "grad_norm": 11.802912712097168, "learning_rate": 3.3558739710248545e-06, "loss": 0.0923, "step": 341300 }, { "epoch": 3.36, "grad_norm": 4.021930694580078, "learning_rate": 3.3557498485706058e-06, "loss": 0.2075, "step": 341325 }, { "epoch": 3.36, "grad_norm": 4.970388889312744, "learning_rate": 3.355625726116358e-06, "loss": 0.0709, "step": 341350 }, { "epoch": 3.36, "grad_norm": 4.600016117095947, "learning_rate": 3.355501603662109e-06, "loss": 0.2038, "step": 341375 }, { "epoch": 3.36, "grad_norm": 14.064419746398926, "learning_rate": 3.3553774812078602e-06, "loss": 0.0799, "step": 341400 }, { "epoch": 3.36, "grad_norm": 5.341699600219727, "learning_rate": 3.3552533587536123e-06, "loss": 0.1897, "step": 341425 }, { "epoch": 3.36, "grad_norm": 11.52564525604248, "learning_rate": 3.3551292362993635e-06, "loss": 0.0713, "step": 341450 }, { "epoch": 3.36, "grad_norm": 4.103871822357178, "learning_rate": 3.355005113845115e-06, "loss": 0.1846, "step": 341475 }, { "epoch": 3.36, "grad_norm": 14.03317642211914, "learning_rate": 3.3548809913908668e-06, "loss": 0.0621, "step": 341500 }, { "epoch": 3.36, "grad_norm": 3.4970505237579346, "learning_rate": 3.3547568689366184e-06, "loss": 0.1845, "step": 341525 }, { "epoch": 3.36, "grad_norm": 7.829535007476807, "learning_rate": 3.3546327464823696e-06, "loss": 0.0368, "step": 341550 }, { "epoch": 3.36, "grad_norm": 4.5888590812683105, "learning_rate": 3.3545086240281217e-06, "loss": 0.1668, "step": 341575 }, { "epoch": 3.36, "grad_norm": 9.96627140045166, "learning_rate": 3.354384501573873e-06, "loss": 0.0905, "step": 341600 }, { "epoch": 3.36, "grad_norm": 4.041897773742676, "learning_rate": 3.3542603791196245e-06, "loss": 0.1682, "step": 341625 }, { "epoch": 3.36, "grad_norm": 10.397100448608398, "learning_rate": 3.354136256665376e-06, "loss": 0.0806, "step": 341650 }, { "epoch": 3.36, "grad_norm": 5.920467853546143, "learning_rate": 3.354012134211128e-06, "loss": 0.16, "step": 341675 }, { "epoch": 3.36, "grad_norm": 7.292354583740234, "learning_rate": 3.353888011756879e-06, "loss": 0.0786, "step": 341700 }, { "epoch": 3.36, "grad_norm": 4.579442024230957, "learning_rate": 3.353763889302631e-06, "loss": 0.1997, "step": 341725 }, { "epoch": 3.36, "grad_norm": 14.588831901550293, "learning_rate": 3.3536397668483823e-06, "loss": 0.0652, "step": 341750 }, { "epoch": 3.36, "grad_norm": 3.2439541816711426, "learning_rate": 3.353520609292304e-06, "loss": 0.219, "step": 341775 }, { "epoch": 3.36, "grad_norm": 7.631211280822754, "learning_rate": 3.3533964868380554e-06, "loss": 0.0659, "step": 341800 }, { "epoch": 3.36, "grad_norm": 4.984212398529053, "learning_rate": 3.3532723643838066e-06, "loss": 0.168, "step": 341825 }, { "epoch": 3.36, "grad_norm": 13.354260444641113, "learning_rate": 3.3531482419295586e-06, "loss": 0.0929, "step": 341850 }, { "epoch": 3.36, "grad_norm": 3.398966073989868, "learning_rate": 3.35302411947531e-06, "loss": 0.2014, "step": 341875 }, { "epoch": 3.36, "grad_norm": 3.6521387100219727, "learning_rate": 3.3528999970210615e-06, "loss": 0.0584, "step": 341900 }, { "epoch": 3.36, "grad_norm": 5.500734329223633, "learning_rate": 3.3527758745668127e-06, "loss": 0.1546, "step": 341925 }, { "epoch": 3.36, "grad_norm": 11.646583557128906, "learning_rate": 3.3526517521125647e-06, "loss": 0.0648, "step": 341950 }, { "epoch": 3.36, "grad_norm": 6.473566055297852, "learning_rate": 3.352527629658316e-06, "loss": 0.1742, "step": 341975 }, { "epoch": 3.36, "grad_norm": 10.644266128540039, "learning_rate": 3.352403507204067e-06, "loss": 0.0713, "step": 342000 }, { "epoch": 3.36, "grad_norm": 5.5225749015808105, "learning_rate": 3.3522793847498192e-06, "loss": 0.1717, "step": 342025 }, { "epoch": 3.36, "grad_norm": 21.14946746826172, "learning_rate": 3.3521552622955704e-06, "loss": 0.0794, "step": 342050 }, { "epoch": 3.36, "grad_norm": 8.996843338012695, "learning_rate": 3.352031139841322e-06, "loss": 0.2055, "step": 342075 }, { "epoch": 3.36, "grad_norm": 1.225081443786621, "learning_rate": 3.3519070173870737e-06, "loss": 0.0661, "step": 342100 }, { "epoch": 3.36, "grad_norm": 5.107405662536621, "learning_rate": 3.3517828949328253e-06, "loss": 0.1806, "step": 342125 }, { "epoch": 3.36, "grad_norm": 10.62401294708252, "learning_rate": 3.3516587724785766e-06, "loss": 0.1008, "step": 342150 }, { "epoch": 3.36, "grad_norm": 4.599379539489746, "learning_rate": 3.3515346500243286e-06, "loss": 0.1686, "step": 342175 }, { "epoch": 3.36, "grad_norm": 6.779048919677734, "learning_rate": 3.35141052757008e-06, "loss": 0.0603, "step": 342200 }, { "epoch": 3.36, "grad_norm": 3.4039061069488525, "learning_rate": 3.351286405115831e-06, "loss": 0.1339, "step": 342225 }, { "epoch": 3.37, "grad_norm": 9.438420295715332, "learning_rate": 3.351162282661583e-06, "loss": 0.0687, "step": 342250 }, { "epoch": 3.37, "grad_norm": 5.590184211730957, "learning_rate": 3.3510381602073343e-06, "loss": 0.202, "step": 342275 }, { "epoch": 3.37, "grad_norm": 11.816486358642578, "learning_rate": 3.350914037753086e-06, "loss": 0.0635, "step": 342300 }, { "epoch": 3.37, "grad_norm": 4.365297794342041, "learning_rate": 3.3507899152988376e-06, "loss": 0.1859, "step": 342325 }, { "epoch": 3.37, "grad_norm": 16.187803268432617, "learning_rate": 3.3506657928445892e-06, "loss": 0.0919, "step": 342350 }, { "epoch": 3.37, "grad_norm": 4.386391639709473, "learning_rate": 3.3505416703903404e-06, "loss": 0.1803, "step": 342375 }, { "epoch": 3.37, "grad_norm": 8.95105266571045, "learning_rate": 3.3504175479360925e-06, "loss": 0.0868, "step": 342400 }, { "epoch": 3.37, "grad_norm": 7.316788196563721, "learning_rate": 3.3502934254818437e-06, "loss": 0.2332, "step": 342425 }, { "epoch": 3.37, "grad_norm": 11.015458106994629, "learning_rate": 3.350169303027595e-06, "loss": 0.0801, "step": 342450 }, { "epoch": 3.37, "grad_norm": 3.8354129791259766, "learning_rate": 3.350045180573347e-06, "loss": 0.169, "step": 342475 }, { "epoch": 3.37, "grad_norm": 22.049150466918945, "learning_rate": 3.349921058119098e-06, "loss": 0.0745, "step": 342500 }, { "epoch": 3.37, "grad_norm": 3.3624017238616943, "learning_rate": 3.34979693566485e-06, "loss": 0.1916, "step": 342525 }, { "epoch": 3.37, "grad_norm": 9.789358139038086, "learning_rate": 3.3496728132106014e-06, "loss": 0.0607, "step": 342550 }, { "epoch": 3.37, "grad_norm": 5.175902843475342, "learning_rate": 3.349548690756353e-06, "loss": 0.1651, "step": 342575 }, { "epoch": 3.37, "grad_norm": 8.53073787689209, "learning_rate": 3.3494245683021043e-06, "loss": 0.0773, "step": 342600 }, { "epoch": 3.37, "grad_norm": 6.186384677886963, "learning_rate": 3.3493004458478563e-06, "loss": 0.1911, "step": 342625 }, { "epoch": 3.37, "grad_norm": 4.319038391113281, "learning_rate": 3.3491763233936076e-06, "loss": 0.0724, "step": 342650 }, { "epoch": 3.37, "grad_norm": 2.6806483268737793, "learning_rate": 3.3490522009393588e-06, "loss": 0.1737, "step": 342675 }, { "epoch": 3.37, "grad_norm": 8.031222343444824, "learning_rate": 3.348928078485111e-06, "loss": 0.0632, "step": 342700 }, { "epoch": 3.37, "grad_norm": 15.357575416564941, "learning_rate": 3.348803956030862e-06, "loss": 0.207, "step": 342725 }, { "epoch": 3.37, "grad_norm": 4.744668006896973, "learning_rate": 3.3486798335766137e-06, "loss": 0.065, "step": 342750 }, { "epoch": 3.37, "grad_norm": 3.7815628051757812, "learning_rate": 3.348555711122365e-06, "loss": 0.2064, "step": 342775 }, { "epoch": 3.37, "grad_norm": 9.070907592773438, "learning_rate": 3.348431588668117e-06, "loss": 0.0902, "step": 342800 }, { "epoch": 3.37, "grad_norm": 5.460723876953125, "learning_rate": 3.348307466213868e-06, "loss": 0.1787, "step": 342825 }, { "epoch": 3.37, "grad_norm": 6.978508472442627, "learning_rate": 3.3481833437596194e-06, "loss": 0.0527, "step": 342850 }, { "epoch": 3.37, "grad_norm": 5.048795223236084, "learning_rate": 3.3480592213053714e-06, "loss": 0.1747, "step": 342875 }, { "epoch": 3.37, "grad_norm": 3.820314645767212, "learning_rate": 3.3479350988511226e-06, "loss": 0.0833, "step": 342900 }, { "epoch": 3.37, "grad_norm": 4.026559352874756, "learning_rate": 3.3478109763968743e-06, "loss": 0.214, "step": 342925 }, { "epoch": 3.37, "grad_norm": 9.519174575805664, "learning_rate": 3.347686853942626e-06, "loss": 0.0517, "step": 342950 }, { "epoch": 3.37, "grad_norm": 7.470043659210205, "learning_rate": 3.3475627314883775e-06, "loss": 0.168, "step": 342975 }, { "epoch": 3.37, "grad_norm": 13.044426918029785, "learning_rate": 3.3474386090341288e-06, "loss": 0.0721, "step": 343000 }, { "epoch": 3.37, "grad_norm": 4.681022644042969, "learning_rate": 3.347314486579881e-06, "loss": 0.1894, "step": 343025 }, { "epoch": 3.37, "grad_norm": 3.6085424423217773, "learning_rate": 3.347190364125632e-06, "loss": 0.061, "step": 343050 }, { "epoch": 3.37, "grad_norm": 4.304724216461182, "learning_rate": 3.3470662416713832e-06, "loss": 0.1665, "step": 343075 }, { "epoch": 3.37, "grad_norm": 3.7589080333709717, "learning_rate": 3.3469421192171353e-06, "loss": 0.0612, "step": 343100 }, { "epoch": 3.37, "grad_norm": 4.116124629974365, "learning_rate": 3.3468179967628865e-06, "loss": 0.2094, "step": 343125 }, { "epoch": 3.37, "grad_norm": 5.880206108093262, "learning_rate": 3.346693874308638e-06, "loss": 0.0799, "step": 343150 }, { "epoch": 3.37, "grad_norm": 3.6630492210388184, "learning_rate": 3.3465697518543898e-06, "loss": 0.1761, "step": 343175 }, { "epoch": 3.37, "grad_norm": 18.06920623779297, "learning_rate": 3.3464456294001414e-06, "loss": 0.0673, "step": 343200 }, { "epoch": 3.37, "grad_norm": 9.124463081359863, "learning_rate": 3.3463215069458926e-06, "loss": 0.2005, "step": 343225 }, { "epoch": 3.37, "grad_norm": 9.75468921661377, "learning_rate": 3.3461973844916447e-06, "loss": 0.0644, "step": 343250 }, { "epoch": 3.38, "grad_norm": 4.841627597808838, "learning_rate": 3.346073262037396e-06, "loss": 0.1945, "step": 343275 }, { "epoch": 3.38, "grad_norm": 12.731024742126465, "learning_rate": 3.345949139583147e-06, "loss": 0.0623, "step": 343300 }, { "epoch": 3.38, "grad_norm": 5.238053798675537, "learning_rate": 3.345825017128899e-06, "loss": 0.1742, "step": 343325 }, { "epoch": 3.38, "grad_norm": 13.103837966918945, "learning_rate": 3.3457008946746504e-06, "loss": 0.0845, "step": 343350 }, { "epoch": 3.38, "grad_norm": 4.962043762207031, "learning_rate": 3.345576772220402e-06, "loss": 0.1733, "step": 343375 }, { "epoch": 3.38, "grad_norm": 6.506507396697998, "learning_rate": 3.3454526497661536e-06, "loss": 0.0631, "step": 343400 }, { "epoch": 3.38, "grad_norm": 4.070659160614014, "learning_rate": 3.3453285273119053e-06, "loss": 0.1946, "step": 343425 }, { "epoch": 3.38, "grad_norm": 4.82274055480957, "learning_rate": 3.3452044048576565e-06, "loss": 0.0798, "step": 343450 }, { "epoch": 3.38, "grad_norm": 3.2129030227661133, "learning_rate": 3.3450802824034086e-06, "loss": 0.2104, "step": 343475 }, { "epoch": 3.38, "grad_norm": 0.7004774808883667, "learning_rate": 3.3449561599491598e-06, "loss": 0.0691, "step": 343500 }, { "epoch": 3.38, "grad_norm": 5.110654354095459, "learning_rate": 3.344832037494911e-06, "loss": 0.1883, "step": 343525 }, { "epoch": 3.38, "grad_norm": 12.121248245239258, "learning_rate": 3.344707915040663e-06, "loss": 0.0801, "step": 343550 }, { "epoch": 3.38, "grad_norm": 4.973170280456543, "learning_rate": 3.3445837925864142e-06, "loss": 0.1633, "step": 343575 }, { "epoch": 3.38, "grad_norm": 10.870159149169922, "learning_rate": 3.344459670132166e-06, "loss": 0.0596, "step": 343600 }, { "epoch": 3.38, "grad_norm": 4.102426528930664, "learning_rate": 3.344335547677917e-06, "loss": 0.1798, "step": 343625 }, { "epoch": 3.38, "grad_norm": 7.073559761047363, "learning_rate": 3.344211425223669e-06, "loss": 0.0589, "step": 343650 }, { "epoch": 3.38, "grad_norm": 4.83234167098999, "learning_rate": 3.3440873027694204e-06, "loss": 0.2124, "step": 343675 }, { "epoch": 3.38, "grad_norm": 10.007615089416504, "learning_rate": 3.3439631803151716e-06, "loss": 0.0731, "step": 343700 }, { "epoch": 3.38, "grad_norm": 3.7012202739715576, "learning_rate": 3.3438390578609236e-06, "loss": 0.1958, "step": 343725 }, { "epoch": 3.38, "grad_norm": 6.4947285652160645, "learning_rate": 3.343714935406675e-06, "loss": 0.0785, "step": 343750 }, { "epoch": 3.38, "grad_norm": 3.7048392295837402, "learning_rate": 3.3435908129524265e-06, "loss": 0.1846, "step": 343775 }, { "epoch": 3.38, "grad_norm": 7.266770362854004, "learning_rate": 3.343466690498178e-06, "loss": 0.0716, "step": 343800 }, { "epoch": 3.38, "grad_norm": 5.1059722900390625, "learning_rate": 3.3433475329420996e-06, "loss": 0.1547, "step": 343825 }, { "epoch": 3.38, "grad_norm": 3.0844314098358154, "learning_rate": 3.343223410487851e-06, "loss": 0.0775, "step": 343850 }, { "epoch": 3.38, "grad_norm": 7.537475109100342, "learning_rate": 3.343099288033603e-06, "loss": 0.2219, "step": 343875 }, { "epoch": 3.38, "grad_norm": 1.809377670288086, "learning_rate": 3.342975165579354e-06, "loss": 0.048, "step": 343900 }, { "epoch": 3.38, "grad_norm": 6.665894508361816, "learning_rate": 3.342851043125106e-06, "loss": 0.2029, "step": 343925 }, { "epoch": 3.38, "grad_norm": 6.364456653594971, "learning_rate": 3.3427269206708573e-06, "loss": 0.0536, "step": 343950 }, { "epoch": 3.38, "grad_norm": 5.230493545532227, "learning_rate": 3.3426027982166085e-06, "loss": 0.1788, "step": 343975 }, { "epoch": 3.38, "grad_norm": 9.39238452911377, "learning_rate": 3.3424786757623606e-06, "loss": 0.0632, "step": 344000 }, { "epoch": 3.38, "grad_norm": 5.2545366287231445, "learning_rate": 3.342354553308112e-06, "loss": 0.2112, "step": 344025 }, { "epoch": 3.38, "grad_norm": 14.406881332397461, "learning_rate": 3.3422304308538634e-06, "loss": 0.0648, "step": 344050 }, { "epoch": 3.38, "grad_norm": 6.065993309020996, "learning_rate": 3.342106308399615e-06, "loss": 0.1777, "step": 344075 }, { "epoch": 3.38, "grad_norm": 11.506081581115723, "learning_rate": 3.3419821859453667e-06, "loss": 0.0851, "step": 344100 }, { "epoch": 3.38, "grad_norm": 5.405593395233154, "learning_rate": 3.341858063491118e-06, "loss": 0.1781, "step": 344125 }, { "epoch": 3.38, "grad_norm": 9.313551902770996, "learning_rate": 3.34173394103687e-06, "loss": 0.0779, "step": 344150 }, { "epoch": 3.38, "grad_norm": 4.256618022918701, "learning_rate": 3.341609818582621e-06, "loss": 0.1707, "step": 344175 }, { "epoch": 3.38, "grad_norm": 6.008944988250732, "learning_rate": 3.3414856961283724e-06, "loss": 0.0866, "step": 344200 }, { "epoch": 3.38, "grad_norm": 4.506153583526611, "learning_rate": 3.341361573674124e-06, "loss": 0.1646, "step": 344225 }, { "epoch": 3.38, "grad_norm": 3.347247362136841, "learning_rate": 3.3412374512198757e-06, "loss": 0.0635, "step": 344250 }, { "epoch": 3.38, "grad_norm": 6.413330554962158, "learning_rate": 3.3411133287656273e-06, "loss": 0.1339, "step": 344275 }, { "epoch": 3.39, "grad_norm": 10.218571662902832, "learning_rate": 3.3409892063113785e-06, "loss": 0.0667, "step": 344300 }, { "epoch": 3.39, "grad_norm": 5.358119964599609, "learning_rate": 3.3408650838571306e-06, "loss": 0.1945, "step": 344325 }, { "epoch": 3.39, "grad_norm": 10.465544700622559, "learning_rate": 3.3407409614028818e-06, "loss": 0.0989, "step": 344350 }, { "epoch": 3.39, "grad_norm": 5.012463092803955, "learning_rate": 3.340616838948633e-06, "loss": 0.1655, "step": 344375 }, { "epoch": 3.39, "grad_norm": 7.651758193969727, "learning_rate": 3.340492716494385e-06, "loss": 0.0498, "step": 344400 }, { "epoch": 3.39, "grad_norm": 6.455014705657959, "learning_rate": 3.3403685940401363e-06, "loss": 0.1918, "step": 344425 }, { "epoch": 3.39, "grad_norm": 2.082301616668701, "learning_rate": 3.340244471585888e-06, "loss": 0.0655, "step": 344450 }, { "epoch": 3.39, "grad_norm": 5.417227745056152, "learning_rate": 3.3401203491316395e-06, "loss": 0.2035, "step": 344475 }, { "epoch": 3.39, "grad_norm": 41.19582748413086, "learning_rate": 3.339996226677391e-06, "loss": 0.1052, "step": 344500 }, { "epoch": 3.39, "grad_norm": 4.420826435089111, "learning_rate": 3.3398721042231424e-06, "loss": 0.1748, "step": 344525 }, { "epoch": 3.39, "grad_norm": 9.565342903137207, "learning_rate": 3.3397479817688944e-06, "loss": 0.0731, "step": 344550 }, { "epoch": 3.39, "grad_norm": 6.519472599029541, "learning_rate": 3.3396238593146456e-06, "loss": 0.1502, "step": 344575 }, { "epoch": 3.39, "grad_norm": 6.083717346191406, "learning_rate": 3.3394997368603973e-06, "loss": 0.0771, "step": 344600 }, { "epoch": 3.39, "grad_norm": 4.159630298614502, "learning_rate": 3.339375614406149e-06, "loss": 0.1641, "step": 344625 }, { "epoch": 3.39, "grad_norm": 12.621164321899414, "learning_rate": 3.3392514919519006e-06, "loss": 0.0515, "step": 344650 }, { "epoch": 3.39, "grad_norm": 5.149418354034424, "learning_rate": 3.3391273694976518e-06, "loss": 0.1726, "step": 344675 }, { "epoch": 3.39, "grad_norm": 6.567816734313965, "learning_rate": 3.339003247043404e-06, "loss": 0.0718, "step": 344700 }, { "epoch": 3.39, "grad_norm": 3.569472312927246, "learning_rate": 3.338879124589155e-06, "loss": 0.1857, "step": 344725 }, { "epoch": 3.39, "grad_norm": 8.006359100341797, "learning_rate": 3.3387550021349062e-06, "loss": 0.0761, "step": 344750 }, { "epoch": 3.39, "grad_norm": 4.752532482147217, "learning_rate": 3.3386308796806583e-06, "loss": 0.1847, "step": 344775 }, { "epoch": 3.39, "grad_norm": 10.668195724487305, "learning_rate": 3.3385067572264095e-06, "loss": 0.075, "step": 344800 }, { "epoch": 3.39, "grad_norm": 4.506371974945068, "learning_rate": 3.338382634772161e-06, "loss": 0.2089, "step": 344825 }, { "epoch": 3.39, "grad_norm": 8.23017692565918, "learning_rate": 3.3382585123179128e-06, "loss": 0.0941, "step": 344850 }, { "epoch": 3.39, "grad_norm": 4.666482925415039, "learning_rate": 3.3381343898636644e-06, "loss": 0.1783, "step": 344875 }, { "epoch": 3.39, "grad_norm": 7.066542148590088, "learning_rate": 3.3380102674094156e-06, "loss": 0.0993, "step": 344900 }, { "epoch": 3.39, "grad_norm": 7.90833854675293, "learning_rate": 3.3378861449551677e-06, "loss": 0.1667, "step": 344925 }, { "epoch": 3.39, "grad_norm": 7.12700080871582, "learning_rate": 3.337762022500919e-06, "loss": 0.0723, "step": 344950 }, { "epoch": 3.39, "grad_norm": 4.369777679443359, "learning_rate": 3.33763790004667e-06, "loss": 0.2212, "step": 344975 }, { "epoch": 3.39, "grad_norm": 8.171384811401367, "learning_rate": 3.337513777592422e-06, "loss": 0.088, "step": 345000 }, { "epoch": 3.39, "grad_norm": 13.942225456237793, "learning_rate": 3.3373896551381734e-06, "loss": 0.1839, "step": 345025 }, { "epoch": 3.39, "grad_norm": 9.019211769104004, "learning_rate": 3.337265532683925e-06, "loss": 0.086, "step": 345050 }, { "epoch": 3.39, "grad_norm": 5.2151079177856445, "learning_rate": 3.3371414102296762e-06, "loss": 0.1633, "step": 345075 }, { "epoch": 3.39, "grad_norm": 3.8176732063293457, "learning_rate": 3.3370172877754283e-06, "loss": 0.081, "step": 345100 }, { "epoch": 3.39, "grad_norm": 5.26391077041626, "learning_rate": 3.3368931653211795e-06, "loss": 0.1682, "step": 345125 }, { "epoch": 3.39, "grad_norm": 10.176897048950195, "learning_rate": 3.3367690428669307e-06, "loss": 0.0695, "step": 345150 }, { "epoch": 3.39, "grad_norm": 4.195568084716797, "learning_rate": 3.3366449204126828e-06, "loss": 0.1821, "step": 345175 }, { "epoch": 3.39, "grad_norm": 5.752238750457764, "learning_rate": 3.336520797958434e-06, "loss": 0.0687, "step": 345200 }, { "epoch": 3.39, "grad_norm": 7.0934295654296875, "learning_rate": 3.3363966755041856e-06, "loss": 0.2, "step": 345225 }, { "epoch": 3.39, "grad_norm": 9.064373970031738, "learning_rate": 3.3362725530499373e-06, "loss": 0.0857, "step": 345250 }, { "epoch": 3.39, "grad_norm": 5.461475372314453, "learning_rate": 3.336148430595689e-06, "loss": 0.1988, "step": 345275 }, { "epoch": 3.4, "grad_norm": 1.080986499786377, "learning_rate": 3.33602430814144e-06, "loss": 0.0556, "step": 345300 }, { "epoch": 3.4, "grad_norm": 4.246536731719971, "learning_rate": 3.335900185687192e-06, "loss": 0.2268, "step": 345325 }, { "epoch": 3.4, "grad_norm": 6.9167609214782715, "learning_rate": 3.3357760632329434e-06, "loss": 0.0497, "step": 345350 }, { "epoch": 3.4, "grad_norm": 5.519754409790039, "learning_rate": 3.3356519407786946e-06, "loss": 0.1543, "step": 345375 }, { "epoch": 3.4, "grad_norm": 11.414597511291504, "learning_rate": 3.3355278183244466e-06, "loss": 0.0631, "step": 345400 }, { "epoch": 3.4, "grad_norm": 3.7045514583587646, "learning_rate": 3.335403695870198e-06, "loss": 0.2016, "step": 345425 }, { "epoch": 3.4, "grad_norm": 5.586761474609375, "learning_rate": 3.3352795734159495e-06, "loss": 0.0678, "step": 345450 }, { "epoch": 3.4, "grad_norm": 3.6558353900909424, "learning_rate": 3.335155450961701e-06, "loss": 0.185, "step": 345475 }, { "epoch": 3.4, "grad_norm": 11.60964298248291, "learning_rate": 3.3350313285074528e-06, "loss": 0.0656, "step": 345500 }, { "epoch": 3.4, "grad_norm": 3.7403573989868164, "learning_rate": 3.334907206053204e-06, "loss": 0.1475, "step": 345525 }, { "epoch": 3.4, "grad_norm": 5.123289585113525, "learning_rate": 3.334783083598956e-06, "loss": 0.0439, "step": 345550 }, { "epoch": 3.4, "grad_norm": 6.515044212341309, "learning_rate": 3.3346589611447072e-06, "loss": 0.1755, "step": 345575 }, { "epoch": 3.4, "grad_norm": 10.998177528381348, "learning_rate": 3.3345348386904584e-06, "loss": 0.0949, "step": 345600 }, { "epoch": 3.4, "grad_norm": 5.798000335693359, "learning_rate": 3.3344107162362105e-06, "loss": 0.1669, "step": 345625 }, { "epoch": 3.4, "grad_norm": 16.14859962463379, "learning_rate": 3.3342865937819617e-06, "loss": 0.0752, "step": 345650 }, { "epoch": 3.4, "grad_norm": 5.970012664794922, "learning_rate": 3.3341624713277134e-06, "loss": 0.1921, "step": 345675 }, { "epoch": 3.4, "grad_norm": 7.4614787101745605, "learning_rate": 3.334038348873465e-06, "loss": 0.0804, "step": 345700 }, { "epoch": 3.4, "grad_norm": 10.308853149414062, "learning_rate": 3.3339142264192166e-06, "loss": 0.1906, "step": 345725 }, { "epoch": 3.4, "grad_norm": 4.363576412200928, "learning_rate": 3.333790103964968e-06, "loss": 0.0635, "step": 345750 }, { "epoch": 3.4, "grad_norm": 4.678902626037598, "learning_rate": 3.33366598151072e-06, "loss": 0.1994, "step": 345775 }, { "epoch": 3.4, "grad_norm": 2.8488099575042725, "learning_rate": 3.333541859056471e-06, "loss": 0.0567, "step": 345800 }, { "epoch": 3.4, "grad_norm": 4.475423812866211, "learning_rate": 3.333422701500392e-06, "loss": 0.1402, "step": 345825 }, { "epoch": 3.4, "grad_norm": 1.4988727569580078, "learning_rate": 3.333298579046144e-06, "loss": 0.0589, "step": 345850 }, { "epoch": 3.4, "grad_norm": 3.610241174697876, "learning_rate": 3.3331744565918954e-06, "loss": 0.1901, "step": 345875 }, { "epoch": 3.4, "grad_norm": 5.862758159637451, "learning_rate": 3.333050334137647e-06, "loss": 0.0787, "step": 345900 }, { "epoch": 3.4, "grad_norm": 6.117302417755127, "learning_rate": 3.3329262116833987e-06, "loss": 0.1684, "step": 345925 }, { "epoch": 3.4, "grad_norm": 7.5033769607543945, "learning_rate": 3.3328020892291503e-06, "loss": 0.0534, "step": 345950 }, { "epoch": 3.4, "grad_norm": 4.587841987609863, "learning_rate": 3.3326779667749015e-06, "loss": 0.1693, "step": 345975 }, { "epoch": 3.4, "grad_norm": 4.760577201843262, "learning_rate": 3.3325538443206536e-06, "loss": 0.0796, "step": 346000 }, { "epoch": 3.4, "grad_norm": 4.697577953338623, "learning_rate": 3.3324297218664048e-06, "loss": 0.2058, "step": 346025 }, { "epoch": 3.4, "grad_norm": 18.856361389160156, "learning_rate": 3.332305599412156e-06, "loss": 0.0771, "step": 346050 }, { "epoch": 3.4, "grad_norm": 7.997659683227539, "learning_rate": 3.332181476957908e-06, "loss": 0.1581, "step": 346075 }, { "epoch": 3.4, "grad_norm": 11.032371520996094, "learning_rate": 3.3320573545036593e-06, "loss": 0.0795, "step": 346100 }, { "epoch": 3.4, "grad_norm": 7.06962776184082, "learning_rate": 3.331933232049411e-06, "loss": 0.201, "step": 346125 }, { "epoch": 3.4, "grad_norm": 13.774943351745605, "learning_rate": 3.3318091095951625e-06, "loss": 0.0831, "step": 346150 }, { "epoch": 3.4, "grad_norm": 4.362688064575195, "learning_rate": 3.331684987140914e-06, "loss": 0.1965, "step": 346175 }, { "epoch": 3.4, "grad_norm": 5.552514553070068, "learning_rate": 3.3315608646866654e-06, "loss": 0.0527, "step": 346200 }, { "epoch": 3.4, "grad_norm": 4.068994522094727, "learning_rate": 3.3314367422324174e-06, "loss": 0.2064, "step": 346225 }, { "epoch": 3.4, "grad_norm": 4.874994277954102, "learning_rate": 3.3313126197781687e-06, "loss": 0.0657, "step": 346250 }, { "epoch": 3.4, "grad_norm": 7.40572452545166, "learning_rate": 3.33118849732392e-06, "loss": 0.1501, "step": 346275 }, { "epoch": 3.4, "grad_norm": 12.291060447692871, "learning_rate": 3.331064374869672e-06, "loss": 0.0694, "step": 346300 }, { "epoch": 3.41, "grad_norm": 3.5418941974639893, "learning_rate": 3.330940252415423e-06, "loss": 0.212, "step": 346325 }, { "epoch": 3.41, "grad_norm": 3.159841537475586, "learning_rate": 3.3308161299611748e-06, "loss": 0.0692, "step": 346350 }, { "epoch": 3.41, "grad_norm": 5.705971717834473, "learning_rate": 3.3306920075069264e-06, "loss": 0.1686, "step": 346375 }, { "epoch": 3.41, "grad_norm": 9.649675369262695, "learning_rate": 3.330567885052678e-06, "loss": 0.0761, "step": 346400 }, { "epoch": 3.41, "grad_norm": 3.4598982334136963, "learning_rate": 3.3304437625984293e-06, "loss": 0.1898, "step": 346425 }, { "epoch": 3.41, "grad_norm": 5.146483421325684, "learning_rate": 3.3303196401441813e-06, "loss": 0.0415, "step": 346450 }, { "epoch": 3.41, "grad_norm": 5.767154216766357, "learning_rate": 3.3301955176899325e-06, "loss": 0.1605, "step": 346475 }, { "epoch": 3.41, "grad_norm": 19.43533706665039, "learning_rate": 3.3300713952356837e-06, "loss": 0.0811, "step": 346500 }, { "epoch": 3.41, "grad_norm": 7.360235691070557, "learning_rate": 3.3299472727814354e-06, "loss": 0.2134, "step": 346525 }, { "epoch": 3.41, "grad_norm": 2.5480730533599854, "learning_rate": 3.329823150327187e-06, "loss": 0.088, "step": 346550 }, { "epoch": 3.41, "grad_norm": 5.7991814613342285, "learning_rate": 3.3296990278729386e-06, "loss": 0.1332, "step": 346575 }, { "epoch": 3.41, "grad_norm": 8.477262496948242, "learning_rate": 3.32957490541869e-06, "loss": 0.0694, "step": 346600 }, { "epoch": 3.41, "grad_norm": 4.324174880981445, "learning_rate": 3.329450782964442e-06, "loss": 0.1708, "step": 346625 }, { "epoch": 3.41, "grad_norm": 4.237969398498535, "learning_rate": 3.329326660510193e-06, "loss": 0.048, "step": 346650 }, { "epoch": 3.41, "grad_norm": 3.332430839538574, "learning_rate": 3.3292025380559443e-06, "loss": 0.1886, "step": 346675 }, { "epoch": 3.41, "grad_norm": 8.286515235900879, "learning_rate": 3.3290784156016964e-06, "loss": 0.0762, "step": 346700 }, { "epoch": 3.41, "grad_norm": 4.821057319641113, "learning_rate": 3.3289542931474476e-06, "loss": 0.1824, "step": 346725 }, { "epoch": 3.41, "grad_norm": 15.264013290405273, "learning_rate": 3.3288301706931992e-06, "loss": 0.1013, "step": 346750 }, { "epoch": 3.41, "grad_norm": 4.153275012969971, "learning_rate": 3.328706048238951e-06, "loss": 0.2096, "step": 346775 }, { "epoch": 3.41, "grad_norm": 7.366017818450928, "learning_rate": 3.3285819257847025e-06, "loss": 0.0782, "step": 346800 }, { "epoch": 3.41, "grad_norm": 9.162252426147461, "learning_rate": 3.3284578033304537e-06, "loss": 0.2022, "step": 346825 }, { "epoch": 3.41, "grad_norm": 1.7735912799835205, "learning_rate": 3.3283336808762058e-06, "loss": 0.0751, "step": 346850 }, { "epoch": 3.41, "grad_norm": 7.721749782562256, "learning_rate": 3.328209558421957e-06, "loss": 0.1914, "step": 346875 }, { "epoch": 3.41, "grad_norm": 4.1276421546936035, "learning_rate": 3.328085435967708e-06, "loss": 0.0571, "step": 346900 }, { "epoch": 3.41, "grad_norm": 2.847806453704834, "learning_rate": 3.3279613135134603e-06, "loss": 0.1499, "step": 346925 }, { "epoch": 3.41, "grad_norm": 4.614055156707764, "learning_rate": 3.3278371910592115e-06, "loss": 0.0825, "step": 346950 }, { "epoch": 3.41, "grad_norm": 6.469359874725342, "learning_rate": 3.327713068604963e-06, "loss": 0.1552, "step": 346975 }, { "epoch": 3.41, "grad_norm": 6.266521453857422, "learning_rate": 3.3275889461507147e-06, "loss": 0.0702, "step": 347000 }, { "epoch": 3.41, "grad_norm": 6.327500343322754, "learning_rate": 3.3274648236964664e-06, "loss": 0.2257, "step": 347025 }, { "epoch": 3.41, "grad_norm": 11.875863075256348, "learning_rate": 3.3273407012422176e-06, "loss": 0.0627, "step": 347050 }, { "epoch": 3.41, "grad_norm": 4.477530479431152, "learning_rate": 3.3272165787879696e-06, "loss": 0.1922, "step": 347075 }, { "epoch": 3.41, "grad_norm": 16.949127197265625, "learning_rate": 3.327092456333721e-06, "loss": 0.0944, "step": 347100 }, { "epoch": 3.41, "grad_norm": 6.630100250244141, "learning_rate": 3.326968333879472e-06, "loss": 0.1888, "step": 347125 }, { "epoch": 3.41, "grad_norm": 8.713849067687988, "learning_rate": 3.326844211425224e-06, "loss": 0.0875, "step": 347150 }, { "epoch": 3.41, "grad_norm": 3.8631346225738525, "learning_rate": 3.3267200889709753e-06, "loss": 0.1718, "step": 347175 }, { "epoch": 3.41, "grad_norm": 10.795482635498047, "learning_rate": 3.326595966516727e-06, "loss": 0.0532, "step": 347200 }, { "epoch": 3.41, "grad_norm": 5.115203380584717, "learning_rate": 3.3264718440624786e-06, "loss": 0.1548, "step": 347225 }, { "epoch": 3.41, "grad_norm": 14.183805465698242, "learning_rate": 3.3263477216082302e-06, "loss": 0.0715, "step": 347250 }, { "epoch": 3.41, "grad_norm": 3.0528793334960938, "learning_rate": 3.3262235991539815e-06, "loss": 0.1552, "step": 347275 }, { "epoch": 3.41, "grad_norm": 5.607237815856934, "learning_rate": 3.3260994766997335e-06, "loss": 0.0635, "step": 347300 }, { "epoch": 3.41, "grad_norm": 5.06236457824707, "learning_rate": 3.3259753542454847e-06, "loss": 0.1658, "step": 347325 }, { "epoch": 3.42, "grad_norm": 8.012511253356934, "learning_rate": 3.3258512317912364e-06, "loss": 0.0721, "step": 347350 }, { "epoch": 3.42, "grad_norm": 3.935340166091919, "learning_rate": 3.3257271093369876e-06, "loss": 0.1676, "step": 347375 }, { "epoch": 3.42, "grad_norm": 10.419675827026367, "learning_rate": 3.325602986882739e-06, "loss": 0.0725, "step": 347400 }, { "epoch": 3.42, "grad_norm": 10.894194602966309, "learning_rate": 3.325478864428491e-06, "loss": 0.1725, "step": 347425 }, { "epoch": 3.42, "grad_norm": 8.558361053466797, "learning_rate": 3.325354741974242e-06, "loss": 0.0806, "step": 347450 }, { "epoch": 3.42, "grad_norm": 7.969795227050781, "learning_rate": 3.325230619519994e-06, "loss": 0.2014, "step": 347475 }, { "epoch": 3.42, "grad_norm": 5.139554023742676, "learning_rate": 3.3251064970657453e-06, "loss": 0.0707, "step": 347500 }, { "epoch": 3.42, "grad_norm": 4.502435207366943, "learning_rate": 3.324982374611497e-06, "loss": 0.1802, "step": 347525 }, { "epoch": 3.42, "grad_norm": 8.945180892944336, "learning_rate": 3.3248582521572486e-06, "loss": 0.0683, "step": 347550 }, { "epoch": 3.42, "grad_norm": 2.5728631019592285, "learning_rate": 3.3247341297030002e-06, "loss": 0.1602, "step": 347575 }, { "epoch": 3.42, "grad_norm": 3.800327777862549, "learning_rate": 3.3246100072487514e-06, "loss": 0.083, "step": 347600 }, { "epoch": 3.42, "grad_norm": 4.208467960357666, "learning_rate": 3.3244858847945035e-06, "loss": 0.1809, "step": 347625 }, { "epoch": 3.42, "grad_norm": 5.623556137084961, "learning_rate": 3.3243617623402547e-06, "loss": 0.07, "step": 347650 }, { "epoch": 3.42, "grad_norm": 5.774777412414551, "learning_rate": 3.324237639886006e-06, "loss": 0.1881, "step": 347675 }, { "epoch": 3.42, "grad_norm": 14.309953689575195, "learning_rate": 3.324113517431758e-06, "loss": 0.0704, "step": 347700 }, { "epoch": 3.42, "grad_norm": 5.865586757659912, "learning_rate": 3.323989394977509e-06, "loss": 0.1969, "step": 347725 }, { "epoch": 3.42, "grad_norm": 10.725885391235352, "learning_rate": 3.323865272523261e-06, "loss": 0.0695, "step": 347750 }, { "epoch": 3.42, "grad_norm": 5.532303810119629, "learning_rate": 3.3237411500690125e-06, "loss": 0.1907, "step": 347775 }, { "epoch": 3.42, "grad_norm": 14.625090599060059, "learning_rate": 3.323617027614764e-06, "loss": 0.0671, "step": 347800 }, { "epoch": 3.42, "grad_norm": 5.156599044799805, "learning_rate": 3.3234929051605153e-06, "loss": 0.2079, "step": 347825 }, { "epoch": 3.42, "grad_norm": 15.118478775024414, "learning_rate": 3.3233687827062674e-06, "loss": 0.0939, "step": 347850 }, { "epoch": 3.42, "grad_norm": 2.6842639446258545, "learning_rate": 3.3232446602520186e-06, "loss": 0.1783, "step": 347875 }, { "epoch": 3.42, "grad_norm": 7.155452728271484, "learning_rate": 3.3231205377977698e-06, "loss": 0.0624, "step": 347900 }, { "epoch": 3.42, "grad_norm": 3.299262523651123, "learning_rate": 3.322996415343522e-06, "loss": 0.1794, "step": 347925 }, { "epoch": 3.42, "grad_norm": 9.540169715881348, "learning_rate": 3.322872292889273e-06, "loss": 0.058, "step": 347950 }, { "epoch": 3.42, "grad_norm": 5.08456563949585, "learning_rate": 3.3227481704350247e-06, "loss": 0.1694, "step": 347975 }, { "epoch": 3.42, "grad_norm": 10.405970573425293, "learning_rate": 3.3226240479807763e-06, "loss": 0.0859, "step": 348000 }, { "epoch": 3.42, "grad_norm": 5.119429588317871, "learning_rate": 3.322499925526528e-06, "loss": 0.1826, "step": 348025 }, { "epoch": 3.42, "grad_norm": 3.9235546588897705, "learning_rate": 3.322375803072279e-06, "loss": 0.0619, "step": 348050 }, { "epoch": 3.42, "grad_norm": 5.915188789367676, "learning_rate": 3.3222516806180312e-06, "loss": 0.1837, "step": 348075 }, { "epoch": 3.42, "grad_norm": 5.663497447967529, "learning_rate": 3.3221275581637824e-06, "loss": 0.0863, "step": 348100 }, { "epoch": 3.42, "grad_norm": 4.9272780418396, "learning_rate": 3.3220034357095337e-06, "loss": 0.1975, "step": 348125 }, { "epoch": 3.42, "grad_norm": 7.182781219482422, "learning_rate": 3.3218793132552857e-06, "loss": 0.0724, "step": 348150 }, { "epoch": 3.42, "grad_norm": 3.3850741386413574, "learning_rate": 3.321755190801037e-06, "loss": 0.212, "step": 348175 }, { "epoch": 3.42, "grad_norm": 8.42502212524414, "learning_rate": 3.3216310683467886e-06, "loss": 0.076, "step": 348200 }, { "epoch": 3.42, "grad_norm": 5.3835039138793945, "learning_rate": 3.3215069458925398e-06, "loss": 0.1899, "step": 348225 }, { "epoch": 3.42, "grad_norm": 10.90582275390625, "learning_rate": 3.321382823438292e-06, "loss": 0.0886, "step": 348250 }, { "epoch": 3.42, "grad_norm": 3.944669723510742, "learning_rate": 3.321258700984043e-06, "loss": 0.2181, "step": 348275 }, { "epoch": 3.42, "grad_norm": 7.54016637802124, "learning_rate": 3.3211345785297943e-06, "loss": 0.0586, "step": 348300 }, { "epoch": 3.42, "grad_norm": 3.9858484268188477, "learning_rate": 3.3210104560755463e-06, "loss": 0.1745, "step": 348325 }, { "epoch": 3.43, "grad_norm": 7.128366947174072, "learning_rate": 3.3208863336212975e-06, "loss": 0.0717, "step": 348350 }, { "epoch": 3.43, "grad_norm": 5.636579990386963, "learning_rate": 3.320762211167049e-06, "loss": 0.1509, "step": 348375 }, { "epoch": 3.43, "grad_norm": 6.286494731903076, "learning_rate": 3.320638088712801e-06, "loss": 0.0802, "step": 348400 }, { "epoch": 3.43, "grad_norm": 6.113216876983643, "learning_rate": 3.3205139662585524e-06, "loss": 0.1688, "step": 348425 }, { "epoch": 3.43, "grad_norm": 9.925969123840332, "learning_rate": 3.3203898438043036e-06, "loss": 0.0761, "step": 348450 }, { "epoch": 3.43, "grad_norm": 5.055477142333984, "learning_rate": 3.3202657213500557e-06, "loss": 0.1639, "step": 348475 }, { "epoch": 3.43, "grad_norm": 11.243982315063477, "learning_rate": 3.320141598895807e-06, "loss": 0.0603, "step": 348500 }, { "epoch": 3.43, "grad_norm": 5.3663434982299805, "learning_rate": 3.320017476441558e-06, "loss": 0.2278, "step": 348525 }, { "epoch": 3.43, "grad_norm": 5.874199867248535, "learning_rate": 3.31989335398731e-06, "loss": 0.0716, "step": 348550 }, { "epoch": 3.43, "grad_norm": 4.726767063140869, "learning_rate": 3.3197692315330614e-06, "loss": 0.1575, "step": 348575 }, { "epoch": 3.43, "grad_norm": 7.568332195281982, "learning_rate": 3.319645109078813e-06, "loss": 0.0582, "step": 348600 }, { "epoch": 3.43, "grad_norm": 5.270962715148926, "learning_rate": 3.3195209866245647e-06, "loss": 0.1614, "step": 348625 }, { "epoch": 3.43, "grad_norm": 6.116958141326904, "learning_rate": 3.3193968641703163e-06, "loss": 0.0751, "step": 348650 }, { "epoch": 3.43, "grad_norm": 5.662648677825928, "learning_rate": 3.3192727417160675e-06, "loss": 0.2352, "step": 348675 }, { "epoch": 3.43, "grad_norm": 9.288451194763184, "learning_rate": 3.3191486192618196e-06, "loss": 0.0766, "step": 348700 }, { "epoch": 3.43, "grad_norm": 3.9470083713531494, "learning_rate": 3.3190244968075708e-06, "loss": 0.1814, "step": 348725 }, { "epoch": 3.43, "grad_norm": 7.646665096282959, "learning_rate": 3.318900374353322e-06, "loss": 0.0753, "step": 348750 }, { "epoch": 3.43, "grad_norm": 4.835182189941406, "learning_rate": 3.318776251899074e-06, "loss": 0.2016, "step": 348775 }, { "epoch": 3.43, "grad_norm": 10.576841354370117, "learning_rate": 3.3186521294448253e-06, "loss": 0.0649, "step": 348800 }, { "epoch": 3.43, "grad_norm": 5.270274639129639, "learning_rate": 3.318528006990577e-06, "loss": 0.1773, "step": 348825 }, { "epoch": 3.43, "grad_norm": 9.971263885498047, "learning_rate": 3.3184038845363285e-06, "loss": 0.056, "step": 348850 }, { "epoch": 3.43, "grad_norm": 3.907776117324829, "learning_rate": 3.31827976208208e-06, "loss": 0.1837, "step": 348875 }, { "epoch": 3.43, "grad_norm": 2.3480236530303955, "learning_rate": 3.3181556396278314e-06, "loss": 0.0677, "step": 348900 }, { "epoch": 3.43, "grad_norm": 5.9271955490112305, "learning_rate": 3.3180315171735834e-06, "loss": 0.1766, "step": 348925 }, { "epoch": 3.43, "grad_norm": 9.288759231567383, "learning_rate": 3.3179073947193346e-06, "loss": 0.0726, "step": 348950 }, { "epoch": 3.43, "grad_norm": 7.392476558685303, "learning_rate": 3.317783272265086e-06, "loss": 0.1793, "step": 348975 }, { "epoch": 3.43, "grad_norm": 6.506258010864258, "learning_rate": 3.317659149810838e-06, "loss": 0.0367, "step": 349000 }, { "epoch": 3.43, "grad_norm": 9.349997520446777, "learning_rate": 3.317535027356589e-06, "loss": 0.1928, "step": 349025 }, { "epoch": 3.43, "grad_norm": 5.732436180114746, "learning_rate": 3.3174109049023408e-06, "loss": 0.0547, "step": 349050 }, { "epoch": 3.43, "grad_norm": 4.616460800170898, "learning_rate": 3.317286782448092e-06, "loss": 0.152, "step": 349075 }, { "epoch": 3.43, "grad_norm": 10.22033405303955, "learning_rate": 3.317162659993844e-06, "loss": 0.0721, "step": 349100 }, { "epoch": 3.43, "grad_norm": 5.494321823120117, "learning_rate": 3.3170385375395952e-06, "loss": 0.1747, "step": 349125 }, { "epoch": 3.43, "grad_norm": 8.413047790527344, "learning_rate": 3.3169144150853465e-06, "loss": 0.0718, "step": 349150 }, { "epoch": 3.43, "grad_norm": 4.594276428222656, "learning_rate": 3.3167902926310985e-06, "loss": 0.1605, "step": 349175 }, { "epoch": 3.43, "grad_norm": 15.04460334777832, "learning_rate": 3.3166661701768497e-06, "loss": 0.081, "step": 349200 }, { "epoch": 3.43, "grad_norm": 3.9032490253448486, "learning_rate": 3.3165420477226014e-06, "loss": 0.2022, "step": 349225 }, { "epoch": 3.43, "grad_norm": 5.444197177886963, "learning_rate": 3.316417925268353e-06, "loss": 0.0698, "step": 349250 }, { "epoch": 3.43, "grad_norm": 4.31749963760376, "learning_rate": 3.3162938028141046e-06, "loss": 0.1806, "step": 349275 }, { "epoch": 3.43, "grad_norm": 9.86609172821045, "learning_rate": 3.316169680359856e-06, "loss": 0.0685, "step": 349300 }, { "epoch": 3.43, "grad_norm": 5.82390832901001, "learning_rate": 3.316045557905608e-06, "loss": 0.1445, "step": 349325 }, { "epoch": 3.43, "grad_norm": 5.8905229568481445, "learning_rate": 3.315921435451359e-06, "loss": 0.0491, "step": 349350 }, { "epoch": 3.44, "grad_norm": 5.463900089263916, "learning_rate": 3.3157973129971103e-06, "loss": 0.1751, "step": 349375 }, { "epoch": 3.44, "grad_norm": 7.265468597412109, "learning_rate": 3.3156731905428624e-06, "loss": 0.054, "step": 349400 }, { "epoch": 3.44, "grad_norm": 3.5721969604492188, "learning_rate": 3.3155490680886136e-06, "loss": 0.1842, "step": 349425 }, { "epoch": 3.44, "grad_norm": 10.855008125305176, "learning_rate": 3.3154249456343652e-06, "loss": 0.0998, "step": 349450 }, { "epoch": 3.44, "grad_norm": 5.568252086639404, "learning_rate": 3.315300823180117e-06, "loss": 0.194, "step": 349475 }, { "epoch": 3.44, "grad_norm": 9.10469913482666, "learning_rate": 3.3151767007258685e-06, "loss": 0.0938, "step": 349500 }, { "epoch": 3.44, "grad_norm": 5.425349712371826, "learning_rate": 3.3150525782716197e-06, "loss": 0.1817, "step": 349525 }, { "epoch": 3.44, "grad_norm": 5.421577453613281, "learning_rate": 3.3149284558173718e-06, "loss": 0.0813, "step": 349550 }, { "epoch": 3.44, "grad_norm": 4.780803680419922, "learning_rate": 3.314804333363123e-06, "loss": 0.1818, "step": 349575 }, { "epoch": 3.44, "grad_norm": 5.85488224029541, "learning_rate": 3.314680210908874e-06, "loss": 0.0784, "step": 349600 }, { "epoch": 3.44, "grad_norm": 4.716854095458984, "learning_rate": 3.3145560884546262e-06, "loss": 0.2309, "step": 349625 }, { "epoch": 3.44, "grad_norm": 4.741396903991699, "learning_rate": 3.3144319660003775e-06, "loss": 0.0672, "step": 349650 }, { "epoch": 3.44, "grad_norm": 5.936684608459473, "learning_rate": 3.314307843546129e-06, "loss": 0.1571, "step": 349675 }, { "epoch": 3.44, "grad_norm": 9.430633544921875, "learning_rate": 3.3141837210918807e-06, "loss": 0.0727, "step": 349700 }, { "epoch": 3.44, "grad_norm": 9.065958023071289, "learning_rate": 3.3140595986376324e-06, "loss": 0.2101, "step": 349725 }, { "epoch": 3.44, "grad_norm": 12.000591278076172, "learning_rate": 3.3139354761833836e-06, "loss": 0.0853, "step": 349750 }, { "epoch": 3.44, "grad_norm": 5.329405784606934, "learning_rate": 3.3138113537291356e-06, "loss": 0.1804, "step": 349775 }, { "epoch": 3.44, "grad_norm": 9.973969459533691, "learning_rate": 3.313687231274887e-06, "loss": 0.0711, "step": 349800 }, { "epoch": 3.44, "grad_norm": 4.900655269622803, "learning_rate": 3.313563108820638e-06, "loss": 0.2175, "step": 349825 }, { "epoch": 3.44, "grad_norm": 2.382554531097412, "learning_rate": 3.31343898636639e-06, "loss": 0.0923, "step": 349850 }, { "epoch": 3.44, "grad_norm": 12.828520774841309, "learning_rate": 3.313319828810311e-06, "loss": 0.1974, "step": 349875 }, { "epoch": 3.44, "grad_norm": 7.53327751159668, "learning_rate": 3.3131957063560628e-06, "loss": 0.0778, "step": 349900 }, { "epoch": 3.44, "grad_norm": 5.374624252319336, "learning_rate": 3.3130715839018144e-06, "loss": 0.1681, "step": 349925 }, { "epoch": 3.44, "grad_norm": 8.668685913085938, "learning_rate": 3.312947461447566e-06, "loss": 0.0844, "step": 349950 }, { "epoch": 3.44, "grad_norm": 4.378701686859131, "learning_rate": 3.3128233389933173e-06, "loss": 0.1541, "step": 349975 }, { "epoch": 3.44, "grad_norm": 7.376828670501709, "learning_rate": 3.3126992165390693e-06, "loss": 0.0713, "step": 350000 }, { "epoch": 3.44, "grad_norm": 3.969841480255127, "learning_rate": 3.3125750940848205e-06, "loss": 0.1664, "step": 350025 }, { "epoch": 3.44, "grad_norm": 9.72647762298584, "learning_rate": 3.3124509716305717e-06, "loss": 0.0711, "step": 350050 }, { "epoch": 3.44, "grad_norm": 5.048351764678955, "learning_rate": 3.312326849176324e-06, "loss": 0.1901, "step": 350075 }, { "epoch": 3.44, "grad_norm": 16.13064193725586, "learning_rate": 3.312202726722075e-06, "loss": 0.0657, "step": 350100 }, { "epoch": 3.44, "grad_norm": 3.7434675693511963, "learning_rate": 3.3120786042678266e-06, "loss": 0.2543, "step": 350125 }, { "epoch": 3.44, "grad_norm": 10.639007568359375, "learning_rate": 3.3119544818135783e-06, "loss": 0.0715, "step": 350150 }, { "epoch": 3.44, "grad_norm": 4.651103973388672, "learning_rate": 3.31183035935933e-06, "loss": 0.1735, "step": 350175 }, { "epoch": 3.44, "grad_norm": 8.529566764831543, "learning_rate": 3.311706236905081e-06, "loss": 0.0631, "step": 350200 }, { "epoch": 3.44, "grad_norm": 2.6794402599334717, "learning_rate": 3.311582114450833e-06, "loss": 0.1812, "step": 350225 }, { "epoch": 3.44, "grad_norm": 6.607969284057617, "learning_rate": 3.3114579919965844e-06, "loss": 0.0612, "step": 350250 }, { "epoch": 3.44, "grad_norm": 5.5651655197143555, "learning_rate": 3.311333869542336e-06, "loss": 0.155, "step": 350275 }, { "epoch": 3.44, "grad_norm": 7.268320083618164, "learning_rate": 3.3112097470880877e-06, "loss": 0.0743, "step": 350300 }, { "epoch": 3.44, "grad_norm": 10.7701997756958, "learning_rate": 3.3110856246338393e-06, "loss": 0.2348, "step": 350325 }, { "epoch": 3.44, "grad_norm": 9.484556198120117, "learning_rate": 3.3109615021795905e-06, "loss": 0.0691, "step": 350350 }, { "epoch": 3.44, "grad_norm": 4.629771709442139, "learning_rate": 3.3108373797253426e-06, "loss": 0.1831, "step": 350375 }, { "epoch": 3.45, "grad_norm": 7.188766002655029, "learning_rate": 3.3107132572710938e-06, "loss": 0.0693, "step": 350400 }, { "epoch": 3.45, "grad_norm": 3.710736036300659, "learning_rate": 3.310589134816845e-06, "loss": 0.1597, "step": 350425 }, { "epoch": 3.45, "grad_norm": 22.960926055908203, "learning_rate": 3.310465012362597e-06, "loss": 0.0786, "step": 350450 }, { "epoch": 3.45, "grad_norm": 6.330512523651123, "learning_rate": 3.3103408899083483e-06, "loss": 0.1943, "step": 350475 }, { "epoch": 3.45, "grad_norm": 3.9829399585723877, "learning_rate": 3.3102167674541e-06, "loss": 0.0872, "step": 350500 }, { "epoch": 3.45, "grad_norm": 5.744307041168213, "learning_rate": 3.310092644999851e-06, "loss": 0.1604, "step": 350525 }, { "epoch": 3.45, "grad_norm": 8.512492179870605, "learning_rate": 3.309968522545603e-06, "loss": 0.0698, "step": 350550 }, { "epoch": 3.45, "grad_norm": 5.314022064208984, "learning_rate": 3.3098444000913544e-06, "loss": 0.1448, "step": 350575 }, { "epoch": 3.45, "grad_norm": 7.439365863800049, "learning_rate": 3.3097202776371056e-06, "loss": 0.0645, "step": 350600 }, { "epoch": 3.45, "grad_norm": 3.514836549758911, "learning_rate": 3.3095961551828576e-06, "loss": 0.1754, "step": 350625 }, { "epoch": 3.45, "grad_norm": 9.887431144714355, "learning_rate": 3.309472032728609e-06, "loss": 0.0708, "step": 350650 }, { "epoch": 3.45, "grad_norm": 4.139499187469482, "learning_rate": 3.3093479102743605e-06, "loss": 0.1721, "step": 350675 }, { "epoch": 3.45, "grad_norm": 7.01707124710083, "learning_rate": 3.309223787820112e-06, "loss": 0.0923, "step": 350700 }, { "epoch": 3.45, "grad_norm": 4.876312255859375, "learning_rate": 3.3090996653658638e-06, "loss": 0.1809, "step": 350725 }, { "epoch": 3.45, "grad_norm": 6.4796061515808105, "learning_rate": 3.308975542911615e-06, "loss": 0.054, "step": 350750 }, { "epoch": 3.45, "grad_norm": 4.315120697021484, "learning_rate": 3.308851420457367e-06, "loss": 0.2001, "step": 350775 }, { "epoch": 3.45, "grad_norm": 3.069624185562134, "learning_rate": 3.3087272980031182e-06, "loss": 0.0677, "step": 350800 }, { "epoch": 3.45, "grad_norm": 2.721653699874878, "learning_rate": 3.3086031755488695e-06, "loss": 0.1775, "step": 350825 }, { "epoch": 3.45, "grad_norm": 8.501615524291992, "learning_rate": 3.3084790530946215e-06, "loss": 0.0699, "step": 350850 }, { "epoch": 3.45, "grad_norm": 13.876014709472656, "learning_rate": 3.3083549306403727e-06, "loss": 0.1781, "step": 350875 }, { "epoch": 3.45, "grad_norm": 5.03215217590332, "learning_rate": 3.3082308081861244e-06, "loss": 0.0535, "step": 350900 }, { "epoch": 3.45, "grad_norm": 3.997168779373169, "learning_rate": 3.308106685731876e-06, "loss": 0.1502, "step": 350925 }, { "epoch": 3.45, "grad_norm": 7.150359630584717, "learning_rate": 3.3079825632776276e-06, "loss": 0.0815, "step": 350950 }, { "epoch": 3.45, "grad_norm": 4.599595069885254, "learning_rate": 3.307858440823379e-06, "loss": 0.1663, "step": 350975 }, { "epoch": 3.45, "grad_norm": 17.091768264770508, "learning_rate": 3.307734318369131e-06, "loss": 0.0885, "step": 351000 }, { "epoch": 3.45, "grad_norm": 3.758918285369873, "learning_rate": 3.307610195914882e-06, "loss": 0.1743, "step": 351025 }, { "epoch": 3.45, "grad_norm": 67.33397674560547, "learning_rate": 3.3074860734606333e-06, "loss": 0.0904, "step": 351050 }, { "epoch": 3.45, "grad_norm": 5.421295642852783, "learning_rate": 3.307366915904555e-06, "loss": 0.1892, "step": 351075 }, { "epoch": 3.45, "grad_norm": 8.488892555236816, "learning_rate": 3.3072427934503064e-06, "loss": 0.0635, "step": 351100 }, { "epoch": 3.45, "grad_norm": 7.602400302886963, "learning_rate": 3.307118670996058e-06, "loss": 0.2043, "step": 351125 }, { "epoch": 3.45, "grad_norm": 4.8493194580078125, "learning_rate": 3.3069945485418097e-06, "loss": 0.0855, "step": 351150 }, { "epoch": 3.45, "grad_norm": 4.6528520584106445, "learning_rate": 3.3068704260875613e-06, "loss": 0.2311, "step": 351175 }, { "epoch": 3.45, "grad_norm": 8.349205017089844, "learning_rate": 3.3067463036333125e-06, "loss": 0.088, "step": 351200 }, { "epoch": 3.45, "grad_norm": 5.42432975769043, "learning_rate": 3.3066221811790646e-06, "loss": 0.1727, "step": 351225 }, { "epoch": 3.45, "grad_norm": 5.054111957550049, "learning_rate": 3.306498058724816e-06, "loss": 0.0677, "step": 351250 }, { "epoch": 3.45, "grad_norm": 3.649034023284912, "learning_rate": 3.306373936270567e-06, "loss": 0.2038, "step": 351275 }, { "epoch": 3.45, "grad_norm": 10.697624206542969, "learning_rate": 3.306249813816319e-06, "loss": 0.0779, "step": 351300 }, { "epoch": 3.45, "grad_norm": 4.394155502319336, "learning_rate": 3.3061256913620703e-06, "loss": 0.1707, "step": 351325 }, { "epoch": 3.45, "grad_norm": 7.15280294418335, "learning_rate": 3.306001568907822e-06, "loss": 0.0589, "step": 351350 }, { "epoch": 3.45, "grad_norm": 5.433685302734375, "learning_rate": 3.3058774464535735e-06, "loss": 0.1823, "step": 351375 }, { "epoch": 3.46, "grad_norm": 5.603508949279785, "learning_rate": 3.305753323999325e-06, "loss": 0.0542, "step": 351400 }, { "epoch": 3.46, "grad_norm": 4.302981853485107, "learning_rate": 3.3056292015450764e-06, "loss": 0.1697, "step": 351425 }, { "epoch": 3.46, "grad_norm": 9.961082458496094, "learning_rate": 3.3055050790908285e-06, "loss": 0.0684, "step": 351450 }, { "epoch": 3.46, "grad_norm": 3.713347911834717, "learning_rate": 3.3053809566365797e-06, "loss": 0.1597, "step": 351475 }, { "epoch": 3.46, "grad_norm": 12.153068542480469, "learning_rate": 3.305256834182331e-06, "loss": 0.0514, "step": 351500 }, { "epoch": 3.46, "grad_norm": 2.9860026836395264, "learning_rate": 3.305132711728083e-06, "loss": 0.1616, "step": 351525 }, { "epoch": 3.46, "grad_norm": 6.354877948760986, "learning_rate": 3.305008589273834e-06, "loss": 0.0764, "step": 351550 }, { "epoch": 3.46, "grad_norm": 5.83951473236084, "learning_rate": 3.3048844668195858e-06, "loss": 0.1568, "step": 351575 }, { "epoch": 3.46, "grad_norm": 7.877013683319092, "learning_rate": 3.3047603443653374e-06, "loss": 0.0722, "step": 351600 }, { "epoch": 3.46, "grad_norm": 5.264118194580078, "learning_rate": 3.304636221911089e-06, "loss": 0.1902, "step": 351625 }, { "epoch": 3.46, "grad_norm": 7.102202415466309, "learning_rate": 3.3045120994568403e-06, "loss": 0.0759, "step": 351650 }, { "epoch": 3.46, "grad_norm": 4.206773281097412, "learning_rate": 3.3043879770025923e-06, "loss": 0.1862, "step": 351675 }, { "epoch": 3.46, "grad_norm": 9.279465675354004, "learning_rate": 3.3042638545483435e-06, "loss": 0.0545, "step": 351700 }, { "epoch": 3.46, "grad_norm": 4.183521747589111, "learning_rate": 3.3041397320940947e-06, "loss": 0.2012, "step": 351725 }, { "epoch": 3.46, "grad_norm": 9.939201354980469, "learning_rate": 3.304015609639847e-06, "loss": 0.0774, "step": 351750 }, { "epoch": 3.46, "grad_norm": 4.773289203643799, "learning_rate": 3.303891487185598e-06, "loss": 0.1831, "step": 351775 }, { "epoch": 3.46, "grad_norm": 4.542191505432129, "learning_rate": 3.3037673647313497e-06, "loss": 0.0968, "step": 351800 }, { "epoch": 3.46, "grad_norm": 8.26281452178955, "learning_rate": 3.3036432422771013e-06, "loss": 0.2078, "step": 351825 }, { "epoch": 3.46, "grad_norm": 3.9738452434539795, "learning_rate": 3.303519119822853e-06, "loss": 0.0958, "step": 351850 }, { "epoch": 3.46, "grad_norm": 7.665196418762207, "learning_rate": 3.303394997368604e-06, "loss": 0.1906, "step": 351875 }, { "epoch": 3.46, "grad_norm": 6.673989772796631, "learning_rate": 3.303270874914356e-06, "loss": 0.071, "step": 351900 }, { "epoch": 3.46, "grad_norm": 5.435502052307129, "learning_rate": 3.3031467524601074e-06, "loss": 0.1779, "step": 351925 }, { "epoch": 3.46, "grad_norm": 12.893476486206055, "learning_rate": 3.3030226300058586e-06, "loss": 0.0734, "step": 351950 }, { "epoch": 3.46, "grad_norm": 4.10478401184082, "learning_rate": 3.3028985075516102e-06, "loss": 0.1657, "step": 351975 }, { "epoch": 3.46, "grad_norm": 4.89050817489624, "learning_rate": 3.302774385097362e-06, "loss": 0.0633, "step": 352000 }, { "epoch": 3.46, "grad_norm": 5.371189117431641, "learning_rate": 3.3026502626431135e-06, "loss": 0.2146, "step": 352025 }, { "epoch": 3.46, "grad_norm": 6.712238311767578, "learning_rate": 3.3025261401888647e-06, "loss": 0.0633, "step": 352050 }, { "epoch": 3.46, "grad_norm": 5.217956066131592, "learning_rate": 3.3024020177346168e-06, "loss": 0.228, "step": 352075 }, { "epoch": 3.46, "grad_norm": 4.995025634765625, "learning_rate": 3.302277895280368e-06, "loss": 0.0589, "step": 352100 }, { "epoch": 3.46, "grad_norm": 4.652365207672119, "learning_rate": 3.302153772826119e-06, "loss": 0.1724, "step": 352125 }, { "epoch": 3.46, "grad_norm": 6.214806079864502, "learning_rate": 3.3020296503718713e-06, "loss": 0.084, "step": 352150 }, { "epoch": 3.46, "grad_norm": 6.61215353012085, "learning_rate": 3.3019055279176225e-06, "loss": 0.1818, "step": 352175 }, { "epoch": 3.46, "grad_norm": 9.808767318725586, "learning_rate": 3.301781405463374e-06, "loss": 0.0649, "step": 352200 }, { "epoch": 3.46, "grad_norm": 3.998556613922119, "learning_rate": 3.3016572830091258e-06, "loss": 0.1711, "step": 352225 }, { "epoch": 3.46, "grad_norm": 6.971159934997559, "learning_rate": 3.3015331605548774e-06, "loss": 0.0575, "step": 352250 }, { "epoch": 3.46, "grad_norm": 4.7511067390441895, "learning_rate": 3.3014090381006286e-06, "loss": 0.2016, "step": 352275 }, { "epoch": 3.46, "grad_norm": 13.326948165893555, "learning_rate": 3.3012849156463807e-06, "loss": 0.0911, "step": 352300 }, { "epoch": 3.46, "grad_norm": 7.734925270080566, "learning_rate": 3.301160793192132e-06, "loss": 0.1788, "step": 352325 }, { "epoch": 3.46, "grad_norm": 10.568488121032715, "learning_rate": 3.301036670737883e-06, "loss": 0.0641, "step": 352350 }, { "epoch": 3.46, "grad_norm": 5.0047807693481445, "learning_rate": 3.300912548283635e-06, "loss": 0.2002, "step": 352375 }, { "epoch": 3.46, "grad_norm": 8.961427688598633, "learning_rate": 3.3007884258293863e-06, "loss": 0.0807, "step": 352400 }, { "epoch": 3.47, "grad_norm": 5.451277732849121, "learning_rate": 3.300664303375138e-06, "loss": 0.1836, "step": 352425 }, { "epoch": 3.47, "grad_norm": 9.823437690734863, "learning_rate": 3.3005401809208896e-06, "loss": 0.0675, "step": 352450 }, { "epoch": 3.47, "grad_norm": 6.0490007400512695, "learning_rate": 3.3004160584666413e-06, "loss": 0.1738, "step": 352475 }, { "epoch": 3.47, "grad_norm": 9.254547119140625, "learning_rate": 3.3002919360123925e-06, "loss": 0.0676, "step": 352500 }, { "epoch": 3.47, "grad_norm": 3.1462135314941406, "learning_rate": 3.3001678135581445e-06, "loss": 0.1805, "step": 352525 }, { "epoch": 3.47, "grad_norm": 13.187281608581543, "learning_rate": 3.3000436911038957e-06, "loss": 0.0799, "step": 352550 }, { "epoch": 3.47, "grad_norm": 6.142897129058838, "learning_rate": 3.299919568649647e-06, "loss": 0.1805, "step": 352575 }, { "epoch": 3.47, "grad_norm": 8.835609436035156, "learning_rate": 3.299795446195399e-06, "loss": 0.083, "step": 352600 }, { "epoch": 3.47, "grad_norm": 6.177483558654785, "learning_rate": 3.2996713237411502e-06, "loss": 0.1968, "step": 352625 }, { "epoch": 3.47, "grad_norm": 9.805525779724121, "learning_rate": 3.299547201286902e-06, "loss": 0.0716, "step": 352650 }, { "epoch": 3.47, "grad_norm": 4.5208420753479, "learning_rate": 3.2994230788326535e-06, "loss": 0.2111, "step": 352675 }, { "epoch": 3.47, "grad_norm": 8.04988956451416, "learning_rate": 3.299298956378405e-06, "loss": 0.0596, "step": 352700 }, { "epoch": 3.47, "grad_norm": 5.086166858673096, "learning_rate": 3.2991748339241563e-06, "loss": 0.1921, "step": 352725 }, { "epoch": 3.47, "grad_norm": 8.39663028717041, "learning_rate": 3.2990507114699084e-06, "loss": 0.0663, "step": 352750 }, { "epoch": 3.47, "grad_norm": 8.548224449157715, "learning_rate": 3.2989265890156596e-06, "loss": 0.1565, "step": 352775 }, { "epoch": 3.47, "grad_norm": 15.898269653320312, "learning_rate": 3.298802466561411e-06, "loss": 0.0869, "step": 352800 }, { "epoch": 3.47, "grad_norm": 7.2844414710998535, "learning_rate": 3.2986783441071624e-06, "loss": 0.1737, "step": 352825 }, { "epoch": 3.47, "grad_norm": 4.228723526000977, "learning_rate": 3.298554221652914e-06, "loss": 0.0605, "step": 352850 }, { "epoch": 3.47, "grad_norm": 4.454251766204834, "learning_rate": 3.2984300991986657e-06, "loss": 0.1568, "step": 352875 }, { "epoch": 3.47, "grad_norm": 8.971534729003906, "learning_rate": 3.298305976744417e-06, "loss": 0.0544, "step": 352900 }, { "epoch": 3.47, "grad_norm": 5.019927024841309, "learning_rate": 3.298181854290169e-06, "loss": 0.162, "step": 352925 }, { "epoch": 3.47, "grad_norm": 4.593644618988037, "learning_rate": 3.29805773183592e-06, "loss": 0.0617, "step": 352950 }, { "epoch": 3.47, "grad_norm": 6.000188827514648, "learning_rate": 3.2979336093816714e-06, "loss": 0.1645, "step": 352975 }, { "epoch": 3.47, "grad_norm": 4.264408588409424, "learning_rate": 3.2978094869274235e-06, "loss": 0.0699, "step": 353000 }, { "epoch": 3.47, "grad_norm": 3.728532314300537, "learning_rate": 3.2976853644731747e-06, "loss": 0.1307, "step": 353025 }, { "epoch": 3.47, "grad_norm": 11.58987045288086, "learning_rate": 3.2975612420189263e-06, "loss": 0.0921, "step": 353050 }, { "epoch": 3.47, "grad_norm": 3.6681859493255615, "learning_rate": 3.297437119564678e-06, "loss": 0.1621, "step": 353075 }, { "epoch": 3.47, "grad_norm": 8.250770568847656, "learning_rate": 3.2973129971104296e-06, "loss": 0.0627, "step": 353100 }, { "epoch": 3.47, "grad_norm": 4.513421535491943, "learning_rate": 3.297188874656181e-06, "loss": 0.1959, "step": 353125 }, { "epoch": 3.47, "grad_norm": 6.6486639976501465, "learning_rate": 3.297064752201933e-06, "loss": 0.0806, "step": 353150 }, { "epoch": 3.47, "grad_norm": 7.006109714508057, "learning_rate": 3.296940629747684e-06, "loss": 0.1361, "step": 353175 }, { "epoch": 3.47, "grad_norm": 9.73591136932373, "learning_rate": 3.2968165072934357e-06, "loss": 0.0811, "step": 353200 }, { "epoch": 3.47, "grad_norm": 5.828732967376709, "learning_rate": 3.2966923848391873e-06, "loss": 0.2288, "step": 353225 }, { "epoch": 3.47, "grad_norm": 6.2671990394592285, "learning_rate": 3.296568262384939e-06, "loss": 0.0582, "step": 353250 }, { "epoch": 3.47, "grad_norm": 5.040102005004883, "learning_rate": 3.29644413993069e-06, "loss": 0.1986, "step": 353275 }, { "epoch": 3.47, "grad_norm": 11.090483665466309, "learning_rate": 3.2963200174764422e-06, "loss": 0.0987, "step": 353300 }, { "epoch": 3.47, "grad_norm": 4.090771198272705, "learning_rate": 3.2961958950221935e-06, "loss": 0.2186, "step": 353325 }, { "epoch": 3.47, "grad_norm": 9.99233341217041, "learning_rate": 3.2960717725679447e-06, "loss": 0.0724, "step": 353350 }, { "epoch": 3.47, "grad_norm": 4.577861785888672, "learning_rate": 3.2959476501136967e-06, "loss": 0.1607, "step": 353375 }, { "epoch": 3.47, "grad_norm": 5.907905578613281, "learning_rate": 3.295823527659448e-06, "loss": 0.0834, "step": 353400 }, { "epoch": 3.47, "grad_norm": 4.844710350036621, "learning_rate": 3.2956994052051996e-06, "loss": 0.1633, "step": 353425 }, { "epoch": 3.48, "grad_norm": 8.386363983154297, "learning_rate": 3.295575282750951e-06, "loss": 0.0871, "step": 353450 }, { "epoch": 3.48, "grad_norm": 4.317234039306641, "learning_rate": 3.295451160296703e-06, "loss": 0.1605, "step": 353475 }, { "epoch": 3.48, "grad_norm": 10.889877319335938, "learning_rate": 3.295327037842454e-06, "loss": 0.0641, "step": 353500 }, { "epoch": 3.48, "grad_norm": 3.683164358139038, "learning_rate": 3.295202915388206e-06, "loss": 0.1843, "step": 353525 }, { "epoch": 3.48, "grad_norm": 15.645800590515137, "learning_rate": 3.2950787929339573e-06, "loss": 0.073, "step": 353550 }, { "epoch": 3.48, "grad_norm": 5.544762134552002, "learning_rate": 3.2949546704797085e-06, "loss": 0.1784, "step": 353575 }, { "epoch": 3.48, "grad_norm": 9.131208419799805, "learning_rate": 3.2948305480254606e-06, "loss": 0.0601, "step": 353600 }, { "epoch": 3.48, "grad_norm": 6.133011341094971, "learning_rate": 3.294706425571212e-06, "loss": 0.1649, "step": 353625 }, { "epoch": 3.48, "grad_norm": 6.5992021560668945, "learning_rate": 3.2945823031169634e-06, "loss": 0.0676, "step": 353650 }, { "epoch": 3.48, "grad_norm": 5.024015426635742, "learning_rate": 3.2944581806627147e-06, "loss": 0.1834, "step": 353675 }, { "epoch": 3.48, "grad_norm": 9.870713233947754, "learning_rate": 3.2943340582084667e-06, "loss": 0.0765, "step": 353700 }, { "epoch": 3.48, "grad_norm": 4.921339988708496, "learning_rate": 3.294209935754218e-06, "loss": 0.1929, "step": 353725 }, { "epoch": 3.48, "grad_norm": 16.42936897277832, "learning_rate": 3.294085813299969e-06, "loss": 0.1009, "step": 353750 }, { "epoch": 3.48, "grad_norm": 3.842285633087158, "learning_rate": 3.293961690845721e-06, "loss": 0.2147, "step": 353775 }, { "epoch": 3.48, "grad_norm": 6.987142562866211, "learning_rate": 3.2938375683914724e-06, "loss": 0.0645, "step": 353800 }, { "epoch": 3.48, "grad_norm": 6.760077953338623, "learning_rate": 3.293713445937224e-06, "loss": 0.1715, "step": 353825 }, { "epoch": 3.48, "grad_norm": 1.4786865711212158, "learning_rate": 3.2935893234829757e-06, "loss": 0.0496, "step": 353850 }, { "epoch": 3.48, "grad_norm": 7.732788562774658, "learning_rate": 3.2934652010287273e-06, "loss": 0.2048, "step": 353875 }, { "epoch": 3.48, "grad_norm": 7.871372222900391, "learning_rate": 3.2933410785744785e-06, "loss": 0.0807, "step": 353900 }, { "epoch": 3.48, "grad_norm": 4.696154594421387, "learning_rate": 3.2932169561202306e-06, "loss": 0.2019, "step": 353925 }, { "epoch": 3.48, "grad_norm": 12.299579620361328, "learning_rate": 3.2930928336659818e-06, "loss": 0.0604, "step": 353950 }, { "epoch": 3.48, "grad_norm": 3.7068512439727783, "learning_rate": 3.292968711211733e-06, "loss": 0.1585, "step": 353975 }, { "epoch": 3.48, "grad_norm": 8.548576354980469, "learning_rate": 3.292844588757485e-06, "loss": 0.087, "step": 354000 }, { "epoch": 3.48, "grad_norm": 5.373757839202881, "learning_rate": 3.2927204663032363e-06, "loss": 0.1992, "step": 354025 }, { "epoch": 3.48, "grad_norm": 9.886786460876465, "learning_rate": 3.292596343848988e-06, "loss": 0.0525, "step": 354050 }, { "epoch": 3.48, "grad_norm": 4.891067028045654, "learning_rate": 3.2924722213947395e-06, "loss": 0.2077, "step": 354075 }, { "epoch": 3.48, "grad_norm": 7.917667388916016, "learning_rate": 3.292348098940491e-06, "loss": 0.054, "step": 354100 }, { "epoch": 3.48, "grad_norm": 5.946413516998291, "learning_rate": 3.2922239764862424e-06, "loss": 0.166, "step": 354125 }, { "epoch": 3.48, "grad_norm": 12.079440116882324, "learning_rate": 3.2920998540319944e-06, "loss": 0.0815, "step": 354150 }, { "epoch": 3.48, "grad_norm": 4.287467956542969, "learning_rate": 3.2919757315777457e-06, "loss": 0.1708, "step": 354175 }, { "epoch": 3.48, "grad_norm": 5.481043338775635, "learning_rate": 3.291851609123497e-06, "loss": 0.0762, "step": 354200 }, { "epoch": 3.48, "grad_norm": 4.059860706329346, "learning_rate": 3.291727486669249e-06, "loss": 0.1808, "step": 354225 }, { "epoch": 3.48, "grad_norm": 6.511974334716797, "learning_rate": 3.291603364215e-06, "loss": 0.0796, "step": 354250 }, { "epoch": 3.48, "grad_norm": 5.722099781036377, "learning_rate": 3.2914792417607518e-06, "loss": 0.1441, "step": 354275 }, { "epoch": 3.48, "grad_norm": 9.40972900390625, "learning_rate": 3.2913551193065034e-06, "loss": 0.0762, "step": 354300 }, { "epoch": 3.48, "grad_norm": 5.69827938079834, "learning_rate": 3.291230996852255e-06, "loss": 0.1787, "step": 354325 }, { "epoch": 3.48, "grad_norm": 5.241607666015625, "learning_rate": 3.2911068743980063e-06, "loss": 0.0698, "step": 354350 }, { "epoch": 3.48, "grad_norm": 5.652988910675049, "learning_rate": 3.2909827519437583e-06, "loss": 0.213, "step": 354375 }, { "epoch": 3.48, "grad_norm": 10.093180656433105, "learning_rate": 3.2908586294895095e-06, "loss": 0.055, "step": 354400 }, { "epoch": 3.48, "grad_norm": 7.003540515899658, "learning_rate": 3.2907345070352607e-06, "loss": 0.2304, "step": 354425 }, { "epoch": 3.49, "grad_norm": 7.7115702629089355, "learning_rate": 3.290610384581013e-06, "loss": 0.0716, "step": 354450 }, { "epoch": 3.49, "grad_norm": 5.3457417488098145, "learning_rate": 3.290486262126764e-06, "loss": 0.1862, "step": 354475 }, { "epoch": 3.49, "grad_norm": 8.772969245910645, "learning_rate": 3.2903621396725156e-06, "loss": 0.0517, "step": 354500 }, { "epoch": 3.49, "grad_norm": 4.905041217803955, "learning_rate": 3.290238017218267e-06, "loss": 0.1881, "step": 354525 }, { "epoch": 3.49, "grad_norm": 4.570329666137695, "learning_rate": 3.290113894764019e-06, "loss": 0.0834, "step": 354550 }, { "epoch": 3.49, "grad_norm": 4.723003387451172, "learning_rate": 3.28999473720794e-06, "loss": 0.2066, "step": 354575 }, { "epoch": 3.49, "grad_norm": 4.500800609588623, "learning_rate": 3.289870614753692e-06, "loss": 0.0691, "step": 354600 }, { "epoch": 3.49, "grad_norm": 5.809078216552734, "learning_rate": 3.289746492299443e-06, "loss": 0.1912, "step": 354625 }, { "epoch": 3.49, "grad_norm": 7.869068145751953, "learning_rate": 3.2896223698451944e-06, "loss": 0.0892, "step": 354650 }, { "epoch": 3.49, "grad_norm": 5.589217185974121, "learning_rate": 3.2894982473909465e-06, "loss": 0.1698, "step": 354675 }, { "epoch": 3.49, "grad_norm": 35.143394470214844, "learning_rate": 3.2893741249366977e-06, "loss": 0.0774, "step": 354700 }, { "epoch": 3.49, "grad_norm": 8.487812995910645, "learning_rate": 3.2892500024824493e-06, "loss": 0.2018, "step": 354725 }, { "epoch": 3.49, "grad_norm": 6.99962854385376, "learning_rate": 3.289125880028201e-06, "loss": 0.093, "step": 354750 }, { "epoch": 3.49, "grad_norm": 5.193014621734619, "learning_rate": 3.2890017575739526e-06, "loss": 0.221, "step": 354775 }, { "epoch": 3.49, "grad_norm": 10.708909034729004, "learning_rate": 3.288877635119704e-06, "loss": 0.0785, "step": 354800 }, { "epoch": 3.49, "grad_norm": 4.211935520172119, "learning_rate": 3.288753512665456e-06, "loss": 0.1855, "step": 354825 }, { "epoch": 3.49, "grad_norm": 7.3122878074646, "learning_rate": 3.288629390211207e-06, "loss": 0.0717, "step": 354850 }, { "epoch": 3.49, "grad_norm": 4.009523391723633, "learning_rate": 3.2885052677569583e-06, "loss": 0.2025, "step": 354875 }, { "epoch": 3.49, "grad_norm": 5.409256935119629, "learning_rate": 3.2883811453027103e-06, "loss": 0.06, "step": 354900 }, { "epoch": 3.49, "grad_norm": 12.48989486694336, "learning_rate": 3.2882570228484616e-06, "loss": 0.1916, "step": 354925 }, { "epoch": 3.49, "grad_norm": 0.5809037089347839, "learning_rate": 3.288132900394213e-06, "loss": 0.0602, "step": 354950 }, { "epoch": 3.49, "grad_norm": 4.542558670043945, "learning_rate": 3.288008777939965e-06, "loss": 0.1711, "step": 354975 }, { "epoch": 3.49, "grad_norm": 10.45190715789795, "learning_rate": 3.2878846554857165e-06, "loss": 0.0762, "step": 355000 }, { "epoch": 3.49, "grad_norm": 5.1880879402160645, "learning_rate": 3.2877605330314677e-06, "loss": 0.1872, "step": 355025 }, { "epoch": 3.49, "grad_norm": 11.439258575439453, "learning_rate": 3.2876364105772197e-06, "loss": 0.0789, "step": 355050 }, { "epoch": 3.49, "grad_norm": 4.282793045043945, "learning_rate": 3.287512288122971e-06, "loss": 0.1681, "step": 355075 }, { "epoch": 3.49, "grad_norm": 10.75300407409668, "learning_rate": 3.287388165668722e-06, "loss": 0.0897, "step": 355100 }, { "epoch": 3.49, "grad_norm": 4.82418155670166, "learning_rate": 3.287264043214474e-06, "loss": 0.2037, "step": 355125 }, { "epoch": 3.49, "grad_norm": 14.344461441040039, "learning_rate": 3.2871399207602254e-06, "loss": 0.0766, "step": 355150 }, { "epoch": 3.49, "grad_norm": 6.045772075653076, "learning_rate": 3.287015798305977e-06, "loss": 0.1774, "step": 355175 }, { "epoch": 3.49, "grad_norm": 8.951903343200684, "learning_rate": 3.2868916758517283e-06, "loss": 0.0727, "step": 355200 }, { "epoch": 3.49, "grad_norm": 21.795700073242188, "learning_rate": 3.2867675533974803e-06, "loss": 0.1906, "step": 355225 }, { "epoch": 3.49, "grad_norm": 9.775988578796387, "learning_rate": 3.2866434309432315e-06, "loss": 0.095, "step": 355250 }, { "epoch": 3.49, "grad_norm": 6.287474155426025, "learning_rate": 3.2865193084889828e-06, "loss": 0.155, "step": 355275 }, { "epoch": 3.49, "grad_norm": 1.1485799551010132, "learning_rate": 3.286395186034735e-06, "loss": 0.0774, "step": 355300 }, { "epoch": 3.49, "grad_norm": 5.319151878356934, "learning_rate": 3.286271063580486e-06, "loss": 0.1594, "step": 355325 }, { "epoch": 3.49, "grad_norm": 6.203914165496826, "learning_rate": 3.2861469411262377e-06, "loss": 0.0504, "step": 355350 }, { "epoch": 3.49, "grad_norm": 7.598555564880371, "learning_rate": 3.2860228186719893e-06, "loss": 0.1921, "step": 355375 }, { "epoch": 3.49, "grad_norm": 8.379128456115723, "learning_rate": 3.285898696217741e-06, "loss": 0.0688, "step": 355400 }, { "epoch": 3.49, "grad_norm": 5.138711929321289, "learning_rate": 3.285774573763492e-06, "loss": 0.1569, "step": 355425 }, { "epoch": 3.49, "grad_norm": 3.3123130798339844, "learning_rate": 3.285650451309244e-06, "loss": 0.06, "step": 355450 }, { "epoch": 3.5, "grad_norm": 3.7052297592163086, "learning_rate": 3.2855263288549954e-06, "loss": 0.1947, "step": 355475 }, { "epoch": 3.5, "grad_norm": 9.19910717010498, "learning_rate": 3.2854022064007466e-06, "loss": 0.0698, "step": 355500 }, { "epoch": 3.5, "grad_norm": 4.584941864013672, "learning_rate": 3.2852780839464987e-06, "loss": 0.1763, "step": 355525 }, { "epoch": 3.5, "grad_norm": 8.876066207885742, "learning_rate": 3.28515396149225e-06, "loss": 0.0842, "step": 355550 }, { "epoch": 3.5, "grad_norm": 7.717942237854004, "learning_rate": 3.2850298390380015e-06, "loss": 0.1876, "step": 355575 }, { "epoch": 3.5, "grad_norm": 5.1467390060424805, "learning_rate": 3.284905716583753e-06, "loss": 0.0632, "step": 355600 }, { "epoch": 3.5, "grad_norm": 4.735379219055176, "learning_rate": 3.284781594129505e-06, "loss": 0.1774, "step": 355625 }, { "epoch": 3.5, "grad_norm": 7.540468692779541, "learning_rate": 3.284657471675256e-06, "loss": 0.055, "step": 355650 }, { "epoch": 3.5, "grad_norm": 6.551947116851807, "learning_rate": 3.284533349221008e-06, "loss": 0.2086, "step": 355675 }, { "epoch": 3.5, "grad_norm": 4.660171985626221, "learning_rate": 3.2844092267667593e-06, "loss": 0.0586, "step": 355700 }, { "epoch": 3.5, "grad_norm": 5.68841552734375, "learning_rate": 3.2842851043125105e-06, "loss": 0.1664, "step": 355725 }, { "epoch": 3.5, "grad_norm": 6.686117649078369, "learning_rate": 3.2841609818582625e-06, "loss": 0.0706, "step": 355750 }, { "epoch": 3.5, "grad_norm": 5.423543453216553, "learning_rate": 3.2840368594040138e-06, "loss": 0.1655, "step": 355775 }, { "epoch": 3.5, "grad_norm": 13.842301368713379, "learning_rate": 3.2839127369497654e-06, "loss": 0.0788, "step": 355800 }, { "epoch": 3.5, "grad_norm": 5.380305290222168, "learning_rate": 3.283788614495517e-06, "loss": 0.1695, "step": 355825 }, { "epoch": 3.5, "grad_norm": 14.146384239196777, "learning_rate": 3.2836644920412687e-06, "loss": 0.0751, "step": 355850 }, { "epoch": 3.5, "grad_norm": 5.488011360168457, "learning_rate": 3.28354036958702e-06, "loss": 0.1324, "step": 355875 }, { "epoch": 3.5, "grad_norm": 7.256119728088379, "learning_rate": 3.283416247132772e-06, "loss": 0.0491, "step": 355900 }, { "epoch": 3.5, "grad_norm": 4.914528846740723, "learning_rate": 3.283292124678523e-06, "loss": 0.2113, "step": 355925 }, { "epoch": 3.5, "grad_norm": 7.179560661315918, "learning_rate": 3.2831680022242744e-06, "loss": 0.0758, "step": 355950 }, { "epoch": 3.5, "grad_norm": 5.1460161209106445, "learning_rate": 3.283043879770026e-06, "loss": 0.157, "step": 355975 }, { "epoch": 3.5, "grad_norm": 9.065985679626465, "learning_rate": 3.2829197573157776e-06, "loss": 0.0612, "step": 356000 }, { "epoch": 3.5, "grad_norm": 5.824950218200684, "learning_rate": 3.2827956348615293e-06, "loss": 0.1761, "step": 356025 }, { "epoch": 3.5, "grad_norm": 0.23691323399543762, "learning_rate": 3.2826715124072805e-06, "loss": 0.0703, "step": 356050 }, { "epoch": 3.5, "grad_norm": 3.6021289825439453, "learning_rate": 3.2825473899530325e-06, "loss": 0.1708, "step": 356075 }, { "epoch": 3.5, "grad_norm": 5.741420269012451, "learning_rate": 3.2824232674987837e-06, "loss": 0.0656, "step": 356100 }, { "epoch": 3.5, "grad_norm": 4.847342491149902, "learning_rate": 3.2822991450445354e-06, "loss": 0.2106, "step": 356125 }, { "epoch": 3.5, "grad_norm": 13.617868423461914, "learning_rate": 3.282175022590287e-06, "loss": 0.0704, "step": 356150 }, { "epoch": 3.5, "grad_norm": 7.29591178894043, "learning_rate": 3.2820509001360386e-06, "loss": 0.2173, "step": 356175 }, { "epoch": 3.5, "grad_norm": 8.11989688873291, "learning_rate": 3.28192677768179e-06, "loss": 0.0529, "step": 356200 }, { "epoch": 3.5, "grad_norm": 4.291134357452393, "learning_rate": 3.281802655227542e-06, "loss": 0.1264, "step": 356225 }, { "epoch": 3.5, "grad_norm": 4.609512805938721, "learning_rate": 3.281678532773293e-06, "loss": 0.0802, "step": 356250 }, { "epoch": 3.5, "grad_norm": 3.4486265182495117, "learning_rate": 3.2815544103190443e-06, "loss": 0.154, "step": 356275 }, { "epoch": 3.5, "grad_norm": 10.71624755859375, "learning_rate": 3.2814302878647964e-06, "loss": 0.0837, "step": 356300 }, { "epoch": 3.5, "grad_norm": 3.595672369003296, "learning_rate": 3.2813061654105476e-06, "loss": 0.1794, "step": 356325 }, { "epoch": 3.5, "grad_norm": 8.389840126037598, "learning_rate": 3.2811820429562992e-06, "loss": 0.082, "step": 356350 }, { "epoch": 3.5, "grad_norm": 5.723150253295898, "learning_rate": 3.281057920502051e-06, "loss": 0.1995, "step": 356375 }, { "epoch": 3.5, "grad_norm": 8.068381309509277, "learning_rate": 3.2809337980478025e-06, "loss": 0.0647, "step": 356400 }, { "epoch": 3.5, "grad_norm": 5.715661525726318, "learning_rate": 3.2808096755935537e-06, "loss": 0.154, "step": 356425 }, { "epoch": 3.5, "grad_norm": 1.7204679250717163, "learning_rate": 3.2806855531393058e-06, "loss": 0.0463, "step": 356450 }, { "epoch": 3.5, "grad_norm": 4.244558334350586, "learning_rate": 3.280561430685057e-06, "loss": 0.2031, "step": 356475 }, { "epoch": 3.51, "grad_norm": 7.310125827789307, "learning_rate": 3.280437308230808e-06, "loss": 0.0749, "step": 356500 }, { "epoch": 3.51, "grad_norm": 6.310694217681885, "learning_rate": 3.2803131857765603e-06, "loss": 0.2067, "step": 356525 }, { "epoch": 3.51, "grad_norm": 14.423507690429688, "learning_rate": 3.2801890633223115e-06, "loss": 0.0702, "step": 356550 }, { "epoch": 3.51, "grad_norm": 5.542691230773926, "learning_rate": 3.280064940868063e-06, "loss": 0.1422, "step": 356575 }, { "epoch": 3.51, "grad_norm": 7.724393844604492, "learning_rate": 3.2799408184138147e-06, "loss": 0.0775, "step": 356600 }, { "epoch": 3.51, "grad_norm": 3.057011842727661, "learning_rate": 3.2798166959595664e-06, "loss": 0.1888, "step": 356625 }, { "epoch": 3.51, "grad_norm": 8.176065444946289, "learning_rate": 3.2796925735053176e-06, "loss": 0.0588, "step": 356650 }, { "epoch": 3.51, "grad_norm": 4.013972282409668, "learning_rate": 3.2795684510510697e-06, "loss": 0.2143, "step": 356675 }, { "epoch": 3.51, "grad_norm": 6.536259174346924, "learning_rate": 3.279444328596821e-06, "loss": 0.0865, "step": 356700 }, { "epoch": 3.51, "grad_norm": 4.4555134773254395, "learning_rate": 3.279320206142572e-06, "loss": 0.1949, "step": 356725 }, { "epoch": 3.51, "grad_norm": 9.505603790283203, "learning_rate": 3.279196083688324e-06, "loss": 0.065, "step": 356750 }, { "epoch": 3.51, "grad_norm": 3.0519044399261475, "learning_rate": 3.2790719612340753e-06, "loss": 0.1426, "step": 356775 }, { "epoch": 3.51, "grad_norm": 8.474445343017578, "learning_rate": 3.278947838779827e-06, "loss": 0.0837, "step": 356800 }, { "epoch": 3.51, "grad_norm": 5.149880886077881, "learning_rate": 3.278823716325578e-06, "loss": 0.2007, "step": 356825 }, { "epoch": 3.51, "grad_norm": 4.056891918182373, "learning_rate": 3.2786995938713302e-06, "loss": 0.0526, "step": 356850 }, { "epoch": 3.51, "grad_norm": 5.3945417404174805, "learning_rate": 3.2785754714170815e-06, "loss": 0.1806, "step": 356875 }, { "epoch": 3.51, "grad_norm": 3.7521581649780273, "learning_rate": 3.2784513489628327e-06, "loss": 0.091, "step": 356900 }, { "epoch": 3.51, "grad_norm": 5.580638408660889, "learning_rate": 3.2783272265085847e-06, "loss": 0.2181, "step": 356925 }, { "epoch": 3.51, "grad_norm": 8.952061653137207, "learning_rate": 3.278203104054336e-06, "loss": 0.05, "step": 356950 }, { "epoch": 3.51, "grad_norm": 4.903482437133789, "learning_rate": 3.2780789816000876e-06, "loss": 0.1817, "step": 356975 }, { "epoch": 3.51, "grad_norm": 11.993861198425293, "learning_rate": 3.2779548591458392e-06, "loss": 0.0727, "step": 357000 }, { "epoch": 3.51, "grad_norm": 3.9863510131835938, "learning_rate": 3.277830736691591e-06, "loss": 0.2188, "step": 357025 }, { "epoch": 3.51, "grad_norm": 14.030669212341309, "learning_rate": 3.277706614237342e-06, "loss": 0.0624, "step": 357050 }, { "epoch": 3.51, "grad_norm": 5.260158538818359, "learning_rate": 3.277582491783094e-06, "loss": 0.1458, "step": 357075 }, { "epoch": 3.51, "grad_norm": 9.750492095947266, "learning_rate": 3.2774583693288453e-06, "loss": 0.0679, "step": 357100 }, { "epoch": 3.51, "grad_norm": 4.3863091468811035, "learning_rate": 3.2773342468745965e-06, "loss": 0.1534, "step": 357125 }, { "epoch": 3.51, "grad_norm": 12.6859712600708, "learning_rate": 3.2772101244203486e-06, "loss": 0.0703, "step": 357150 }, { "epoch": 3.51, "grad_norm": 4.749865531921387, "learning_rate": 3.2770860019661e-06, "loss": 0.1442, "step": 357175 }, { "epoch": 3.51, "grad_norm": 5.553788185119629, "learning_rate": 3.2769618795118514e-06, "loss": 0.063, "step": 357200 }, { "epoch": 3.51, "grad_norm": 4.738266944885254, "learning_rate": 3.276837757057603e-06, "loss": 0.2021, "step": 357225 }, { "epoch": 3.51, "grad_norm": 13.130616188049316, "learning_rate": 3.2767136346033547e-06, "loss": 0.0852, "step": 357250 }, { "epoch": 3.51, "grad_norm": 5.458056449890137, "learning_rate": 3.276589512149106e-06, "loss": 0.1726, "step": 357275 }, { "epoch": 3.51, "grad_norm": 9.517601013183594, "learning_rate": 3.276465389694858e-06, "loss": 0.0739, "step": 357300 }, { "epoch": 3.51, "grad_norm": 4.362614154815674, "learning_rate": 3.276341267240609e-06, "loss": 0.1545, "step": 357325 }, { "epoch": 3.51, "grad_norm": 8.379871368408203, "learning_rate": 3.2762171447863604e-06, "loss": 0.0772, "step": 357350 }, { "epoch": 3.51, "grad_norm": 3.6254818439483643, "learning_rate": 3.2760930223321125e-06, "loss": 0.1779, "step": 357375 }, { "epoch": 3.51, "grad_norm": 7.295976161956787, "learning_rate": 3.2759688998778637e-06, "loss": 0.0795, "step": 357400 }, { "epoch": 3.51, "grad_norm": 5.415552139282227, "learning_rate": 3.2758447774236153e-06, "loss": 0.1925, "step": 357425 }, { "epoch": 3.51, "grad_norm": 3.412092924118042, "learning_rate": 3.275720654969367e-06, "loss": 0.0696, "step": 357450 }, { "epoch": 3.51, "grad_norm": 3.5295376777648926, "learning_rate": 3.2755965325151186e-06, "loss": 0.1939, "step": 357475 }, { "epoch": 3.51, "grad_norm": 7.120469570159912, "learning_rate": 3.27547241006087e-06, "loss": 0.0718, "step": 357500 }, { "epoch": 3.52, "grad_norm": 5.2775115966796875, "learning_rate": 3.275348287606622e-06, "loss": 0.1956, "step": 357525 }, { "epoch": 3.52, "grad_norm": 1.1296155452728271, "learning_rate": 3.275224165152373e-06, "loss": 0.0682, "step": 357550 }, { "epoch": 3.52, "grad_norm": 6.034343242645264, "learning_rate": 3.2751000426981243e-06, "loss": 0.1774, "step": 357575 }, { "epoch": 3.52, "grad_norm": 11.044017791748047, "learning_rate": 3.2749759202438763e-06, "loss": 0.0733, "step": 357600 }, { "epoch": 3.52, "grad_norm": 5.086838722229004, "learning_rate": 3.2748517977896275e-06, "loss": 0.1508, "step": 357625 }, { "epoch": 3.52, "grad_norm": 17.544137954711914, "learning_rate": 3.274727675335379e-06, "loss": 0.0832, "step": 357650 }, { "epoch": 3.52, "grad_norm": 3.20896315574646, "learning_rate": 3.2746035528811304e-06, "loss": 0.216, "step": 357675 }, { "epoch": 3.52, "grad_norm": 9.244829177856445, "learning_rate": 3.2744794304268825e-06, "loss": 0.0733, "step": 357700 }, { "epoch": 3.52, "grad_norm": 4.917398452758789, "learning_rate": 3.2743553079726337e-06, "loss": 0.2346, "step": 357725 }, { "epoch": 3.52, "grad_norm": 9.029029846191406, "learning_rate": 3.274231185518385e-06, "loss": 0.0899, "step": 357750 }, { "epoch": 3.52, "grad_norm": 5.246224403381348, "learning_rate": 3.2741120279623067e-06, "loss": 0.1901, "step": 357775 }, { "epoch": 3.52, "grad_norm": 6.835086822509766, "learning_rate": 3.273987905508058e-06, "loss": 0.0719, "step": 357800 }, { "epoch": 3.52, "grad_norm": 4.1033196449279785, "learning_rate": 3.27386378305381e-06, "loss": 0.2085, "step": 357825 }, { "epoch": 3.52, "grad_norm": 8.461663246154785, "learning_rate": 3.2737396605995612e-06, "loss": 0.0566, "step": 357850 }, { "epoch": 3.52, "grad_norm": 7.690501689910889, "learning_rate": 3.273615538145313e-06, "loss": 0.2055, "step": 357875 }, { "epoch": 3.52, "grad_norm": 16.01658821105957, "learning_rate": 3.2734914156910645e-06, "loss": 0.0785, "step": 357900 }, { "epoch": 3.52, "grad_norm": 4.960021495819092, "learning_rate": 3.273367293236816e-06, "loss": 0.1909, "step": 357925 }, { "epoch": 3.52, "grad_norm": 14.069299697875977, "learning_rate": 3.2732431707825673e-06, "loss": 0.0593, "step": 357950 }, { "epoch": 3.52, "grad_norm": 5.054745674133301, "learning_rate": 3.2731190483283194e-06, "loss": 0.166, "step": 357975 }, { "epoch": 3.52, "grad_norm": 8.747382164001465, "learning_rate": 3.2729949258740706e-06, "loss": 0.0802, "step": 358000 }, { "epoch": 3.52, "grad_norm": 4.422237873077393, "learning_rate": 3.272870803419822e-06, "loss": 0.1998, "step": 358025 }, { "epoch": 3.52, "grad_norm": 4.02009391784668, "learning_rate": 3.272746680965574e-06, "loss": 0.0692, "step": 358050 }, { "epoch": 3.52, "grad_norm": 4.382116794586182, "learning_rate": 3.272622558511325e-06, "loss": 0.1681, "step": 358075 }, { "epoch": 3.52, "grad_norm": 2.72894024848938, "learning_rate": 3.2724984360570767e-06, "loss": 0.0725, "step": 358100 }, { "epoch": 3.52, "grad_norm": 4.546028137207031, "learning_rate": 3.2723743136028284e-06, "loss": 0.1997, "step": 358125 }, { "epoch": 3.52, "grad_norm": 6.41763162612915, "learning_rate": 3.27225019114858e-06, "loss": 0.0555, "step": 358150 }, { "epoch": 3.52, "grad_norm": 8.062946319580078, "learning_rate": 3.2721260686943312e-06, "loss": 0.1719, "step": 358175 }, { "epoch": 3.52, "grad_norm": 8.124755859375, "learning_rate": 3.2720019462400833e-06, "loss": 0.0639, "step": 358200 }, { "epoch": 3.52, "grad_norm": 4.129305362701416, "learning_rate": 3.2718778237858345e-06, "loss": 0.1647, "step": 358225 }, { "epoch": 3.52, "grad_norm": 7.8168721199035645, "learning_rate": 3.2717537013315857e-06, "loss": 0.0626, "step": 358250 }, { "epoch": 3.52, "grad_norm": 5.7979888916015625, "learning_rate": 3.2716295788773373e-06, "loss": 0.1785, "step": 358275 }, { "epoch": 3.52, "grad_norm": 5.34731388092041, "learning_rate": 3.271505456423089e-06, "loss": 0.0631, "step": 358300 }, { "epoch": 3.52, "grad_norm": 6.173125267028809, "learning_rate": 3.2713813339688406e-06, "loss": 0.2087, "step": 358325 }, { "epoch": 3.52, "grad_norm": 3.6030197143554688, "learning_rate": 3.271257211514592e-06, "loss": 0.0579, "step": 358350 }, { "epoch": 3.52, "grad_norm": 6.521298885345459, "learning_rate": 3.271133089060344e-06, "loss": 0.1892, "step": 358375 }, { "epoch": 3.52, "grad_norm": 5.374297142028809, "learning_rate": 3.271008966606095e-06, "loss": 0.0502, "step": 358400 }, { "epoch": 3.52, "grad_norm": 5.278357982635498, "learning_rate": 3.2708848441518463e-06, "loss": 0.1799, "step": 358425 }, { "epoch": 3.52, "grad_norm": 7.084472179412842, "learning_rate": 3.2707607216975984e-06, "loss": 0.0772, "step": 358450 }, { "epoch": 3.52, "grad_norm": 5.159067630767822, "learning_rate": 3.2706365992433496e-06, "loss": 0.2055, "step": 358475 }, { "epoch": 3.52, "grad_norm": 8.79230785369873, "learning_rate": 3.270512476789101e-06, "loss": 0.0931, "step": 358500 }, { "epoch": 3.53, "grad_norm": 4.804263114929199, "learning_rate": 3.270388354334853e-06, "loss": 0.1677, "step": 358525 }, { "epoch": 3.53, "grad_norm": 7.170539379119873, "learning_rate": 3.2702642318806045e-06, "loss": 0.0822, "step": 358550 }, { "epoch": 3.53, "grad_norm": 4.048908233642578, "learning_rate": 3.2701401094263557e-06, "loss": 0.1637, "step": 358575 }, { "epoch": 3.53, "grad_norm": 4.986377716064453, "learning_rate": 3.2700159869721077e-06, "loss": 0.0712, "step": 358600 }, { "epoch": 3.53, "grad_norm": 3.8035173416137695, "learning_rate": 3.269891864517859e-06, "loss": 0.1465, "step": 358625 }, { "epoch": 3.53, "grad_norm": 8.710857391357422, "learning_rate": 3.26976774206361e-06, "loss": 0.0761, "step": 358650 }, { "epoch": 3.53, "grad_norm": 5.245602130889893, "learning_rate": 3.2696436196093622e-06, "loss": 0.2069, "step": 358675 }, { "epoch": 3.53, "grad_norm": 7.198573112487793, "learning_rate": 3.2695194971551134e-06, "loss": 0.0651, "step": 358700 }, { "epoch": 3.53, "grad_norm": 4.978544235229492, "learning_rate": 3.269395374700865e-06, "loss": 0.1745, "step": 358725 }, { "epoch": 3.53, "grad_norm": 9.89635944366455, "learning_rate": 3.2692712522466167e-06, "loss": 0.0565, "step": 358750 }, { "epoch": 3.53, "grad_norm": 5.471372604370117, "learning_rate": 3.2691471297923683e-06, "loss": 0.2128, "step": 358775 }, { "epoch": 3.53, "grad_norm": 9.310088157653809, "learning_rate": 3.2690230073381195e-06, "loss": 0.0721, "step": 358800 }, { "epoch": 3.53, "grad_norm": 6.80913782119751, "learning_rate": 3.2688988848838716e-06, "loss": 0.1946, "step": 358825 }, { "epoch": 3.53, "grad_norm": 9.919723510742188, "learning_rate": 3.268774762429623e-06, "loss": 0.0633, "step": 358850 }, { "epoch": 3.53, "grad_norm": 5.523017883300781, "learning_rate": 3.2686506399753745e-06, "loss": 0.1855, "step": 358875 }, { "epoch": 3.53, "grad_norm": 3.580869436264038, "learning_rate": 3.268526517521126e-06, "loss": 0.0825, "step": 358900 }, { "epoch": 3.53, "grad_norm": 5.260661602020264, "learning_rate": 3.2684023950668773e-06, "loss": 0.1802, "step": 358925 }, { "epoch": 3.53, "grad_norm": 6.810722351074219, "learning_rate": 3.268278272612629e-06, "loss": 0.0657, "step": 358950 }, { "epoch": 3.53, "grad_norm": 4.220756530761719, "learning_rate": 3.2681541501583806e-06, "loss": 0.2038, "step": 358975 }, { "epoch": 3.53, "grad_norm": 8.313544273376465, "learning_rate": 3.268030027704132e-06, "loss": 0.0695, "step": 359000 }, { "epoch": 3.53, "grad_norm": 4.002440452575684, "learning_rate": 3.2679059052498834e-06, "loss": 0.2132, "step": 359025 }, { "epoch": 3.53, "grad_norm": 6.898556709289551, "learning_rate": 3.2677817827956355e-06, "loss": 0.0732, "step": 359050 }, { "epoch": 3.53, "grad_norm": 5.848981857299805, "learning_rate": 3.2676576603413867e-06, "loss": 0.1757, "step": 359075 }, { "epoch": 3.53, "grad_norm": 9.15622329711914, "learning_rate": 3.2675335378871383e-06, "loss": 0.0723, "step": 359100 }, { "epoch": 3.53, "grad_norm": 4.621408462524414, "learning_rate": 3.2674094154328895e-06, "loss": 0.1764, "step": 359125 }, { "epoch": 3.53, "grad_norm": 8.95776081085205, "learning_rate": 3.2672852929786416e-06, "loss": 0.0698, "step": 359150 }, { "epoch": 3.53, "grad_norm": 6.277710914611816, "learning_rate": 3.267161170524393e-06, "loss": 0.1594, "step": 359175 }, { "epoch": 3.53, "grad_norm": 20.181499481201172, "learning_rate": 3.267037048070144e-06, "loss": 0.0655, "step": 359200 }, { "epoch": 3.53, "grad_norm": 4.852296829223633, "learning_rate": 3.266912925615896e-06, "loss": 0.1668, "step": 359225 }, { "epoch": 3.53, "grad_norm": 8.247788429260254, "learning_rate": 3.2667888031616473e-06, "loss": 0.057, "step": 359250 }, { "epoch": 3.53, "grad_norm": 4.852795124053955, "learning_rate": 3.266664680707399e-06, "loss": 0.189, "step": 359275 }, { "epoch": 3.53, "grad_norm": 10.53459358215332, "learning_rate": 3.2665405582531506e-06, "loss": 0.0662, "step": 359300 }, { "epoch": 3.53, "grad_norm": 3.5925261974334717, "learning_rate": 3.266416435798902e-06, "loss": 0.2234, "step": 359325 }, { "epoch": 3.53, "grad_norm": 9.368483543395996, "learning_rate": 3.2662923133446534e-06, "loss": 0.0693, "step": 359350 }, { "epoch": 3.53, "grad_norm": 5.059828281402588, "learning_rate": 3.2661681908904055e-06, "loss": 0.1397, "step": 359375 }, { "epoch": 3.53, "grad_norm": 7.771249294281006, "learning_rate": 3.2660440684361567e-06, "loss": 0.063, "step": 359400 }, { "epoch": 3.53, "grad_norm": 4.95888090133667, "learning_rate": 3.265919945981908e-06, "loss": 0.1595, "step": 359425 }, { "epoch": 3.53, "grad_norm": 10.929248809814453, "learning_rate": 3.26579582352766e-06, "loss": 0.0745, "step": 359450 }, { "epoch": 3.53, "grad_norm": 6.080107688903809, "learning_rate": 3.265671701073411e-06, "loss": 0.1899, "step": 359475 }, { "epoch": 3.53, "grad_norm": 5.09012508392334, "learning_rate": 3.2655475786191628e-06, "loss": 0.0504, "step": 359500 }, { "epoch": 3.53, "grad_norm": 3.622624158859253, "learning_rate": 3.2654234561649144e-06, "loss": 0.2086, "step": 359525 }, { "epoch": 3.54, "grad_norm": 10.04168701171875, "learning_rate": 3.265299333710666e-06, "loss": 0.0681, "step": 359550 }, { "epoch": 3.54, "grad_norm": 4.096084117889404, "learning_rate": 3.2651752112564173e-06, "loss": 0.2134, "step": 359575 }, { "epoch": 3.54, "grad_norm": 13.327166557312012, "learning_rate": 3.2650510888021693e-06, "loss": 0.0636, "step": 359600 }, { "epoch": 3.54, "grad_norm": 5.128490447998047, "learning_rate": 3.2649269663479205e-06, "loss": 0.1981, "step": 359625 }, { "epoch": 3.54, "grad_norm": 8.192963600158691, "learning_rate": 3.2648028438936717e-06, "loss": 0.069, "step": 359650 }, { "epoch": 3.54, "grad_norm": 6.304092884063721, "learning_rate": 3.264678721439424e-06, "loss": 0.1641, "step": 359675 }, { "epoch": 3.54, "grad_norm": 5.4064412117004395, "learning_rate": 3.264554598985175e-06, "loss": 0.0551, "step": 359700 }, { "epoch": 3.54, "grad_norm": 8.137384414672852, "learning_rate": 3.2644304765309267e-06, "loss": 0.1393, "step": 359725 }, { "epoch": 3.54, "grad_norm": 8.590998649597168, "learning_rate": 3.2643063540766783e-06, "loss": 0.0755, "step": 359750 }, { "epoch": 3.54, "grad_norm": 4.139542579650879, "learning_rate": 3.26418223162243e-06, "loss": 0.1721, "step": 359775 }, { "epoch": 3.54, "grad_norm": 6.313953399658203, "learning_rate": 3.264058109168181e-06, "loss": 0.1085, "step": 359800 }, { "epoch": 3.54, "grad_norm": 6.449123859405518, "learning_rate": 3.263933986713933e-06, "loss": 0.1898, "step": 359825 }, { "epoch": 3.54, "grad_norm": 7.93772029876709, "learning_rate": 3.2638098642596844e-06, "loss": 0.0846, "step": 359850 }, { "epoch": 3.54, "grad_norm": 6.102586269378662, "learning_rate": 3.2636857418054356e-06, "loss": 0.1881, "step": 359875 }, { "epoch": 3.54, "grad_norm": 12.568169593811035, "learning_rate": 3.2635616193511877e-06, "loss": 0.0832, "step": 359900 }, { "epoch": 3.54, "grad_norm": 4.803304672241211, "learning_rate": 3.263437496896939e-06, "loss": 0.1711, "step": 359925 }, { "epoch": 3.54, "grad_norm": 14.459535598754883, "learning_rate": 3.2633133744426905e-06, "loss": 0.0534, "step": 359950 }, { "epoch": 3.54, "grad_norm": 4.303277969360352, "learning_rate": 3.2631892519884417e-06, "loss": 0.1915, "step": 359975 }, { "epoch": 3.54, "grad_norm": 5.096983432769775, "learning_rate": 3.263065129534194e-06, "loss": 0.0546, "step": 360000 }, { "epoch": 3.54, "eval_loss": 0.6926960945129395, "eval_runtime": 6127.3977, "eval_samples_per_second": 1.545, "eval_steps_per_second": 0.193, "eval_wer": 0.11900469967166677, "step": 360000 }, { "epoch": 3.54, "grad_norm": 4.418525218963623, "learning_rate": 3.262941007079945e-06, "loss": 0.2083, "step": 360025 }, { "epoch": 3.54, "grad_norm": 4.048791885375977, "learning_rate": 3.2628168846256962e-06, "loss": 0.0754, "step": 360050 }, { "epoch": 3.54, "grad_norm": 7.379584789276123, "learning_rate": 3.262697727069618e-06, "loss": 0.2272, "step": 360075 }, { "epoch": 3.54, "grad_norm": 7.146562099456787, "learning_rate": 3.2625736046153693e-06, "loss": 0.089, "step": 360100 }, { "epoch": 3.54, "grad_norm": 5.677318096160889, "learning_rate": 3.2624494821611214e-06, "loss": 0.2166, "step": 360125 }, { "epoch": 3.54, "grad_norm": 7.769060134887695, "learning_rate": 3.2623253597068726e-06, "loss": 0.0688, "step": 360150 }, { "epoch": 3.54, "grad_norm": 5.621161460876465, "learning_rate": 3.262201237252624e-06, "loss": 0.2081, "step": 360175 }, { "epoch": 3.54, "grad_norm": 5.670354843139648, "learning_rate": 3.262077114798376e-06, "loss": 0.0594, "step": 360200 }, { "epoch": 3.54, "grad_norm": 4.559488773345947, "learning_rate": 3.2619529923441275e-06, "loss": 0.1488, "step": 360225 }, { "epoch": 3.54, "grad_norm": 4.723607540130615, "learning_rate": 3.2618288698898787e-06, "loss": 0.0709, "step": 360250 }, { "epoch": 3.54, "grad_norm": 4.9066243171691895, "learning_rate": 3.2617047474356307e-06, "loss": 0.2123, "step": 360275 }, { "epoch": 3.54, "grad_norm": 10.040190696716309, "learning_rate": 3.261580624981382e-06, "loss": 0.0983, "step": 360300 }, { "epoch": 3.54, "grad_norm": 4.5936126708984375, "learning_rate": 3.261456502527133e-06, "loss": 0.186, "step": 360325 }, { "epoch": 3.54, "grad_norm": 5.767271518707275, "learning_rate": 3.2613323800728852e-06, "loss": 0.0748, "step": 360350 }, { "epoch": 3.54, "grad_norm": 5.632352828979492, "learning_rate": 3.2612082576186364e-06, "loss": 0.1911, "step": 360375 }, { "epoch": 3.54, "grad_norm": 6.679479122161865, "learning_rate": 3.261084135164388e-06, "loss": 0.0761, "step": 360400 }, { "epoch": 3.54, "grad_norm": 4.824209213256836, "learning_rate": 3.2609600127101397e-06, "loss": 0.2229, "step": 360425 }, { "epoch": 3.54, "grad_norm": 7.560863494873047, "learning_rate": 3.2608358902558913e-06, "loss": 0.0655, "step": 360450 }, { "epoch": 3.54, "grad_norm": 6.197598934173584, "learning_rate": 3.2607117678016426e-06, "loss": 0.18, "step": 360475 }, { "epoch": 3.54, "grad_norm": 7.6249470710754395, "learning_rate": 3.2605876453473938e-06, "loss": 0.0759, "step": 360500 }, { "epoch": 3.54, "grad_norm": 5.799044609069824, "learning_rate": 3.260463522893146e-06, "loss": 0.1494, "step": 360525 }, { "epoch": 3.54, "grad_norm": 10.401520729064941, "learning_rate": 3.260339400438897e-06, "loss": 0.073, "step": 360550 }, { "epoch": 3.55, "grad_norm": 4.1207709312438965, "learning_rate": 3.2602152779846487e-06, "loss": 0.1965, "step": 360575 }, { "epoch": 3.55, "grad_norm": 7.774429798126221, "learning_rate": 3.2600911555304003e-06, "loss": 0.0794, "step": 360600 }, { "epoch": 3.55, "grad_norm": 5.292847156524658, "learning_rate": 3.259967033076152e-06, "loss": 0.2114, "step": 360625 }, { "epoch": 3.55, "grad_norm": 8.141609191894531, "learning_rate": 3.259842910621903e-06, "loss": 0.0555, "step": 360650 }, { "epoch": 3.55, "grad_norm": 5.468507289886475, "learning_rate": 3.259718788167655e-06, "loss": 0.1535, "step": 360675 }, { "epoch": 3.55, "grad_norm": 10.76053237915039, "learning_rate": 3.2595946657134064e-06, "loss": 0.0671, "step": 360700 }, { "epoch": 3.55, "grad_norm": 4.635519504547119, "learning_rate": 3.2594705432591576e-06, "loss": 0.1704, "step": 360725 }, { "epoch": 3.55, "grad_norm": 8.529459953308105, "learning_rate": 3.2593464208049097e-06, "loss": 0.0695, "step": 360750 }, { "epoch": 3.55, "grad_norm": 4.403757572174072, "learning_rate": 3.259222298350661e-06, "loss": 0.1919, "step": 360775 }, { "epoch": 3.55, "grad_norm": 5.197503089904785, "learning_rate": 3.2590981758964125e-06, "loss": 0.0766, "step": 360800 }, { "epoch": 3.55, "grad_norm": 4.351384162902832, "learning_rate": 3.258974053442164e-06, "loss": 0.1728, "step": 360825 }, { "epoch": 3.55, "grad_norm": 7.947725296020508, "learning_rate": 3.258849930987916e-06, "loss": 0.094, "step": 360850 }, { "epoch": 3.55, "grad_norm": 7.4935431480407715, "learning_rate": 3.258725808533667e-06, "loss": 0.1821, "step": 360875 }, { "epoch": 3.55, "grad_norm": 11.597085952758789, "learning_rate": 3.258601686079419e-06, "loss": 0.0686, "step": 360900 }, { "epoch": 3.55, "grad_norm": 3.737179756164551, "learning_rate": 3.2584775636251703e-06, "loss": 0.1695, "step": 360925 }, { "epoch": 3.55, "grad_norm": 8.353687286376953, "learning_rate": 3.2583534411709215e-06, "loss": 0.079, "step": 360950 }, { "epoch": 3.55, "grad_norm": 4.109749794006348, "learning_rate": 3.2582293187166736e-06, "loss": 0.2069, "step": 360975 }, { "epoch": 3.55, "grad_norm": 9.552818298339844, "learning_rate": 3.2581051962624248e-06, "loss": 0.0833, "step": 361000 }, { "epoch": 3.55, "grad_norm": 4.013832092285156, "learning_rate": 3.2579810738081764e-06, "loss": 0.2158, "step": 361025 }, { "epoch": 3.55, "grad_norm": 11.58454418182373, "learning_rate": 3.257856951353928e-06, "loss": 0.071, "step": 361050 }, { "epoch": 3.55, "grad_norm": 3.5643093585968018, "learning_rate": 3.2577328288996797e-06, "loss": 0.1314, "step": 361075 }, { "epoch": 3.55, "grad_norm": 4.433474540710449, "learning_rate": 3.257608706445431e-06, "loss": 0.069, "step": 361100 }, { "epoch": 3.55, "grad_norm": 6.488908767700195, "learning_rate": 3.257484583991183e-06, "loss": 0.1953, "step": 361125 }, { "epoch": 3.55, "grad_norm": 11.776251792907715, "learning_rate": 3.257360461536934e-06, "loss": 0.0615, "step": 361150 }, { "epoch": 3.55, "grad_norm": 3.460796356201172, "learning_rate": 3.2572363390826854e-06, "loss": 0.1615, "step": 361175 }, { "epoch": 3.55, "grad_norm": 11.764144897460938, "learning_rate": 3.2571122166284374e-06, "loss": 0.0786, "step": 361200 }, { "epoch": 3.55, "grad_norm": 4.1606221199035645, "learning_rate": 3.2569880941741886e-06, "loss": 0.1841, "step": 361225 }, { "epoch": 3.55, "grad_norm": 7.282944679260254, "learning_rate": 3.2568639717199403e-06, "loss": 0.0708, "step": 361250 }, { "epoch": 3.55, "grad_norm": 3.9089138507843018, "learning_rate": 3.256739849265692e-06, "loss": 0.1703, "step": 361275 }, { "epoch": 3.55, "grad_norm": 7.485843658447266, "learning_rate": 3.2566157268114435e-06, "loss": 0.092, "step": 361300 }, { "epoch": 3.55, "grad_norm": 4.663394451141357, "learning_rate": 3.2564916043571948e-06, "loss": 0.1872, "step": 361325 }, { "epoch": 3.55, "grad_norm": 8.12918758392334, "learning_rate": 3.256367481902947e-06, "loss": 0.1093, "step": 361350 }, { "epoch": 3.55, "grad_norm": 3.96716046333313, "learning_rate": 3.256243359448698e-06, "loss": 0.1739, "step": 361375 }, { "epoch": 3.55, "grad_norm": 2.392076015472412, "learning_rate": 3.2561192369944492e-06, "loss": 0.0806, "step": 361400 }, { "epoch": 3.55, "grad_norm": 4.81352424621582, "learning_rate": 3.255995114540201e-06, "loss": 0.1756, "step": 361425 }, { "epoch": 3.55, "grad_norm": 8.162614822387695, "learning_rate": 3.2558709920859525e-06, "loss": 0.0798, "step": 361450 }, { "epoch": 3.55, "grad_norm": 3.7711050510406494, "learning_rate": 3.255746869631704e-06, "loss": 0.1644, "step": 361475 }, { "epoch": 3.55, "grad_norm": 10.40685749053955, "learning_rate": 3.2556227471774554e-06, "loss": 0.0695, "step": 361500 }, { "epoch": 3.55, "grad_norm": 5.81143856048584, "learning_rate": 3.2554986247232074e-06, "loss": 0.1892, "step": 361525 }, { "epoch": 3.55, "grad_norm": 16.34220314025879, "learning_rate": 3.2553745022689586e-06, "loss": 0.0755, "step": 361550 }, { "epoch": 3.56, "grad_norm": 4.962160110473633, "learning_rate": 3.25525037981471e-06, "loss": 0.1276, "step": 361575 }, { "epoch": 3.56, "grad_norm": 6.962082862854004, "learning_rate": 3.255126257360462e-06, "loss": 0.0789, "step": 361600 }, { "epoch": 3.56, "grad_norm": 5.257390975952148, "learning_rate": 3.255002134906213e-06, "loss": 0.15, "step": 361625 }, { "epoch": 3.56, "grad_norm": 6.71235990524292, "learning_rate": 3.2548780124519647e-06, "loss": 0.0872, "step": 361650 }, { "epoch": 3.56, "grad_norm": 4.64511251449585, "learning_rate": 3.2547538899977164e-06, "loss": 0.1802, "step": 361675 }, { "epoch": 3.56, "grad_norm": 4.139918804168701, "learning_rate": 3.254629767543468e-06, "loss": 0.077, "step": 361700 }, { "epoch": 3.56, "grad_norm": 4.776229381561279, "learning_rate": 3.2545056450892192e-06, "loss": 0.2087, "step": 361725 }, { "epoch": 3.56, "grad_norm": 11.210413932800293, "learning_rate": 3.2543815226349713e-06, "loss": 0.0783, "step": 361750 }, { "epoch": 3.56, "grad_norm": 4.632557392120361, "learning_rate": 3.2542574001807225e-06, "loss": 0.1773, "step": 361775 }, { "epoch": 3.56, "grad_norm": 8.610596656799316, "learning_rate": 3.254133277726474e-06, "loss": 0.0907, "step": 361800 }, { "epoch": 3.56, "grad_norm": 3.940737724304199, "learning_rate": 3.2540091552722258e-06, "loss": 0.1819, "step": 361825 }, { "epoch": 3.56, "grad_norm": 8.671456336975098, "learning_rate": 3.2538850328179774e-06, "loss": 0.0914, "step": 361850 }, { "epoch": 3.56, "grad_norm": 3.9160971641540527, "learning_rate": 3.2537609103637286e-06, "loss": 0.1878, "step": 361875 }, { "epoch": 3.56, "grad_norm": 17.48068618774414, "learning_rate": 3.2536367879094807e-06, "loss": 0.0614, "step": 361900 }, { "epoch": 3.56, "grad_norm": 4.941681385040283, "learning_rate": 3.253512665455232e-06, "loss": 0.1629, "step": 361925 }, { "epoch": 3.56, "grad_norm": 7.779843807220459, "learning_rate": 3.253388543000983e-06, "loss": 0.0964, "step": 361950 }, { "epoch": 3.56, "grad_norm": 4.962604522705078, "learning_rate": 3.253264420546735e-06, "loss": 0.1847, "step": 361975 }, { "epoch": 3.56, "grad_norm": 7.188049793243408, "learning_rate": 3.2531402980924864e-06, "loss": 0.0613, "step": 362000 }, { "epoch": 3.56, "grad_norm": 4.649294376373291, "learning_rate": 3.253016175638238e-06, "loss": 0.1654, "step": 362025 }, { "epoch": 3.56, "grad_norm": 6.288492679595947, "learning_rate": 3.2528920531839896e-06, "loss": 0.0824, "step": 362050 }, { "epoch": 3.56, "grad_norm": 4.981501579284668, "learning_rate": 3.2527679307297413e-06, "loss": 0.1542, "step": 362075 }, { "epoch": 3.56, "grad_norm": 7.951125621795654, "learning_rate": 3.2526438082754925e-06, "loss": 0.0694, "step": 362100 }, { "epoch": 3.56, "grad_norm": 4.110218524932861, "learning_rate": 3.2525196858212445e-06, "loss": 0.1735, "step": 362125 }, { "epoch": 3.56, "grad_norm": 8.03690242767334, "learning_rate": 3.2523955633669957e-06, "loss": 0.0634, "step": 362150 }, { "epoch": 3.56, "grad_norm": 5.355453968048096, "learning_rate": 3.252271440912747e-06, "loss": 0.2091, "step": 362175 }, { "epoch": 3.56, "grad_norm": 9.285309791564941, "learning_rate": 3.252147318458499e-06, "loss": 0.0775, "step": 362200 }, { "epoch": 3.56, "grad_norm": 4.858936786651611, "learning_rate": 3.2520231960042502e-06, "loss": 0.1833, "step": 362225 }, { "epoch": 3.56, "grad_norm": 7.170598983764648, "learning_rate": 3.251899073550002e-06, "loss": 0.0651, "step": 362250 }, { "epoch": 3.56, "grad_norm": 4.657713413238525, "learning_rate": 3.251774951095753e-06, "loss": 0.1905, "step": 362275 }, { "epoch": 3.56, "grad_norm": 13.439220428466797, "learning_rate": 3.251650828641505e-06, "loss": 0.0788, "step": 362300 }, { "epoch": 3.56, "grad_norm": 6.158483505249023, "learning_rate": 3.2515267061872563e-06, "loss": 0.1574, "step": 362325 }, { "epoch": 3.56, "grad_norm": 10.132022857666016, "learning_rate": 3.2514025837330076e-06, "loss": 0.0814, "step": 362350 }, { "epoch": 3.56, "grad_norm": 5.955911159515381, "learning_rate": 3.2512784612787596e-06, "loss": 0.1954, "step": 362375 }, { "epoch": 3.56, "grad_norm": 6.426797389984131, "learning_rate": 3.251154338824511e-06, "loss": 0.0666, "step": 362400 }, { "epoch": 3.56, "grad_norm": 5.615116596221924, "learning_rate": 3.2510302163702625e-06, "loss": 0.2186, "step": 362425 }, { "epoch": 3.56, "grad_norm": 2.699082612991333, "learning_rate": 3.250906093916014e-06, "loss": 0.0554, "step": 362450 }, { "epoch": 3.56, "grad_norm": 3.952878475189209, "learning_rate": 3.2507819714617657e-06, "loss": 0.2002, "step": 362475 }, { "epoch": 3.56, "grad_norm": 6.287527561187744, "learning_rate": 3.250657849007517e-06, "loss": 0.0629, "step": 362500 }, { "epoch": 3.56, "grad_norm": 3.700629472732544, "learning_rate": 3.250538691451439e-06, "loss": 0.1799, "step": 362525 }, { "epoch": 3.56, "grad_norm": 6.027707576751709, "learning_rate": 3.25041456899719e-06, "loss": 0.0481, "step": 362550 }, { "epoch": 3.56, "grad_norm": 5.237150192260742, "learning_rate": 3.250290446542942e-06, "loss": 0.1826, "step": 362575 }, { "epoch": 3.57, "grad_norm": 8.769896507263184, "learning_rate": 3.2501663240886933e-06, "loss": 0.0896, "step": 362600 }, { "epoch": 3.57, "grad_norm": 6.41116189956665, "learning_rate": 3.2500422016344445e-06, "loss": 0.1755, "step": 362625 }, { "epoch": 3.57, "grad_norm": 9.057558059692383, "learning_rate": 3.2499180791801966e-06, "loss": 0.0865, "step": 362650 }, { "epoch": 3.57, "grad_norm": 6.063901901245117, "learning_rate": 3.2497939567259478e-06, "loss": 0.1989, "step": 362675 }, { "epoch": 3.57, "grad_norm": 7.174699783325195, "learning_rate": 3.2496698342716994e-06, "loss": 0.1032, "step": 362700 }, { "epoch": 3.57, "grad_norm": 6.798519134521484, "learning_rate": 3.249545711817451e-06, "loss": 0.1512, "step": 362725 }, { "epoch": 3.57, "grad_norm": 8.652113914489746, "learning_rate": 3.2494215893632027e-06, "loss": 0.0841, "step": 362750 }, { "epoch": 3.57, "grad_norm": 4.511654376983643, "learning_rate": 3.249297466908954e-06, "loss": 0.2095, "step": 362775 }, { "epoch": 3.57, "grad_norm": 8.164220809936523, "learning_rate": 3.249173344454705e-06, "loss": 0.071, "step": 362800 }, { "epoch": 3.57, "grad_norm": 24.350601196289062, "learning_rate": 3.249049222000457e-06, "loss": 0.2288, "step": 362825 }, { "epoch": 3.57, "grad_norm": 8.673762321472168, "learning_rate": 3.2489250995462084e-06, "loss": 0.0725, "step": 362850 }, { "epoch": 3.57, "grad_norm": 5.996311187744141, "learning_rate": 3.24880097709196e-06, "loss": 0.1922, "step": 362875 }, { "epoch": 3.57, "grad_norm": 10.57101821899414, "learning_rate": 3.2486768546377116e-06, "loss": 0.0567, "step": 362900 }, { "epoch": 3.57, "grad_norm": 5.520367622375488, "learning_rate": 3.2485527321834633e-06, "loss": 0.1988, "step": 362925 }, { "epoch": 3.57, "grad_norm": 14.513458251953125, "learning_rate": 3.2484286097292145e-06, "loss": 0.0749, "step": 362950 }, { "epoch": 3.57, "grad_norm": 3.7058169841766357, "learning_rate": 3.2483044872749665e-06, "loss": 0.1622, "step": 362975 }, { "epoch": 3.57, "grad_norm": 4.646329879760742, "learning_rate": 3.2481803648207178e-06, "loss": 0.0778, "step": 363000 }, { "epoch": 3.57, "grad_norm": 4.5266900062561035, "learning_rate": 3.248056242366469e-06, "loss": 0.2, "step": 363025 }, { "epoch": 3.57, "grad_norm": 3.930999755859375, "learning_rate": 3.247932119912221e-06, "loss": 0.0598, "step": 363050 }, { "epoch": 3.57, "grad_norm": 3.8933424949645996, "learning_rate": 3.2478079974579722e-06, "loss": 0.1876, "step": 363075 }, { "epoch": 3.57, "grad_norm": 14.268630981445312, "learning_rate": 3.247683875003724e-06, "loss": 0.0592, "step": 363100 }, { "epoch": 3.57, "grad_norm": 4.450690746307373, "learning_rate": 3.2475597525494755e-06, "loss": 0.187, "step": 363125 }, { "epoch": 3.57, "grad_norm": 9.255919456481934, "learning_rate": 3.247435630095227e-06, "loss": 0.0691, "step": 363150 }, { "epoch": 3.57, "grad_norm": 5.121198654174805, "learning_rate": 3.2473115076409784e-06, "loss": 0.1699, "step": 363175 }, { "epoch": 3.57, "grad_norm": 10.921812057495117, "learning_rate": 3.2471873851867304e-06, "loss": 0.0808, "step": 363200 }, { "epoch": 3.57, "grad_norm": 4.8986005783081055, "learning_rate": 3.2470632627324816e-06, "loss": 0.1692, "step": 363225 }, { "epoch": 3.57, "grad_norm": 7.752821922302246, "learning_rate": 3.246939140278233e-06, "loss": 0.0768, "step": 363250 }, { "epoch": 3.57, "grad_norm": 9.473221778869629, "learning_rate": 3.246815017823985e-06, "loss": 0.1856, "step": 363275 }, { "epoch": 3.57, "grad_norm": 4.95237922668457, "learning_rate": 3.246690895369736e-06, "loss": 0.0692, "step": 363300 }, { "epoch": 3.57, "grad_norm": 7.30504035949707, "learning_rate": 3.2465667729154877e-06, "loss": 0.1571, "step": 363325 }, { "epoch": 3.57, "grad_norm": 5.007332801818848, "learning_rate": 3.2464426504612394e-06, "loss": 0.0678, "step": 363350 }, { "epoch": 3.57, "grad_norm": 6.793357849121094, "learning_rate": 3.246318528006991e-06, "loss": 0.1992, "step": 363375 }, { "epoch": 3.57, "grad_norm": 9.701106071472168, "learning_rate": 3.2461944055527422e-06, "loss": 0.079, "step": 363400 }, { "epoch": 3.57, "grad_norm": 4.4994049072265625, "learning_rate": 3.2460702830984943e-06, "loss": 0.2052, "step": 363425 }, { "epoch": 3.57, "grad_norm": 12.922560691833496, "learning_rate": 3.2459461606442455e-06, "loss": 0.0845, "step": 363450 }, { "epoch": 3.57, "grad_norm": 5.30209493637085, "learning_rate": 3.2458220381899967e-06, "loss": 0.1887, "step": 363475 }, { "epoch": 3.57, "grad_norm": 6.595372200012207, "learning_rate": 3.2456979157357488e-06, "loss": 0.0804, "step": 363500 }, { "epoch": 3.57, "grad_norm": 5.258037090301514, "learning_rate": 3.2455737932815e-06, "loss": 0.1797, "step": 363525 }, { "epoch": 3.57, "grad_norm": 9.508511543273926, "learning_rate": 3.2454496708272516e-06, "loss": 0.099, "step": 363550 }, { "epoch": 3.57, "grad_norm": 3.907156229019165, "learning_rate": 3.2453255483730032e-06, "loss": 0.1599, "step": 363575 }, { "epoch": 3.57, "grad_norm": 6.058712005615234, "learning_rate": 3.245201425918755e-06, "loss": 0.0673, "step": 363600 }, { "epoch": 3.58, "grad_norm": 5.809760570526123, "learning_rate": 3.245077303464506e-06, "loss": 0.1655, "step": 363625 }, { "epoch": 3.58, "grad_norm": 11.483654022216797, "learning_rate": 3.2449531810102573e-06, "loss": 0.0687, "step": 363650 }, { "epoch": 3.58, "grad_norm": 6.316596984863281, "learning_rate": 3.2448290585560094e-06, "loss": 0.1894, "step": 363675 }, { "epoch": 3.58, "grad_norm": 0.4436154067516327, "learning_rate": 3.2447049361017606e-06, "loss": 0.0535, "step": 363700 }, { "epoch": 3.58, "grad_norm": 5.401129245758057, "learning_rate": 3.244580813647512e-06, "loss": 0.1755, "step": 363725 }, { "epoch": 3.58, "grad_norm": 5.693169116973877, "learning_rate": 3.244456691193264e-06, "loss": 0.0767, "step": 363750 }, { "epoch": 3.58, "grad_norm": 4.648609161376953, "learning_rate": 3.2443325687390155e-06, "loss": 0.1868, "step": 363775 }, { "epoch": 3.58, "grad_norm": 3.7810823917388916, "learning_rate": 3.2442084462847667e-06, "loss": 0.0887, "step": 363800 }, { "epoch": 3.58, "grad_norm": 4.486285209655762, "learning_rate": 3.2440843238305188e-06, "loss": 0.2278, "step": 363825 }, { "epoch": 3.58, "grad_norm": 6.999819755554199, "learning_rate": 3.24396020137627e-06, "loss": 0.0625, "step": 363850 }, { "epoch": 3.58, "grad_norm": 6.84343957901001, "learning_rate": 3.243836078922021e-06, "loss": 0.1767, "step": 363875 }, { "epoch": 3.58, "grad_norm": 12.458520889282227, "learning_rate": 3.2437119564677732e-06, "loss": 0.0624, "step": 363900 }, { "epoch": 3.58, "grad_norm": 5.225926876068115, "learning_rate": 3.2435878340135244e-06, "loss": 0.2458, "step": 363925 }, { "epoch": 3.58, "grad_norm": 10.648324966430664, "learning_rate": 3.243463711559276e-06, "loss": 0.1015, "step": 363950 }, { "epoch": 3.58, "grad_norm": 6.8278422355651855, "learning_rate": 3.2433395891050277e-06, "loss": 0.2047, "step": 363975 }, { "epoch": 3.58, "grad_norm": 6.069194316864014, "learning_rate": 3.2432154666507793e-06, "loss": 0.0529, "step": 364000 }, { "epoch": 3.58, "grad_norm": 11.073373794555664, "learning_rate": 3.2430913441965306e-06, "loss": 0.1877, "step": 364025 }, { "epoch": 3.58, "grad_norm": 10.238612174987793, "learning_rate": 3.2429672217422826e-06, "loss": 0.0854, "step": 364050 }, { "epoch": 3.58, "grad_norm": 5.514801025390625, "learning_rate": 3.242843099288034e-06, "loss": 0.1719, "step": 364075 }, { "epoch": 3.58, "grad_norm": 12.686690330505371, "learning_rate": 3.242718976833785e-06, "loss": 0.0924, "step": 364100 }, { "epoch": 3.58, "grad_norm": 4.533384799957275, "learning_rate": 3.242594854379537e-06, "loss": 0.1789, "step": 364125 }, { "epoch": 3.58, "grad_norm": 14.13077163696289, "learning_rate": 3.2424707319252883e-06, "loss": 0.0794, "step": 364150 }, { "epoch": 3.58, "grad_norm": 7.705901622772217, "learning_rate": 3.24234660947104e-06, "loss": 0.2256, "step": 364175 }, { "epoch": 3.58, "grad_norm": 4.250050067901611, "learning_rate": 3.2422224870167916e-06, "loss": 0.0687, "step": 364200 }, { "epoch": 3.58, "grad_norm": 5.636967658996582, "learning_rate": 3.2420983645625432e-06, "loss": 0.2062, "step": 364225 }, { "epoch": 3.58, "grad_norm": 11.05159854888916, "learning_rate": 3.2419742421082944e-06, "loss": 0.0957, "step": 364250 }, { "epoch": 3.58, "grad_norm": 5.068073272705078, "learning_rate": 3.2418501196540465e-06, "loss": 0.1779, "step": 364275 }, { "epoch": 3.58, "grad_norm": 7.619304656982422, "learning_rate": 3.2417259971997977e-06, "loss": 0.0754, "step": 364300 }, { "epoch": 3.58, "grad_norm": 4.45332670211792, "learning_rate": 3.241601874745549e-06, "loss": 0.1937, "step": 364325 }, { "epoch": 3.58, "grad_norm": 8.07162094116211, "learning_rate": 3.241477752291301e-06, "loss": 0.0758, "step": 364350 }, { "epoch": 3.58, "grad_norm": 5.197201728820801, "learning_rate": 3.241353629837052e-06, "loss": 0.1896, "step": 364375 }, { "epoch": 3.58, "grad_norm": 11.982529640197754, "learning_rate": 3.241229507382804e-06, "loss": 0.0757, "step": 364400 }, { "epoch": 3.58, "grad_norm": 11.026238441467285, "learning_rate": 3.2411053849285554e-06, "loss": 0.1903, "step": 364425 }, { "epoch": 3.58, "grad_norm": 5.663596153259277, "learning_rate": 3.240981262474307e-06, "loss": 0.0947, "step": 364450 }, { "epoch": 3.58, "grad_norm": 10.211875915527344, "learning_rate": 3.2408571400200583e-06, "loss": 0.1488, "step": 364475 }, { "epoch": 3.58, "grad_norm": 24.262327194213867, "learning_rate": 3.2407330175658095e-06, "loss": 0.0553, "step": 364500 }, { "epoch": 3.58, "grad_norm": 6.124882698059082, "learning_rate": 3.2406088951115616e-06, "loss": 0.1845, "step": 364525 }, { "epoch": 3.58, "grad_norm": 14.440250396728516, "learning_rate": 3.2404847726573128e-06, "loss": 0.1186, "step": 364550 }, { "epoch": 3.58, "grad_norm": 4.499777317047119, "learning_rate": 3.2403606502030644e-06, "loss": 0.1852, "step": 364575 }, { "epoch": 3.58, "grad_norm": 6.858781337738037, "learning_rate": 3.240236527748816e-06, "loss": 0.0867, "step": 364600 }, { "epoch": 3.59, "grad_norm": 4.422268390655518, "learning_rate": 3.2401173701927375e-06, "loss": 0.2276, "step": 364625 }, { "epoch": 3.59, "grad_norm": 13.238844871520996, "learning_rate": 3.239993247738489e-06, "loss": 0.0571, "step": 364650 }, { "epoch": 3.59, "grad_norm": 7.064104080200195, "learning_rate": 3.2398691252842408e-06, "loss": 0.1821, "step": 364675 }, { "epoch": 3.59, "grad_norm": 6.74305534362793, "learning_rate": 3.239745002829992e-06, "loss": 0.0809, "step": 364700 }, { "epoch": 3.59, "grad_norm": 4.363339900970459, "learning_rate": 3.239620880375744e-06, "loss": 0.2126, "step": 364725 }, { "epoch": 3.59, "grad_norm": 5.60318660736084, "learning_rate": 3.2394967579214952e-06, "loss": 0.1004, "step": 364750 }, { "epoch": 3.59, "grad_norm": 5.852818489074707, "learning_rate": 3.239372635467247e-06, "loss": 0.2148, "step": 364775 }, { "epoch": 3.59, "grad_norm": 10.14097785949707, "learning_rate": 3.2392485130129985e-06, "loss": 0.0613, "step": 364800 }, { "epoch": 3.59, "grad_norm": 5.240122318267822, "learning_rate": 3.23912439055875e-06, "loss": 0.1739, "step": 364825 }, { "epoch": 3.59, "grad_norm": 11.504650115966797, "learning_rate": 3.2390002681045014e-06, "loss": 0.0706, "step": 364850 }, { "epoch": 3.59, "grad_norm": 5.890507221221924, "learning_rate": 3.238876145650253e-06, "loss": 0.1858, "step": 364875 }, { "epoch": 3.59, "grad_norm": 12.665590286254883, "learning_rate": 3.2387520231960046e-06, "loss": 0.0659, "step": 364900 }, { "epoch": 3.59, "grad_norm": 4.116995811462402, "learning_rate": 3.238627900741756e-06, "loss": 0.2002, "step": 364925 }, { "epoch": 3.59, "grad_norm": 13.660493850708008, "learning_rate": 3.238503778287508e-06, "loss": 0.1136, "step": 364950 }, { "epoch": 3.59, "grad_norm": 5.157310485839844, "learning_rate": 3.238379655833259e-06, "loss": 0.2008, "step": 364975 }, { "epoch": 3.59, "grad_norm": 4.184670448303223, "learning_rate": 3.2382555333790108e-06, "loss": 0.0732, "step": 365000 }, { "epoch": 3.59, "grad_norm": 6.489013671875, "learning_rate": 3.2381314109247624e-06, "loss": 0.1956, "step": 365025 }, { "epoch": 3.59, "grad_norm": 9.363285064697266, "learning_rate": 3.238007288470514e-06, "loss": 0.0634, "step": 365050 }, { "epoch": 3.59, "grad_norm": 4.1656575202941895, "learning_rate": 3.2378831660162652e-06, "loss": 0.1915, "step": 365075 }, { "epoch": 3.59, "grad_norm": 5.39908504486084, "learning_rate": 3.2377590435620164e-06, "loss": 0.059, "step": 365100 }, { "epoch": 3.59, "grad_norm": 2.4382081031799316, "learning_rate": 3.2376349211077685e-06, "loss": 0.1896, "step": 365125 }, { "epoch": 3.59, "grad_norm": 10.776660919189453, "learning_rate": 3.2375107986535197e-06, "loss": 0.0567, "step": 365150 }, { "epoch": 3.59, "grad_norm": 4.6232500076293945, "learning_rate": 3.2373866761992713e-06, "loss": 0.1628, "step": 365175 }, { "epoch": 3.59, "grad_norm": 11.41380500793457, "learning_rate": 3.237262553745023e-06, "loss": 0.0692, "step": 365200 }, { "epoch": 3.59, "grad_norm": 4.09580135345459, "learning_rate": 3.2371384312907746e-06, "loss": 0.1746, "step": 365225 }, { "epoch": 3.59, "grad_norm": 3.2629008293151855, "learning_rate": 3.237014308836526e-06, "loss": 0.0655, "step": 365250 }, { "epoch": 3.59, "grad_norm": 4.744700908660889, "learning_rate": 3.236890186382278e-06, "loss": 0.1928, "step": 365275 }, { "epoch": 3.59, "grad_norm": 1.5022443532943726, "learning_rate": 3.236766063928029e-06, "loss": 0.0689, "step": 365300 }, { "epoch": 3.59, "grad_norm": 8.462165832519531, "learning_rate": 3.2366419414737803e-06, "loss": 0.195, "step": 365325 }, { "epoch": 3.59, "grad_norm": 11.476920127868652, "learning_rate": 3.2365178190195324e-06, "loss": 0.0604, "step": 365350 }, { "epoch": 3.59, "grad_norm": 7.916141510009766, "learning_rate": 3.2363936965652836e-06, "loss": 0.1583, "step": 365375 }, { "epoch": 3.59, "grad_norm": 5.613031387329102, "learning_rate": 3.2362695741110352e-06, "loss": 0.0592, "step": 365400 }, { "epoch": 3.59, "grad_norm": 3.909337043762207, "learning_rate": 3.236145451656787e-06, "loss": 0.1542, "step": 365425 }, { "epoch": 3.59, "grad_norm": 6.4864606857299805, "learning_rate": 3.2360213292025385e-06, "loss": 0.0801, "step": 365450 }, { "epoch": 3.59, "grad_norm": 36.64402389526367, "learning_rate": 3.2358972067482897e-06, "loss": 0.1821, "step": 365475 }, { "epoch": 3.59, "grad_norm": 4.246496200561523, "learning_rate": 3.2357730842940418e-06, "loss": 0.0607, "step": 365500 }, { "epoch": 3.59, "grad_norm": 4.834843158721924, "learning_rate": 3.235648961839793e-06, "loss": 0.1943, "step": 365525 }, { "epoch": 3.59, "grad_norm": 8.296097755432129, "learning_rate": 3.235524839385544e-06, "loss": 0.0683, "step": 365550 }, { "epoch": 3.59, "grad_norm": 5.758617401123047, "learning_rate": 3.2354007169312962e-06, "loss": 0.2197, "step": 365575 }, { "epoch": 3.59, "grad_norm": 9.655989646911621, "learning_rate": 3.2352765944770474e-06, "loss": 0.0641, "step": 365600 }, { "epoch": 3.59, "grad_norm": 11.079513549804688, "learning_rate": 3.235152472022799e-06, "loss": 0.1544, "step": 365625 }, { "epoch": 3.6, "grad_norm": 11.571529388427734, "learning_rate": 3.2350283495685507e-06, "loss": 0.0827, "step": 365650 }, { "epoch": 3.6, "grad_norm": 4.562081336975098, "learning_rate": 3.2349042271143024e-06, "loss": 0.1688, "step": 365675 }, { "epoch": 3.6, "grad_norm": 9.077983856201172, "learning_rate": 3.2347801046600536e-06, "loss": 0.0762, "step": 365700 }, { "epoch": 3.6, "grad_norm": 3.9724137783050537, "learning_rate": 3.2346559822058056e-06, "loss": 0.1859, "step": 365725 }, { "epoch": 3.6, "grad_norm": 8.316518783569336, "learning_rate": 3.234531859751557e-06, "loss": 0.0742, "step": 365750 }, { "epoch": 3.6, "grad_norm": 5.608796119689941, "learning_rate": 3.234407737297308e-06, "loss": 0.2114, "step": 365775 }, { "epoch": 3.6, "grad_norm": 5.767719268798828, "learning_rate": 3.23428361484306e-06, "loss": 0.0659, "step": 365800 }, { "epoch": 3.6, "grad_norm": 6.394558906555176, "learning_rate": 3.2341594923888113e-06, "loss": 0.1845, "step": 365825 }, { "epoch": 3.6, "grad_norm": 6.570433616638184, "learning_rate": 3.234035369934563e-06, "loss": 0.0641, "step": 365850 }, { "epoch": 3.6, "grad_norm": 2.7421600818634033, "learning_rate": 3.2339112474803146e-06, "loss": 0.2117, "step": 365875 }, { "epoch": 3.6, "grad_norm": 5.179591178894043, "learning_rate": 3.2337871250260662e-06, "loss": 0.0652, "step": 365900 }, { "epoch": 3.6, "grad_norm": 4.446844100952148, "learning_rate": 3.2336630025718174e-06, "loss": 0.1852, "step": 365925 }, { "epoch": 3.6, "grad_norm": 8.40550422668457, "learning_rate": 3.2335388801175686e-06, "loss": 0.0625, "step": 365950 }, { "epoch": 3.6, "grad_norm": 5.977367401123047, "learning_rate": 3.2334147576633207e-06, "loss": 0.205, "step": 365975 }, { "epoch": 3.6, "grad_norm": 4.533297061920166, "learning_rate": 3.233290635209072e-06, "loss": 0.0648, "step": 366000 }, { "epoch": 3.6, "grad_norm": 4.15672492980957, "learning_rate": 3.2331665127548236e-06, "loss": 0.168, "step": 366025 }, { "epoch": 3.6, "grad_norm": 10.987173080444336, "learning_rate": 3.233042390300575e-06, "loss": 0.0702, "step": 366050 }, { "epoch": 3.6, "grad_norm": 2.8638248443603516, "learning_rate": 3.232918267846327e-06, "loss": 0.1825, "step": 366075 }, { "epoch": 3.6, "grad_norm": 7.824257850646973, "learning_rate": 3.232794145392078e-06, "loss": 0.0437, "step": 366100 }, { "epoch": 3.6, "grad_norm": 6.052772045135498, "learning_rate": 3.23267002293783e-06, "loss": 0.2045, "step": 366125 }, { "epoch": 3.6, "grad_norm": 7.137964725494385, "learning_rate": 3.2325459004835813e-06, "loss": 0.0845, "step": 366150 }, { "epoch": 3.6, "grad_norm": 3.803015947341919, "learning_rate": 3.2324217780293325e-06, "loss": 0.184, "step": 366175 }, { "epoch": 3.6, "grad_norm": 8.504670143127441, "learning_rate": 3.2322976555750846e-06, "loss": 0.0714, "step": 366200 }, { "epoch": 3.6, "grad_norm": 5.1829094886779785, "learning_rate": 3.2321735331208358e-06, "loss": 0.2037, "step": 366225 }, { "epoch": 3.6, "grad_norm": 3.6726107597351074, "learning_rate": 3.2320494106665874e-06, "loss": 0.0893, "step": 366250 }, { "epoch": 3.6, "grad_norm": 4.728908538818359, "learning_rate": 3.231925288212339e-06, "loss": 0.2044, "step": 366275 }, { "epoch": 3.6, "grad_norm": 4.590875148773193, "learning_rate": 3.2318011657580907e-06, "loss": 0.0919, "step": 366300 }, { "epoch": 3.6, "grad_norm": 3.2479190826416016, "learning_rate": 3.231677043303842e-06, "loss": 0.1659, "step": 366325 }, { "epoch": 3.6, "grad_norm": 1.1442519426345825, "learning_rate": 3.231552920849594e-06, "loss": 0.0715, "step": 366350 }, { "epoch": 3.6, "grad_norm": 6.5206427574157715, "learning_rate": 3.231428798395345e-06, "loss": 0.1736, "step": 366375 }, { "epoch": 3.6, "grad_norm": 3.681861400604248, "learning_rate": 3.2313046759410964e-06, "loss": 0.0534, "step": 366400 }, { "epoch": 3.6, "grad_norm": 3.584108591079712, "learning_rate": 3.2311805534868484e-06, "loss": 0.2067, "step": 366425 }, { "epoch": 3.6, "grad_norm": 18.312257766723633, "learning_rate": 3.2310564310325997e-06, "loss": 0.0656, "step": 366450 }, { "epoch": 3.6, "grad_norm": 4.307305812835693, "learning_rate": 3.2309323085783513e-06, "loss": 0.1889, "step": 366475 }, { "epoch": 3.6, "grad_norm": 8.846284866333008, "learning_rate": 3.230808186124103e-06, "loss": 0.0537, "step": 366500 }, { "epoch": 3.6, "grad_norm": 5.008912086486816, "learning_rate": 3.2306840636698546e-06, "loss": 0.1799, "step": 366525 }, { "epoch": 3.6, "grad_norm": 11.662890434265137, "learning_rate": 3.2305599412156058e-06, "loss": 0.0804, "step": 366550 }, { "epoch": 3.6, "grad_norm": 6.504528045654297, "learning_rate": 3.230435818761358e-06, "loss": 0.1693, "step": 366575 }, { "epoch": 3.6, "grad_norm": 11.371777534484863, "learning_rate": 3.230311696307109e-06, "loss": 0.0628, "step": 366600 }, { "epoch": 3.6, "grad_norm": 4.598473072052002, "learning_rate": 3.23019253875103e-06, "loss": 0.2274, "step": 366625 }, { "epoch": 3.6, "grad_norm": 4.789393424987793, "learning_rate": 3.230068416296782e-06, "loss": 0.0851, "step": 366650 }, { "epoch": 3.61, "grad_norm": 3.982645034790039, "learning_rate": 3.2299442938425333e-06, "loss": 0.1793, "step": 366675 }, { "epoch": 3.61, "grad_norm": 9.575037002563477, "learning_rate": 3.229820171388285e-06, "loss": 0.0716, "step": 366700 }, { "epoch": 3.61, "grad_norm": 6.834857940673828, "learning_rate": 3.2296960489340366e-06, "loss": 0.2449, "step": 366725 }, { "epoch": 3.61, "grad_norm": 9.17498779296875, "learning_rate": 3.2295719264797882e-06, "loss": 0.0723, "step": 366750 }, { "epoch": 3.61, "grad_norm": 7.652414798736572, "learning_rate": 3.2294478040255395e-06, "loss": 0.2168, "step": 366775 }, { "epoch": 3.61, "grad_norm": 8.094221115112305, "learning_rate": 3.2293236815712915e-06, "loss": 0.0741, "step": 366800 }, { "epoch": 3.61, "grad_norm": 6.85066032409668, "learning_rate": 3.2291995591170427e-06, "loss": 0.1735, "step": 366825 }, { "epoch": 3.61, "grad_norm": 6.502073764801025, "learning_rate": 3.229075436662794e-06, "loss": 0.062, "step": 366850 }, { "epoch": 3.61, "grad_norm": 3.771157741546631, "learning_rate": 3.228951314208546e-06, "loss": 0.1529, "step": 366875 }, { "epoch": 3.61, "grad_norm": 6.731177806854248, "learning_rate": 3.228827191754297e-06, "loss": 0.0641, "step": 366900 }, { "epoch": 3.61, "grad_norm": 4.728751182556152, "learning_rate": 3.228703069300049e-06, "loss": 0.1829, "step": 366925 }, { "epoch": 3.61, "grad_norm": 9.109325408935547, "learning_rate": 3.2285789468458005e-06, "loss": 0.0733, "step": 366950 }, { "epoch": 3.61, "grad_norm": 6.755912780761719, "learning_rate": 3.228454824391552e-06, "loss": 0.1502, "step": 366975 }, { "epoch": 3.61, "grad_norm": 11.426131248474121, "learning_rate": 3.2283307019373033e-06, "loss": 0.0753, "step": 367000 }, { "epoch": 3.61, "grad_norm": 6.721951007843018, "learning_rate": 3.2282065794830554e-06, "loss": 0.1782, "step": 367025 }, { "epoch": 3.61, "grad_norm": 9.099028587341309, "learning_rate": 3.2280824570288066e-06, "loss": 0.0778, "step": 367050 }, { "epoch": 3.61, "grad_norm": 4.238882541656494, "learning_rate": 3.227958334574558e-06, "loss": 0.17, "step": 367075 }, { "epoch": 3.61, "grad_norm": 9.553489685058594, "learning_rate": 3.22783421212031e-06, "loss": 0.0725, "step": 367100 }, { "epoch": 3.61, "grad_norm": 3.7868430614471436, "learning_rate": 3.227710089666061e-06, "loss": 0.1727, "step": 367125 }, { "epoch": 3.61, "grad_norm": 8.515766143798828, "learning_rate": 3.2275859672118127e-06, "loss": 0.0877, "step": 367150 }, { "epoch": 3.61, "grad_norm": 6.391822814941406, "learning_rate": 3.2274618447575643e-06, "loss": 0.1802, "step": 367175 }, { "epoch": 3.61, "grad_norm": 7.665640354156494, "learning_rate": 3.227337722303316e-06, "loss": 0.0657, "step": 367200 }, { "epoch": 3.61, "grad_norm": 4.51314640045166, "learning_rate": 3.227213599849067e-06, "loss": 0.1986, "step": 367225 }, { "epoch": 3.61, "grad_norm": 8.820792198181152, "learning_rate": 3.2270894773948192e-06, "loss": 0.0668, "step": 367250 }, { "epoch": 3.61, "grad_norm": 6.212576389312744, "learning_rate": 3.2269653549405705e-06, "loss": 0.1956, "step": 367275 }, { "epoch": 3.61, "grad_norm": 10.434452056884766, "learning_rate": 3.2268412324863217e-06, "loss": 0.0708, "step": 367300 }, { "epoch": 3.61, "grad_norm": 4.536642551422119, "learning_rate": 3.2267171100320737e-06, "loss": 0.1684, "step": 367325 }, { "epoch": 3.61, "grad_norm": 14.187355041503906, "learning_rate": 3.226592987577825e-06, "loss": 0.0784, "step": 367350 }, { "epoch": 3.61, "grad_norm": 4.400550842285156, "learning_rate": 3.2264688651235766e-06, "loss": 0.2295, "step": 367375 }, { "epoch": 3.61, "grad_norm": 8.013769149780273, "learning_rate": 3.2263447426693278e-06, "loss": 0.0597, "step": 367400 }, { "epoch": 3.61, "grad_norm": 6.405177116394043, "learning_rate": 3.22622062021508e-06, "loss": 0.2164, "step": 367425 }, { "epoch": 3.61, "grad_norm": 18.390174865722656, "learning_rate": 3.226096497760831e-06, "loss": 0.0628, "step": 367450 }, { "epoch": 3.61, "grad_norm": 3.2571916580200195, "learning_rate": 3.2259723753065823e-06, "loss": 0.2059, "step": 367475 }, { "epoch": 3.61, "grad_norm": 8.989850997924805, "learning_rate": 3.2258482528523343e-06, "loss": 0.0733, "step": 367500 }, { "epoch": 3.61, "grad_norm": 2.755059003829956, "learning_rate": 3.2257241303980855e-06, "loss": 0.1928, "step": 367525 }, { "epoch": 3.61, "grad_norm": 11.324423789978027, "learning_rate": 3.225600007943837e-06, "loss": 0.0629, "step": 367550 }, { "epoch": 3.61, "grad_norm": 3.643638849258423, "learning_rate": 3.225475885489589e-06, "loss": 0.182, "step": 367575 }, { "epoch": 3.61, "grad_norm": 8.036615371704102, "learning_rate": 3.2253517630353404e-06, "loss": 0.0677, "step": 367600 }, { "epoch": 3.61, "grad_norm": 4.759182453155518, "learning_rate": 3.2252276405810917e-06, "loss": 0.1717, "step": 367625 }, { "epoch": 3.61, "grad_norm": 4.188045978546143, "learning_rate": 3.2251035181268437e-06, "loss": 0.0733, "step": 367650 }, { "epoch": 3.62, "grad_norm": 4.9794745445251465, "learning_rate": 3.224979395672595e-06, "loss": 0.1955, "step": 367675 }, { "epoch": 3.62, "grad_norm": 7.889553546905518, "learning_rate": 3.2248552732183466e-06, "loss": 0.0896, "step": 367700 }, { "epoch": 3.62, "grad_norm": 6.194143772125244, "learning_rate": 3.224731150764098e-06, "loss": 0.1654, "step": 367725 }, { "epoch": 3.62, "grad_norm": 8.411657333374023, "learning_rate": 3.22460702830985e-06, "loss": 0.0652, "step": 367750 }, { "epoch": 3.62, "grad_norm": 5.970427513122559, "learning_rate": 3.224482905855601e-06, "loss": 0.1717, "step": 367775 }, { "epoch": 3.62, "grad_norm": 6.135511875152588, "learning_rate": 3.224358783401353e-06, "loss": 0.0853, "step": 367800 }, { "epoch": 3.62, "grad_norm": 5.669948101043701, "learning_rate": 3.2242346609471043e-06, "loss": 0.2098, "step": 367825 }, { "epoch": 3.62, "grad_norm": 6.332712173461914, "learning_rate": 3.2241105384928555e-06, "loss": 0.062, "step": 367850 }, { "epoch": 3.62, "grad_norm": 6.902746200561523, "learning_rate": 3.2239864160386076e-06, "loss": 0.2174, "step": 367875 }, { "epoch": 3.62, "grad_norm": 11.509725570678711, "learning_rate": 3.223862293584359e-06, "loss": 0.0737, "step": 367900 }, { "epoch": 3.62, "grad_norm": 4.900585651397705, "learning_rate": 3.2237381711301104e-06, "loss": 0.1803, "step": 367925 }, { "epoch": 3.62, "grad_norm": 5.965666770935059, "learning_rate": 3.223614048675862e-06, "loss": 0.0812, "step": 367950 }, { "epoch": 3.62, "grad_norm": 4.986674785614014, "learning_rate": 3.2234899262216137e-06, "loss": 0.1958, "step": 367975 }, { "epoch": 3.62, "grad_norm": 4.210200786590576, "learning_rate": 3.223365803767365e-06, "loss": 0.0731, "step": 368000 }, { "epoch": 3.62, "grad_norm": 3.8845014572143555, "learning_rate": 3.223241681313117e-06, "loss": 0.1342, "step": 368025 }, { "epoch": 3.62, "grad_norm": 30.64275360107422, "learning_rate": 3.223117558858868e-06, "loss": 0.084, "step": 368050 }, { "epoch": 3.62, "grad_norm": 5.216312885284424, "learning_rate": 3.2229934364046194e-06, "loss": 0.1966, "step": 368075 }, { "epoch": 3.62, "grad_norm": 13.669443130493164, "learning_rate": 3.2228693139503714e-06, "loss": 0.0783, "step": 368100 }, { "epoch": 3.62, "grad_norm": 12.20191764831543, "learning_rate": 3.2227451914961227e-06, "loss": 0.1991, "step": 368125 }, { "epoch": 3.62, "grad_norm": 21.36590003967285, "learning_rate": 3.2226210690418743e-06, "loss": 0.0678, "step": 368150 }, { "epoch": 3.62, "grad_norm": 3.1814937591552734, "learning_rate": 3.222496946587626e-06, "loss": 0.2397, "step": 368175 }, { "epoch": 3.62, "grad_norm": 6.667595863342285, "learning_rate": 3.2223728241333776e-06, "loss": 0.0827, "step": 368200 }, { "epoch": 3.62, "grad_norm": 6.608492374420166, "learning_rate": 3.2222487016791288e-06, "loss": 0.1905, "step": 368225 }, { "epoch": 3.62, "grad_norm": 8.062427520751953, "learning_rate": 3.22212457922488e-06, "loss": 0.0808, "step": 368250 }, { "epoch": 3.62, "grad_norm": 4.241211414337158, "learning_rate": 3.222000456770632e-06, "loss": 0.1482, "step": 368275 }, { "epoch": 3.62, "grad_norm": 9.099533081054688, "learning_rate": 3.2218763343163833e-06, "loss": 0.0782, "step": 368300 }, { "epoch": 3.62, "grad_norm": 7.046987533569336, "learning_rate": 3.221752211862135e-06, "loss": 0.2008, "step": 368325 }, { "epoch": 3.62, "grad_norm": 8.046611785888672, "learning_rate": 3.2216280894078865e-06, "loss": 0.0686, "step": 368350 }, { "epoch": 3.62, "grad_norm": 5.144519329071045, "learning_rate": 3.221503966953638e-06, "loss": 0.18, "step": 368375 }, { "epoch": 3.62, "grad_norm": 5.859400272369385, "learning_rate": 3.2213798444993894e-06, "loss": 0.0685, "step": 368400 }, { "epoch": 3.62, "grad_norm": 4.676197528839111, "learning_rate": 3.2212557220451414e-06, "loss": 0.2076, "step": 368425 }, { "epoch": 3.62, "grad_norm": 11.423460960388184, "learning_rate": 3.2211315995908926e-06, "loss": 0.054, "step": 368450 }, { "epoch": 3.62, "grad_norm": 4.9422831535339355, "learning_rate": 3.221007477136644e-06, "loss": 0.2016, "step": 368475 }, { "epoch": 3.62, "grad_norm": 7.635842323303223, "learning_rate": 3.220883354682396e-06, "loss": 0.0825, "step": 368500 }, { "epoch": 3.62, "grad_norm": 2.6161210536956787, "learning_rate": 3.220759232228147e-06, "loss": 0.1865, "step": 368525 }, { "epoch": 3.62, "grad_norm": 8.908413887023926, "learning_rate": 3.2206351097738988e-06, "loss": 0.0924, "step": 368550 }, { "epoch": 3.62, "grad_norm": 3.5132739543914795, "learning_rate": 3.2205109873196504e-06, "loss": 0.172, "step": 368575 }, { "epoch": 3.62, "grad_norm": 13.462047576904297, "learning_rate": 3.220386864865402e-06, "loss": 0.0817, "step": 368600 }, { "epoch": 3.62, "grad_norm": 4.558047294616699, "learning_rate": 3.2202627424111532e-06, "loss": 0.1694, "step": 368625 }, { "epoch": 3.62, "grad_norm": 10.642518997192383, "learning_rate": 3.2201386199569053e-06, "loss": 0.0838, "step": 368650 }, { "epoch": 3.62, "grad_norm": 4.520395755767822, "learning_rate": 3.2200144975026565e-06, "loss": 0.2051, "step": 368675 }, { "epoch": 3.63, "grad_norm": 9.532642364501953, "learning_rate": 3.2198903750484077e-06, "loss": 0.0563, "step": 368700 }, { "epoch": 3.63, "grad_norm": 4.595311164855957, "learning_rate": 3.2197712174923296e-06, "loss": 0.2132, "step": 368725 }, { "epoch": 3.63, "grad_norm": 5.553379535675049, "learning_rate": 3.219647095038081e-06, "loss": 0.0782, "step": 368750 }, { "epoch": 3.63, "grad_norm": 4.248503684997559, "learning_rate": 3.219522972583833e-06, "loss": 0.201, "step": 368775 }, { "epoch": 3.63, "grad_norm": 4.282970905303955, "learning_rate": 3.219398850129584e-06, "loss": 0.0758, "step": 368800 }, { "epoch": 3.63, "grad_norm": 3.854887008666992, "learning_rate": 3.2192747276753357e-06, "loss": 0.1661, "step": 368825 }, { "epoch": 3.63, "grad_norm": 9.812859535217285, "learning_rate": 3.219150605221087e-06, "loss": 0.0761, "step": 368850 }, { "epoch": 3.63, "grad_norm": 5.673504829406738, "learning_rate": 3.219026482766839e-06, "loss": 0.2041, "step": 368875 }, { "epoch": 3.63, "grad_norm": 5.279510974884033, "learning_rate": 3.21890236031259e-06, "loss": 0.0589, "step": 368900 }, { "epoch": 3.63, "grad_norm": 3.7148003578186035, "learning_rate": 3.2187782378583414e-06, "loss": 0.1935, "step": 368925 }, { "epoch": 3.63, "grad_norm": 7.027836799621582, "learning_rate": 3.2186541154040935e-06, "loss": 0.0655, "step": 368950 }, { "epoch": 3.63, "grad_norm": 3.963027000427246, "learning_rate": 3.2185299929498447e-06, "loss": 0.1645, "step": 368975 }, { "epoch": 3.63, "grad_norm": 20.421329498291016, "learning_rate": 3.2184058704955963e-06, "loss": 0.0653, "step": 369000 }, { "epoch": 3.63, "grad_norm": 4.095364570617676, "learning_rate": 3.218281748041348e-06, "loss": 0.1727, "step": 369025 }, { "epoch": 3.63, "grad_norm": 10.365267753601074, "learning_rate": 3.2181576255870996e-06, "loss": 0.0759, "step": 369050 }, { "epoch": 3.63, "grad_norm": 5.599184989929199, "learning_rate": 3.218033503132851e-06, "loss": 0.1784, "step": 369075 }, { "epoch": 3.63, "grad_norm": 7.442800521850586, "learning_rate": 3.217909380678603e-06, "loss": 0.0826, "step": 369100 }, { "epoch": 3.63, "grad_norm": 4.936265468597412, "learning_rate": 3.217785258224354e-06, "loss": 0.199, "step": 369125 }, { "epoch": 3.63, "grad_norm": 4.280983924865723, "learning_rate": 3.2176611357701053e-06, "loss": 0.0795, "step": 369150 }, { "epoch": 3.63, "grad_norm": 4.665860176086426, "learning_rate": 3.2175370133158573e-06, "loss": 0.2056, "step": 369175 }, { "epoch": 3.63, "grad_norm": 5.977356433868408, "learning_rate": 3.2174128908616085e-06, "loss": 0.0616, "step": 369200 }, { "epoch": 3.63, "grad_norm": 4.173527240753174, "learning_rate": 3.21728876840736e-06, "loss": 0.198, "step": 369225 }, { "epoch": 3.63, "grad_norm": 10.708892822265625, "learning_rate": 3.217164645953112e-06, "loss": 0.0709, "step": 369250 }, { "epoch": 3.63, "grad_norm": 3.8907477855682373, "learning_rate": 3.2170405234988634e-06, "loss": 0.1458, "step": 369275 }, { "epoch": 3.63, "grad_norm": 9.305432319641113, "learning_rate": 3.2169164010446147e-06, "loss": 0.0737, "step": 369300 }, { "epoch": 3.63, "grad_norm": 4.783461570739746, "learning_rate": 3.2167922785903667e-06, "loss": 0.172, "step": 369325 }, { "epoch": 3.63, "grad_norm": 5.950185775756836, "learning_rate": 3.216668156136118e-06, "loss": 0.0683, "step": 369350 }, { "epoch": 3.63, "grad_norm": 3.8833606243133545, "learning_rate": 3.216544033681869e-06, "loss": 0.1635, "step": 369375 }, { "epoch": 3.63, "grad_norm": 7.849308490753174, "learning_rate": 3.216419911227621e-06, "loss": 0.0873, "step": 369400 }, { "epoch": 3.63, "grad_norm": 5.3890838623046875, "learning_rate": 3.2162957887733724e-06, "loss": 0.2257, "step": 369425 }, { "epoch": 3.63, "grad_norm": 3.9782986640930176, "learning_rate": 3.216171666319124e-06, "loss": 0.0869, "step": 369450 }, { "epoch": 3.63, "grad_norm": 4.622565746307373, "learning_rate": 3.2160475438648757e-06, "loss": 0.1885, "step": 369475 }, { "epoch": 3.63, "grad_norm": 9.844386100769043, "learning_rate": 3.2159234214106273e-06, "loss": 0.0745, "step": 369500 }, { "epoch": 3.63, "grad_norm": 3.884800434112549, "learning_rate": 3.2157992989563785e-06, "loss": 0.2007, "step": 369525 }, { "epoch": 3.63, "grad_norm": 12.251104354858398, "learning_rate": 3.2156751765021306e-06, "loss": 0.0629, "step": 369550 }, { "epoch": 3.63, "grad_norm": 3.381230115890503, "learning_rate": 3.215551054047882e-06, "loss": 0.193, "step": 369575 }, { "epoch": 3.63, "grad_norm": 3.2453010082244873, "learning_rate": 3.215426931593633e-06, "loss": 0.077, "step": 369600 }, { "epoch": 3.63, "grad_norm": 4.480743885040283, "learning_rate": 3.215302809139385e-06, "loss": 0.1636, "step": 369625 }, { "epoch": 3.63, "grad_norm": 3.4297423362731934, "learning_rate": 3.2151786866851363e-06, "loss": 0.0626, "step": 369650 }, { "epoch": 3.63, "grad_norm": 4.052199363708496, "learning_rate": 3.215054564230888e-06, "loss": 0.2023, "step": 369675 }, { "epoch": 3.63, "grad_norm": 48.096439361572266, "learning_rate": 3.214930441776639e-06, "loss": 0.084, "step": 369700 }, { "epoch": 3.64, "grad_norm": 6.590548515319824, "learning_rate": 3.214806319322391e-06, "loss": 0.1984, "step": 369725 }, { "epoch": 3.64, "grad_norm": 5.294250965118408, "learning_rate": 3.2146821968681424e-06, "loss": 0.0835, "step": 369750 }, { "epoch": 3.64, "grad_norm": 5.473986625671387, "learning_rate": 3.2145580744138936e-06, "loss": 0.2052, "step": 369775 }, { "epoch": 3.64, "grad_norm": 2.942484140396118, "learning_rate": 3.2144339519596457e-06, "loss": 0.0721, "step": 369800 }, { "epoch": 3.64, "grad_norm": 5.287282466888428, "learning_rate": 3.214309829505397e-06, "loss": 0.1479, "step": 369825 }, { "epoch": 3.64, "grad_norm": 10.231524467468262, "learning_rate": 3.2141857070511485e-06, "loss": 0.0664, "step": 369850 }, { "epoch": 3.64, "grad_norm": 6.049152374267578, "learning_rate": 3.2140615845969e-06, "loss": 0.1243, "step": 369875 }, { "epoch": 3.64, "grad_norm": 13.086397171020508, "learning_rate": 3.2139374621426518e-06, "loss": 0.0893, "step": 369900 }, { "epoch": 3.64, "grad_norm": 5.468204498291016, "learning_rate": 3.213813339688403e-06, "loss": 0.17, "step": 369925 }, { "epoch": 3.64, "grad_norm": 11.915205955505371, "learning_rate": 3.213689217234155e-06, "loss": 0.0556, "step": 369950 }, { "epoch": 3.64, "grad_norm": 6.123107433319092, "learning_rate": 3.2135650947799063e-06, "loss": 0.1925, "step": 369975 }, { "epoch": 3.64, "grad_norm": 8.733348846435547, "learning_rate": 3.2134409723256575e-06, "loss": 0.082, "step": 370000 }, { "epoch": 3.64, "grad_norm": 5.7253828048706055, "learning_rate": 3.2133168498714095e-06, "loss": 0.1972, "step": 370025 }, { "epoch": 3.64, "grad_norm": 10.202359199523926, "learning_rate": 3.2131927274171607e-06, "loss": 0.0729, "step": 370050 }, { "epoch": 3.64, "grad_norm": 5.6270751953125, "learning_rate": 3.2130686049629124e-06, "loss": 0.1895, "step": 370075 }, { "epoch": 3.64, "grad_norm": 5.994744777679443, "learning_rate": 3.212944482508664e-06, "loss": 0.0838, "step": 370100 }, { "epoch": 3.64, "grad_norm": 5.390081882476807, "learning_rate": 3.2128203600544156e-06, "loss": 0.1842, "step": 370125 }, { "epoch": 3.64, "grad_norm": 9.5276517868042, "learning_rate": 3.212696237600167e-06, "loss": 0.0674, "step": 370150 }, { "epoch": 3.64, "grad_norm": 3.616344451904297, "learning_rate": 3.212572115145919e-06, "loss": 0.2158, "step": 370175 }, { "epoch": 3.64, "grad_norm": 1.7559449672698975, "learning_rate": 3.21244799269167e-06, "loss": 0.0661, "step": 370200 }, { "epoch": 3.64, "grad_norm": 4.809659004211426, "learning_rate": 3.2123238702374213e-06, "loss": 0.1823, "step": 370225 }, { "epoch": 3.64, "grad_norm": 6.841085433959961, "learning_rate": 3.2121997477831734e-06, "loss": 0.0555, "step": 370250 }, { "epoch": 3.64, "grad_norm": 8.15595817565918, "learning_rate": 3.2120756253289246e-06, "loss": 0.1574, "step": 370275 }, { "epoch": 3.64, "grad_norm": 8.526254653930664, "learning_rate": 3.2119515028746762e-06, "loss": 0.0389, "step": 370300 }, { "epoch": 3.64, "grad_norm": 5.207616806030273, "learning_rate": 3.211827380420428e-06, "loss": 0.1693, "step": 370325 }, { "epoch": 3.64, "grad_norm": 7.684842109680176, "learning_rate": 3.2117032579661795e-06, "loss": 0.0701, "step": 370350 }, { "epoch": 3.64, "grad_norm": 5.048071384429932, "learning_rate": 3.2115791355119307e-06, "loss": 0.2024, "step": 370375 }, { "epoch": 3.64, "grad_norm": 4.94711446762085, "learning_rate": 3.2114550130576828e-06, "loss": 0.0665, "step": 370400 }, { "epoch": 3.64, "grad_norm": 3.6445658206939697, "learning_rate": 3.211330890603434e-06, "loss": 0.1753, "step": 370425 }, { "epoch": 3.64, "grad_norm": 10.802289009094238, "learning_rate": 3.211206768149185e-06, "loss": 0.0702, "step": 370450 }, { "epoch": 3.64, "grad_norm": 4.356605529785156, "learning_rate": 3.2110826456949373e-06, "loss": 0.1507, "step": 370475 }, { "epoch": 3.64, "grad_norm": 10.416452407836914, "learning_rate": 3.2109585232406885e-06, "loss": 0.0687, "step": 370500 }, { "epoch": 3.64, "grad_norm": 4.954438209533691, "learning_rate": 3.21083440078644e-06, "loss": 0.1488, "step": 370525 }, { "epoch": 3.64, "grad_norm": 10.225825309753418, "learning_rate": 3.2107102783321913e-06, "loss": 0.0846, "step": 370550 }, { "epoch": 3.64, "grad_norm": 6.398379802703857, "learning_rate": 3.2105861558779434e-06, "loss": 0.189, "step": 370575 }, { "epoch": 3.64, "grad_norm": 13.50217342376709, "learning_rate": 3.2104620334236946e-06, "loss": 0.0869, "step": 370600 }, { "epoch": 3.64, "grad_norm": 4.872317314147949, "learning_rate": 3.2103379109694462e-06, "loss": 0.2121, "step": 370625 }, { "epoch": 3.64, "grad_norm": 12.726308822631836, "learning_rate": 3.210213788515198e-06, "loss": 0.0698, "step": 370650 }, { "epoch": 3.64, "grad_norm": 6.456186771392822, "learning_rate": 3.2100896660609495e-06, "loss": 0.187, "step": 370675 }, { "epoch": 3.64, "grad_norm": 12.575498580932617, "learning_rate": 3.2099655436067007e-06, "loss": 0.1037, "step": 370700 }, { "epoch": 3.65, "grad_norm": 5.177056312561035, "learning_rate": 3.2098414211524528e-06, "loss": 0.2332, "step": 370725 }, { "epoch": 3.65, "grad_norm": 10.526264190673828, "learning_rate": 3.209717298698204e-06, "loss": 0.0799, "step": 370750 }, { "epoch": 3.65, "grad_norm": 4.778608322143555, "learning_rate": 3.209593176243955e-06, "loss": 0.1887, "step": 370775 }, { "epoch": 3.65, "grad_norm": 9.78917121887207, "learning_rate": 3.2094690537897073e-06, "loss": 0.0666, "step": 370800 }, { "epoch": 3.65, "grad_norm": 12.06910228729248, "learning_rate": 3.2093449313354585e-06, "loss": 0.1837, "step": 370825 }, { "epoch": 3.65, "grad_norm": 13.74515438079834, "learning_rate": 3.20922080888121e-06, "loss": 0.0655, "step": 370850 }, { "epoch": 3.65, "grad_norm": 7.506231784820557, "learning_rate": 3.2090966864269617e-06, "loss": 0.1543, "step": 370875 }, { "epoch": 3.65, "grad_norm": 3.9011454582214355, "learning_rate": 3.2089725639727134e-06, "loss": 0.0575, "step": 370900 }, { "epoch": 3.65, "grad_norm": 5.5109100341796875, "learning_rate": 3.2088484415184646e-06, "loss": 0.1795, "step": 370925 }, { "epoch": 3.65, "grad_norm": 6.035251140594482, "learning_rate": 3.2087243190642166e-06, "loss": 0.0736, "step": 370950 }, { "epoch": 3.65, "grad_norm": 5.020644664764404, "learning_rate": 3.208600196609968e-06, "loss": 0.1355, "step": 370975 }, { "epoch": 3.65, "grad_norm": 9.862815856933594, "learning_rate": 3.208476074155719e-06, "loss": 0.054, "step": 371000 }, { "epoch": 3.65, "grad_norm": 4.076132297515869, "learning_rate": 3.208351951701471e-06, "loss": 0.1747, "step": 371025 }, { "epoch": 3.65, "grad_norm": 2.98380970954895, "learning_rate": 3.2082278292472223e-06, "loss": 0.0666, "step": 371050 }, { "epoch": 3.65, "grad_norm": 5.612473487854004, "learning_rate": 3.208103706792974e-06, "loss": 0.2368, "step": 371075 }, { "epoch": 3.65, "grad_norm": 9.80955982208252, "learning_rate": 3.2079795843387256e-06, "loss": 0.0939, "step": 371100 }, { "epoch": 3.65, "grad_norm": 3.8570613861083984, "learning_rate": 3.2078554618844772e-06, "loss": 0.1611, "step": 371125 }, { "epoch": 3.65, "grad_norm": 4.650700092315674, "learning_rate": 3.2077313394302284e-06, "loss": 0.0892, "step": 371150 }, { "epoch": 3.65, "grad_norm": 6.018450736999512, "learning_rate": 3.2076121818741503e-06, "loss": 0.1889, "step": 371175 }, { "epoch": 3.65, "grad_norm": 11.188632011413574, "learning_rate": 3.2074880594199015e-06, "loss": 0.0782, "step": 371200 }, { "epoch": 3.65, "grad_norm": 4.8361029624938965, "learning_rate": 3.2073639369656527e-06, "loss": 0.2343, "step": 371225 }, { "epoch": 3.65, "grad_norm": 2.2236461639404297, "learning_rate": 3.207239814511405e-06, "loss": 0.0701, "step": 371250 }, { "epoch": 3.65, "grad_norm": 3.378352403640747, "learning_rate": 3.207115692057156e-06, "loss": 0.2025, "step": 371275 }, { "epoch": 3.65, "grad_norm": 10.628881454467773, "learning_rate": 3.2069915696029076e-06, "loss": 0.0853, "step": 371300 }, { "epoch": 3.65, "grad_norm": 5.210610389709473, "learning_rate": 3.2068674471486593e-06, "loss": 0.1928, "step": 371325 }, { "epoch": 3.65, "grad_norm": 9.351400375366211, "learning_rate": 3.206743324694411e-06, "loss": 0.0905, "step": 371350 }, { "epoch": 3.65, "grad_norm": 5.213812351226807, "learning_rate": 3.206619202240162e-06, "loss": 0.1606, "step": 371375 }, { "epoch": 3.65, "grad_norm": 8.860499382019043, "learning_rate": 3.206495079785914e-06, "loss": 0.071, "step": 371400 }, { "epoch": 3.65, "grad_norm": 7.930665969848633, "learning_rate": 3.2063709573316654e-06, "loss": 0.2066, "step": 371425 }, { "epoch": 3.65, "grad_norm": 11.080510139465332, "learning_rate": 3.2062468348774166e-06, "loss": 0.0814, "step": 371450 }, { "epoch": 3.65, "grad_norm": 6.824382781982422, "learning_rate": 3.2061227124231687e-06, "loss": 0.2118, "step": 371475 }, { "epoch": 3.65, "grad_norm": 7.254294395446777, "learning_rate": 3.20599858996892e-06, "loss": 0.0657, "step": 371500 }, { "epoch": 3.65, "grad_norm": 5.369226455688477, "learning_rate": 3.2058744675146715e-06, "loss": 0.1727, "step": 371525 }, { "epoch": 3.65, "grad_norm": 14.592333793640137, "learning_rate": 3.205750345060423e-06, "loss": 0.085, "step": 371550 }, { "epoch": 3.65, "grad_norm": 3.2504262924194336, "learning_rate": 3.2056262226061748e-06, "loss": 0.163, "step": 371575 }, { "epoch": 3.65, "grad_norm": 12.437309265136719, "learning_rate": 3.205502100151926e-06, "loss": 0.0644, "step": 371600 }, { "epoch": 3.65, "grad_norm": 5.4055399894714355, "learning_rate": 3.205377977697678e-06, "loss": 0.1512, "step": 371625 }, { "epoch": 3.65, "grad_norm": 5.650587558746338, "learning_rate": 3.2052538552434293e-06, "loss": 0.0651, "step": 371650 }, { "epoch": 3.65, "grad_norm": 7.5298285484313965, "learning_rate": 3.2051297327891805e-06, "loss": 0.1864, "step": 371675 }, { "epoch": 3.65, "grad_norm": 2.9615249633789062, "learning_rate": 3.2050056103349325e-06, "loss": 0.077, "step": 371700 }, { "epoch": 3.65, "grad_norm": 4.2500081062316895, "learning_rate": 3.2048814878806837e-06, "loss": 0.1651, "step": 371725 }, { "epoch": 3.66, "grad_norm": 13.435980796813965, "learning_rate": 3.2047573654264354e-06, "loss": 0.1011, "step": 371750 }, { "epoch": 3.66, "grad_norm": 4.017437934875488, "learning_rate": 3.204633242972187e-06, "loss": 0.1789, "step": 371775 }, { "epoch": 3.66, "grad_norm": 6.86637020111084, "learning_rate": 3.2045091205179387e-06, "loss": 0.0685, "step": 371800 }, { "epoch": 3.66, "grad_norm": 6.343677997589111, "learning_rate": 3.20438499806369e-06, "loss": 0.198, "step": 371825 }, { "epoch": 3.66, "grad_norm": 10.696574211120605, "learning_rate": 3.204260875609442e-06, "loss": 0.0845, "step": 371850 }, { "epoch": 3.66, "grad_norm": 4.1771063804626465, "learning_rate": 3.204136753155193e-06, "loss": 0.1752, "step": 371875 }, { "epoch": 3.66, "grad_norm": 10.147770881652832, "learning_rate": 3.2040126307009443e-06, "loss": 0.0817, "step": 371900 }, { "epoch": 3.66, "grad_norm": 4.132112503051758, "learning_rate": 3.2038885082466964e-06, "loss": 0.1703, "step": 371925 }, { "epoch": 3.66, "grad_norm": 6.909457206726074, "learning_rate": 3.2037643857924476e-06, "loss": 0.0781, "step": 371950 }, { "epoch": 3.66, "grad_norm": 10.077372550964355, "learning_rate": 3.2036402633381993e-06, "loss": 0.1311, "step": 371975 }, { "epoch": 3.66, "grad_norm": 5.786023139953613, "learning_rate": 3.2035161408839505e-06, "loss": 0.0673, "step": 372000 }, { "epoch": 3.66, "grad_norm": 5.346753120422363, "learning_rate": 3.2033920184297025e-06, "loss": 0.1996, "step": 372025 }, { "epoch": 3.66, "grad_norm": 7.378774642944336, "learning_rate": 3.2032678959754537e-06, "loss": 0.081, "step": 372050 }, { "epoch": 3.66, "grad_norm": 6.772475719451904, "learning_rate": 3.203143773521205e-06, "loss": 0.1865, "step": 372075 }, { "epoch": 3.66, "grad_norm": 5.843884468078613, "learning_rate": 3.203019651066957e-06, "loss": 0.0708, "step": 372100 }, { "epoch": 3.66, "grad_norm": 5.753381252288818, "learning_rate": 3.2028955286127082e-06, "loss": 0.1687, "step": 372125 }, { "epoch": 3.66, "grad_norm": 9.260226249694824, "learning_rate": 3.20277140615846e-06, "loss": 0.0707, "step": 372150 }, { "epoch": 3.66, "grad_norm": 4.56403112411499, "learning_rate": 3.2026472837042115e-06, "loss": 0.1753, "step": 372175 }, { "epoch": 3.66, "grad_norm": 10.314858436584473, "learning_rate": 3.202523161249963e-06, "loss": 0.0541, "step": 372200 }, { "epoch": 3.66, "grad_norm": 6.108316421508789, "learning_rate": 3.2023990387957143e-06, "loss": 0.1778, "step": 372225 }, { "epoch": 3.66, "grad_norm": 9.125669479370117, "learning_rate": 3.2022749163414664e-06, "loss": 0.0505, "step": 372250 }, { "epoch": 3.66, "grad_norm": 4.553563594818115, "learning_rate": 3.2021507938872176e-06, "loss": 0.1575, "step": 372275 }, { "epoch": 3.66, "grad_norm": 15.36720085144043, "learning_rate": 3.202026671432969e-06, "loss": 0.0707, "step": 372300 }, { "epoch": 3.66, "grad_norm": 4.975017547607422, "learning_rate": 3.201902548978721e-06, "loss": 0.1789, "step": 372325 }, { "epoch": 3.66, "grad_norm": 15.979297637939453, "learning_rate": 3.201778426524472e-06, "loss": 0.0701, "step": 372350 }, { "epoch": 3.66, "grad_norm": 5.453180313110352, "learning_rate": 3.2016543040702237e-06, "loss": 0.2112, "step": 372375 }, { "epoch": 3.66, "grad_norm": 10.305990219116211, "learning_rate": 3.2015301816159754e-06, "loss": 0.0681, "step": 372400 }, { "epoch": 3.66, "grad_norm": 5.478568077087402, "learning_rate": 3.201406059161727e-06, "loss": 0.1966, "step": 372425 }, { "epoch": 3.66, "grad_norm": 2.3744378089904785, "learning_rate": 3.201281936707478e-06, "loss": 0.0702, "step": 372450 }, { "epoch": 3.66, "grad_norm": 6.189074516296387, "learning_rate": 3.2011578142532303e-06, "loss": 0.1627, "step": 372475 }, { "epoch": 3.66, "grad_norm": 7.379636764526367, "learning_rate": 3.2010336917989815e-06, "loss": 0.0669, "step": 372500 }, { "epoch": 3.66, "grad_norm": 16.412931442260742, "learning_rate": 3.2009095693447327e-06, "loss": 0.1899, "step": 372525 }, { "epoch": 3.66, "grad_norm": 8.17965030670166, "learning_rate": 3.2007854468904847e-06, "loss": 0.0639, "step": 372550 }, { "epoch": 3.66, "grad_norm": 3.627371311187744, "learning_rate": 3.200661324436236e-06, "loss": 0.1595, "step": 372575 }, { "epoch": 3.66, "grad_norm": 7.422781467437744, "learning_rate": 3.2005372019819876e-06, "loss": 0.0759, "step": 372600 }, { "epoch": 3.66, "grad_norm": 5.245259761810303, "learning_rate": 3.2004130795277392e-06, "loss": 0.1816, "step": 372625 }, { "epoch": 3.66, "grad_norm": 6.65683126449585, "learning_rate": 3.200288957073491e-06, "loss": 0.063, "step": 372650 }, { "epoch": 3.66, "grad_norm": 6.027067184448242, "learning_rate": 3.200164834619242e-06, "loss": 0.2074, "step": 372675 }, { "epoch": 3.66, "grad_norm": 3.2072439193725586, "learning_rate": 3.200040712164994e-06, "loss": 0.0691, "step": 372700 }, { "epoch": 3.66, "grad_norm": 6.035414695739746, "learning_rate": 3.1999165897107453e-06, "loss": 0.182, "step": 372725 }, { "epoch": 3.66, "grad_norm": 12.222636222839355, "learning_rate": 3.1997924672564965e-06, "loss": 0.0962, "step": 372750 }, { "epoch": 3.67, "grad_norm": 10.723688125610352, "learning_rate": 3.1996683448022486e-06, "loss": 0.2254, "step": 372775 }, { "epoch": 3.67, "grad_norm": 10.596097946166992, "learning_rate": 3.199544222348e-06, "loss": 0.0793, "step": 372800 }, { "epoch": 3.67, "grad_norm": 4.617658615112305, "learning_rate": 3.1994200998937515e-06, "loss": 0.1549, "step": 372825 }, { "epoch": 3.67, "grad_norm": 14.50473690032959, "learning_rate": 3.1992959774395027e-06, "loss": 0.0759, "step": 372850 }, { "epoch": 3.67, "grad_norm": 5.134409427642822, "learning_rate": 3.1991718549852547e-06, "loss": 0.173, "step": 372875 }, { "epoch": 3.67, "grad_norm": 14.010078430175781, "learning_rate": 3.199047732531006e-06, "loss": 0.0508, "step": 372900 }, { "epoch": 3.67, "grad_norm": 9.58720874786377, "learning_rate": 3.198923610076757e-06, "loss": 0.2052, "step": 372925 }, { "epoch": 3.67, "grad_norm": 7.600496768951416, "learning_rate": 3.198799487622509e-06, "loss": 0.0884, "step": 372950 }, { "epoch": 3.67, "grad_norm": 4.772557735443115, "learning_rate": 3.1986753651682604e-06, "loss": 0.1715, "step": 372975 }, { "epoch": 3.67, "grad_norm": 7.604077339172363, "learning_rate": 3.198551242714012e-06, "loss": 0.0528, "step": 373000 }, { "epoch": 3.67, "grad_norm": 4.9806623458862305, "learning_rate": 3.1984271202597637e-06, "loss": 0.2047, "step": 373025 }, { "epoch": 3.67, "grad_norm": 10.317906379699707, "learning_rate": 3.1983029978055153e-06, "loss": 0.0744, "step": 373050 }, { "epoch": 3.67, "grad_norm": 4.320055961608887, "learning_rate": 3.1981788753512665e-06, "loss": 0.172, "step": 373075 }, { "epoch": 3.67, "grad_norm": 8.408312797546387, "learning_rate": 3.1980547528970186e-06, "loss": 0.0421, "step": 373100 }, { "epoch": 3.67, "grad_norm": 5.1779937744140625, "learning_rate": 3.19793063044277e-06, "loss": 0.2137, "step": 373125 }, { "epoch": 3.67, "grad_norm": 8.324686050415039, "learning_rate": 3.197806507988521e-06, "loss": 0.0581, "step": 373150 }, { "epoch": 3.67, "grad_norm": 3.7891082763671875, "learning_rate": 3.197682385534273e-06, "loss": 0.2086, "step": 373175 }, { "epoch": 3.67, "grad_norm": 8.719684600830078, "learning_rate": 3.1975582630800243e-06, "loss": 0.0788, "step": 373200 }, { "epoch": 3.67, "grad_norm": 4.267533779144287, "learning_rate": 3.197434140625776e-06, "loss": 0.1641, "step": 373225 }, { "epoch": 3.67, "grad_norm": 8.505784034729004, "learning_rate": 3.1973100181715276e-06, "loss": 0.0757, "step": 373250 }, { "epoch": 3.67, "grad_norm": 5.39232873916626, "learning_rate": 3.197190860615449e-06, "loss": 0.1775, "step": 373275 }, { "epoch": 3.67, "grad_norm": 8.295562744140625, "learning_rate": 3.1970667381612006e-06, "loss": 0.0918, "step": 373300 }, { "epoch": 3.67, "grad_norm": 4.938711166381836, "learning_rate": 3.1969426157069523e-06, "loss": 0.2014, "step": 373325 }, { "epoch": 3.67, "grad_norm": 6.1496453285217285, "learning_rate": 3.1968184932527035e-06, "loss": 0.0712, "step": 373350 }, { "epoch": 3.67, "grad_norm": 5.8728790283203125, "learning_rate": 3.1966943707984555e-06, "loss": 0.1665, "step": 373375 }, { "epoch": 3.67, "grad_norm": 8.901388168334961, "learning_rate": 3.1965702483442068e-06, "loss": 0.0798, "step": 373400 }, { "epoch": 3.67, "grad_norm": 4.148220539093018, "learning_rate": 3.196446125889958e-06, "loss": 0.1654, "step": 373425 }, { "epoch": 3.67, "grad_norm": 7.272785186767578, "learning_rate": 3.1963220034357096e-06, "loss": 0.0645, "step": 373450 }, { "epoch": 3.67, "grad_norm": 2.8343870639801025, "learning_rate": 3.1961978809814612e-06, "loss": 0.1839, "step": 373475 }, { "epoch": 3.67, "grad_norm": 2.7864062786102295, "learning_rate": 3.196073758527213e-06, "loss": 0.0684, "step": 373500 }, { "epoch": 3.67, "grad_norm": 9.818140029907227, "learning_rate": 3.195949636072964e-06, "loss": 0.1858, "step": 373525 }, { "epoch": 3.67, "grad_norm": 9.63556957244873, "learning_rate": 3.195825513618716e-06, "loss": 0.0617, "step": 373550 }, { "epoch": 3.67, "grad_norm": 4.897411346435547, "learning_rate": 3.1957013911644674e-06, "loss": 0.1935, "step": 373575 }, { "epoch": 3.67, "grad_norm": 4.549346446990967, "learning_rate": 3.195577268710219e-06, "loss": 0.0746, "step": 373600 }, { "epoch": 3.67, "grad_norm": 4.345236778259277, "learning_rate": 3.1954531462559706e-06, "loss": 0.1621, "step": 373625 }, { "epoch": 3.67, "grad_norm": 13.931312561035156, "learning_rate": 3.1953290238017223e-06, "loss": 0.0664, "step": 373650 }, { "epoch": 3.67, "grad_norm": 3.875417947769165, "learning_rate": 3.1952049013474735e-06, "loss": 0.1507, "step": 373675 }, { "epoch": 3.67, "grad_norm": 8.751606941223145, "learning_rate": 3.1950807788932255e-06, "loss": 0.0684, "step": 373700 }, { "epoch": 3.67, "grad_norm": 4.939196586608887, "learning_rate": 3.1949566564389767e-06, "loss": 0.1645, "step": 373725 }, { "epoch": 3.67, "grad_norm": 11.887076377868652, "learning_rate": 3.194832533984728e-06, "loss": 0.0988, "step": 373750 }, { "epoch": 3.68, "grad_norm": 2.7735085487365723, "learning_rate": 3.19470841153048e-06, "loss": 0.1822, "step": 373775 }, { "epoch": 3.68, "grad_norm": 6.674606800079346, "learning_rate": 3.1945842890762312e-06, "loss": 0.0666, "step": 373800 }, { "epoch": 3.68, "grad_norm": 7.70475959777832, "learning_rate": 3.194460166621983e-06, "loss": 0.2057, "step": 373825 }, { "epoch": 3.68, "grad_norm": 5.5227370262146, "learning_rate": 3.1943360441677345e-06, "loss": 0.0738, "step": 373850 }, { "epoch": 3.68, "grad_norm": 5.423933506011963, "learning_rate": 3.194211921713486e-06, "loss": 0.1889, "step": 373875 }, { "epoch": 3.68, "grad_norm": 7.266678810119629, "learning_rate": 3.1940877992592373e-06, "loss": 0.0535, "step": 373900 }, { "epoch": 3.68, "grad_norm": 3.2357230186462402, "learning_rate": 3.1939636768049894e-06, "loss": 0.1577, "step": 373925 }, { "epoch": 3.68, "grad_norm": 6.821080207824707, "learning_rate": 3.1938395543507406e-06, "loss": 0.0697, "step": 373950 }, { "epoch": 3.68, "grad_norm": 4.047459602355957, "learning_rate": 3.193715431896492e-06, "loss": 0.1717, "step": 373975 }, { "epoch": 3.68, "grad_norm": 8.932499885559082, "learning_rate": 3.193591309442244e-06, "loss": 0.0779, "step": 374000 }, { "epoch": 3.68, "grad_norm": 4.620066165924072, "learning_rate": 3.193467186987995e-06, "loss": 0.171, "step": 374025 }, { "epoch": 3.68, "grad_norm": 8.141529083251953, "learning_rate": 3.1933430645337467e-06, "loss": 0.0713, "step": 374050 }, { "epoch": 3.68, "grad_norm": 4.040996074676514, "learning_rate": 3.1932189420794984e-06, "loss": 0.1806, "step": 374075 }, { "epoch": 3.68, "grad_norm": 6.234002113342285, "learning_rate": 3.19309481962525e-06, "loss": 0.0708, "step": 374100 }, { "epoch": 3.68, "grad_norm": 5.491024494171143, "learning_rate": 3.192970697171001e-06, "loss": 0.2118, "step": 374125 }, { "epoch": 3.68, "grad_norm": 5.159909248352051, "learning_rate": 3.1928465747167533e-06, "loss": 0.0942, "step": 374150 }, { "epoch": 3.68, "grad_norm": 4.351620674133301, "learning_rate": 3.1927224522625045e-06, "loss": 0.1969, "step": 374175 }, { "epoch": 3.68, "grad_norm": 14.731934547424316, "learning_rate": 3.1925983298082557e-06, "loss": 0.0626, "step": 374200 }, { "epoch": 3.68, "grad_norm": 3.6264231204986572, "learning_rate": 3.1924742073540077e-06, "loss": 0.1684, "step": 374225 }, { "epoch": 3.68, "grad_norm": 9.304713249206543, "learning_rate": 3.192350084899759e-06, "loss": 0.0712, "step": 374250 }, { "epoch": 3.68, "grad_norm": 6.479170322418213, "learning_rate": 3.1922259624455106e-06, "loss": 0.167, "step": 374275 }, { "epoch": 3.68, "grad_norm": 8.395118713378906, "learning_rate": 3.192101839991262e-06, "loss": 0.0641, "step": 374300 }, { "epoch": 3.68, "grad_norm": 4.738171577453613, "learning_rate": 3.191977717537014e-06, "loss": 0.2302, "step": 374325 }, { "epoch": 3.68, "grad_norm": 14.668693542480469, "learning_rate": 3.191853595082765e-06, "loss": 0.0795, "step": 374350 }, { "epoch": 3.68, "grad_norm": 8.078142166137695, "learning_rate": 3.1917294726285163e-06, "loss": 0.1851, "step": 374375 }, { "epoch": 3.68, "grad_norm": 6.504473686218262, "learning_rate": 3.1916053501742683e-06, "loss": 0.0782, "step": 374400 }, { "epoch": 3.68, "grad_norm": 6.840299129486084, "learning_rate": 3.1914812277200196e-06, "loss": 0.1857, "step": 374425 }, { "epoch": 3.68, "grad_norm": 2.810537338256836, "learning_rate": 3.191357105265771e-06, "loss": 0.0661, "step": 374450 }, { "epoch": 3.68, "grad_norm": 4.63386869430542, "learning_rate": 3.191232982811523e-06, "loss": 0.1558, "step": 374475 }, { "epoch": 3.68, "grad_norm": 9.278303146362305, "learning_rate": 3.1911088603572745e-06, "loss": 0.0705, "step": 374500 }, { "epoch": 3.68, "grad_norm": 4.501919269561768, "learning_rate": 3.1909847379030257e-06, "loss": 0.2058, "step": 374525 }, { "epoch": 3.68, "grad_norm": 18.138774871826172, "learning_rate": 3.1908606154487777e-06, "loss": 0.0809, "step": 374550 }, { "epoch": 3.68, "grad_norm": 3.4939281940460205, "learning_rate": 3.190736492994529e-06, "loss": 0.1993, "step": 374575 }, { "epoch": 3.68, "grad_norm": 9.342906951904297, "learning_rate": 3.19061237054028e-06, "loss": 0.0708, "step": 374600 }, { "epoch": 3.68, "grad_norm": 4.774960041046143, "learning_rate": 3.190488248086032e-06, "loss": 0.1749, "step": 374625 }, { "epoch": 3.68, "grad_norm": 4.956745624542236, "learning_rate": 3.1903641256317834e-06, "loss": 0.063, "step": 374650 }, { "epoch": 3.68, "grad_norm": 4.660205841064453, "learning_rate": 3.190240003177535e-06, "loss": 0.1803, "step": 374675 }, { "epoch": 3.68, "grad_norm": 18.007766723632812, "learning_rate": 3.1901158807232867e-06, "loss": 0.0603, "step": 374700 }, { "epoch": 3.68, "grad_norm": 4.885339736938477, "learning_rate": 3.1899917582690383e-06, "loss": 0.1859, "step": 374725 }, { "epoch": 3.68, "grad_norm": 8.965642929077148, "learning_rate": 3.1898676358147895e-06, "loss": 0.0586, "step": 374750 }, { "epoch": 3.68, "grad_norm": 7.055606842041016, "learning_rate": 3.1897435133605416e-06, "loss": 0.1618, "step": 374775 }, { "epoch": 3.69, "grad_norm": 9.321293830871582, "learning_rate": 3.189619390906293e-06, "loss": 0.0716, "step": 374800 }, { "epoch": 3.69, "grad_norm": 5.066656112670898, "learning_rate": 3.189495268452044e-06, "loss": 0.1676, "step": 374825 }, { "epoch": 3.69, "grad_norm": 6.55164909362793, "learning_rate": 3.189371145997796e-06, "loss": 0.0683, "step": 374850 }, { "epoch": 3.69, "grad_norm": 5.458260536193848, "learning_rate": 3.1892470235435473e-06, "loss": 0.175, "step": 374875 }, { "epoch": 3.69, "grad_norm": 6.923292636871338, "learning_rate": 3.189122901089299e-06, "loss": 0.0728, "step": 374900 }, { "epoch": 3.69, "grad_norm": 4.037684917449951, "learning_rate": 3.1889987786350506e-06, "loss": 0.1763, "step": 374925 }, { "epoch": 3.69, "grad_norm": 5.161830425262451, "learning_rate": 3.188874656180802e-06, "loss": 0.0688, "step": 374950 }, { "epoch": 3.69, "grad_norm": 6.348689556121826, "learning_rate": 3.1887505337265534e-06, "loss": 0.1717, "step": 374975 }, { "epoch": 3.69, "grad_norm": 10.683544158935547, "learning_rate": 3.1886264112723055e-06, "loss": 0.1054, "step": 375000 }, { "epoch": 3.69, "grad_norm": 4.518220901489258, "learning_rate": 3.1885022888180567e-06, "loss": 0.1882, "step": 375025 }, { "epoch": 3.69, "grad_norm": 2.9308199882507324, "learning_rate": 3.188378166363808e-06, "loss": 0.058, "step": 375050 }, { "epoch": 3.69, "grad_norm": 4.946387767791748, "learning_rate": 3.18825404390956e-06, "loss": 0.2034, "step": 375075 }, { "epoch": 3.69, "grad_norm": 8.373571395874023, "learning_rate": 3.188129921455311e-06, "loss": 0.0794, "step": 375100 }, { "epoch": 3.69, "grad_norm": 4.918753147125244, "learning_rate": 3.188005799001063e-06, "loss": 0.1576, "step": 375125 }, { "epoch": 3.69, "grad_norm": 6.243759632110596, "learning_rate": 3.187881676546814e-06, "loss": 0.0766, "step": 375150 }, { "epoch": 3.69, "grad_norm": 6.602444171905518, "learning_rate": 3.187757554092566e-06, "loss": 0.1894, "step": 375175 }, { "epoch": 3.69, "grad_norm": 7.432314872741699, "learning_rate": 3.1876334316383173e-06, "loss": 0.0604, "step": 375200 }, { "epoch": 3.69, "grad_norm": 7.495863437652588, "learning_rate": 3.1875093091840685e-06, "loss": 0.2297, "step": 375225 }, { "epoch": 3.69, "grad_norm": 10.939258575439453, "learning_rate": 3.1873851867298205e-06, "loss": 0.0953, "step": 375250 }, { "epoch": 3.69, "grad_norm": 5.1635260581970215, "learning_rate": 3.1872610642755718e-06, "loss": 0.1617, "step": 375275 }, { "epoch": 3.69, "grad_norm": 7.3697123527526855, "learning_rate": 3.1871369418213234e-06, "loss": 0.0562, "step": 375300 }, { "epoch": 3.69, "grad_norm": 4.7495036125183105, "learning_rate": 3.187012819367075e-06, "loss": 0.1856, "step": 375325 }, { "epoch": 3.69, "grad_norm": 5.774144649505615, "learning_rate": 3.1868886969128267e-06, "loss": 0.0672, "step": 375350 }, { "epoch": 3.69, "grad_norm": 4.909140110015869, "learning_rate": 3.186764574458578e-06, "loss": 0.1659, "step": 375375 }, { "epoch": 3.69, "grad_norm": 3.4848344326019287, "learning_rate": 3.18664045200433e-06, "loss": 0.0594, "step": 375400 }, { "epoch": 3.69, "grad_norm": 5.334686756134033, "learning_rate": 3.186516329550081e-06, "loss": 0.1977, "step": 375425 }, { "epoch": 3.69, "grad_norm": 11.897833824157715, "learning_rate": 3.1863922070958324e-06, "loss": 0.0993, "step": 375450 }, { "epoch": 3.69, "grad_norm": 5.554603576660156, "learning_rate": 3.1862680846415844e-06, "loss": 0.2137, "step": 375475 }, { "epoch": 3.69, "grad_norm": 8.774998664855957, "learning_rate": 3.1861439621873356e-06, "loss": 0.0913, "step": 375500 }, { "epoch": 3.69, "grad_norm": 3.187725067138672, "learning_rate": 3.1860198397330873e-06, "loss": 0.2381, "step": 375525 }, { "epoch": 3.69, "grad_norm": 7.227441310882568, "learning_rate": 3.185895717278839e-06, "loss": 0.0903, "step": 375550 }, { "epoch": 3.69, "grad_norm": 4.222840309143066, "learning_rate": 3.1857715948245905e-06, "loss": 0.1623, "step": 375575 }, { "epoch": 3.69, "grad_norm": 6.286412715911865, "learning_rate": 3.1856474723703417e-06, "loss": 0.0584, "step": 375600 }, { "epoch": 3.69, "grad_norm": 4.214797496795654, "learning_rate": 3.185523349916094e-06, "loss": 0.1559, "step": 375625 }, { "epoch": 3.69, "grad_norm": 12.401028633117676, "learning_rate": 3.185399227461845e-06, "loss": 0.0758, "step": 375650 }, { "epoch": 3.69, "grad_norm": 4.2765655517578125, "learning_rate": 3.1852751050075962e-06, "loss": 0.2031, "step": 375675 }, { "epoch": 3.69, "grad_norm": 10.214871406555176, "learning_rate": 3.1851509825533483e-06, "loss": 0.0677, "step": 375700 }, { "epoch": 3.69, "grad_norm": 6.72144079208374, "learning_rate": 3.1850268600990995e-06, "loss": 0.1817, "step": 375725 }, { "epoch": 3.69, "grad_norm": 7.0857720375061035, "learning_rate": 3.184902737644851e-06, "loss": 0.065, "step": 375750 }, { "epoch": 3.69, "grad_norm": 3.976912260055542, "learning_rate": 3.1847786151906028e-06, "loss": 0.154, "step": 375775 }, { "epoch": 3.69, "grad_norm": 12.773694038391113, "learning_rate": 3.1846544927363544e-06, "loss": 0.1071, "step": 375800 }, { "epoch": 3.7, "grad_norm": 3.8182621002197266, "learning_rate": 3.1845303702821056e-06, "loss": 0.2103, "step": 375825 }, { "epoch": 3.7, "grad_norm": 14.488480567932129, "learning_rate": 3.1844062478278577e-06, "loss": 0.0876, "step": 375850 }, { "epoch": 3.7, "grad_norm": 7.138622283935547, "learning_rate": 3.184282125373609e-06, "loss": 0.2365, "step": 375875 }, { "epoch": 3.7, "grad_norm": 7.159111022949219, "learning_rate": 3.18415800291936e-06, "loss": 0.0798, "step": 375900 }, { "epoch": 3.7, "grad_norm": 5.78387975692749, "learning_rate": 3.184033880465112e-06, "loss": 0.1636, "step": 375925 }, { "epoch": 3.7, "grad_norm": 7.656811237335205, "learning_rate": 3.1839097580108634e-06, "loss": 0.0826, "step": 375950 }, { "epoch": 3.7, "grad_norm": 5.620626449584961, "learning_rate": 3.183790600454785e-06, "loss": 0.193, "step": 375975 }, { "epoch": 3.7, "grad_norm": 7.622501373291016, "learning_rate": 3.1836664780005364e-06, "loss": 0.0553, "step": 376000 }, { "epoch": 3.7, "grad_norm": 3.602776050567627, "learning_rate": 3.183542355546288e-06, "loss": 0.1984, "step": 376025 }, { "epoch": 3.7, "grad_norm": 14.220383644104004, "learning_rate": 3.1834182330920393e-06, "loss": 0.0576, "step": 376050 }, { "epoch": 3.7, "grad_norm": 4.835383892059326, "learning_rate": 3.1832941106377913e-06, "loss": 0.1919, "step": 376075 }, { "epoch": 3.7, "grad_norm": 7.847303867340088, "learning_rate": 3.1831699881835426e-06, "loss": 0.0539, "step": 376100 }, { "epoch": 3.7, "grad_norm": 3.438469171524048, "learning_rate": 3.1830458657292938e-06, "loss": 0.1881, "step": 376125 }, { "epoch": 3.7, "grad_norm": 11.945117950439453, "learning_rate": 3.182921743275046e-06, "loss": 0.0726, "step": 376150 }, { "epoch": 3.7, "grad_norm": 5.423375129699707, "learning_rate": 3.182797620820797e-06, "loss": 0.1959, "step": 376175 }, { "epoch": 3.7, "grad_norm": 7.751032829284668, "learning_rate": 3.1826734983665487e-06, "loss": 0.0951, "step": 376200 }, { "epoch": 3.7, "grad_norm": 4.606250286102295, "learning_rate": 3.1825493759123003e-06, "loss": 0.1735, "step": 376225 }, { "epoch": 3.7, "grad_norm": 5.3162522315979, "learning_rate": 3.182425253458052e-06, "loss": 0.071, "step": 376250 }, { "epoch": 3.7, "grad_norm": 4.92706823348999, "learning_rate": 3.182301131003803e-06, "loss": 0.1941, "step": 376275 }, { "epoch": 3.7, "grad_norm": 11.056915283203125, "learning_rate": 3.1821770085495552e-06, "loss": 0.0709, "step": 376300 }, { "epoch": 3.7, "grad_norm": 4.364241600036621, "learning_rate": 3.1820528860953064e-06, "loss": 0.1862, "step": 376325 }, { "epoch": 3.7, "grad_norm": 11.862334251403809, "learning_rate": 3.1819287636410576e-06, "loss": 0.1054, "step": 376350 }, { "epoch": 3.7, "grad_norm": 3.67520809173584, "learning_rate": 3.1818046411868097e-06, "loss": 0.1965, "step": 376375 }, { "epoch": 3.7, "grad_norm": 12.341121673583984, "learning_rate": 3.181680518732561e-06, "loss": 0.0675, "step": 376400 }, { "epoch": 3.7, "grad_norm": 7.398219108581543, "learning_rate": 3.1815563962783125e-06, "loss": 0.2032, "step": 376425 }, { "epoch": 3.7, "grad_norm": 10.841102600097656, "learning_rate": 3.181432273824064e-06, "loss": 0.0646, "step": 376450 }, { "epoch": 3.7, "grad_norm": 5.647978782653809, "learning_rate": 3.181308151369816e-06, "loss": 0.1948, "step": 376475 }, { "epoch": 3.7, "grad_norm": 4.623749732971191, "learning_rate": 3.181184028915567e-06, "loss": 0.0743, "step": 376500 }, { "epoch": 3.7, "grad_norm": 4.811922073364258, "learning_rate": 3.181059906461319e-06, "loss": 0.1711, "step": 376525 }, { "epoch": 3.7, "grad_norm": 10.03506088256836, "learning_rate": 3.1809357840070703e-06, "loss": 0.074, "step": 376550 }, { "epoch": 3.7, "grad_norm": 4.263157367706299, "learning_rate": 3.180811661552822e-06, "loss": 0.2086, "step": 376575 }, { "epoch": 3.7, "grad_norm": 6.5135650634765625, "learning_rate": 3.180687539098573e-06, "loss": 0.0515, "step": 376600 }, { "epoch": 3.7, "grad_norm": 8.86090087890625, "learning_rate": 3.180563416644325e-06, "loss": 0.2059, "step": 376625 }, { "epoch": 3.7, "grad_norm": 11.21708869934082, "learning_rate": 3.1804392941900764e-06, "loss": 0.0661, "step": 376650 }, { "epoch": 3.7, "grad_norm": 5.2931084632873535, "learning_rate": 3.1803151717358276e-06, "loss": 0.1871, "step": 376675 }, { "epoch": 3.7, "grad_norm": 9.286588668823242, "learning_rate": 3.1801910492815797e-06, "loss": 0.0862, "step": 376700 }, { "epoch": 3.7, "grad_norm": 3.9558677673339844, "learning_rate": 3.180066926827331e-06, "loss": 0.1712, "step": 376725 }, { "epoch": 3.7, "grad_norm": 7.332136631011963, "learning_rate": 3.1799428043730825e-06, "loss": 0.0917, "step": 376750 }, { "epoch": 3.7, "grad_norm": 6.936327934265137, "learning_rate": 3.179818681918834e-06, "loss": 0.2036, "step": 376775 }, { "epoch": 3.7, "grad_norm": 16.227069854736328, "learning_rate": 3.179694559464586e-06, "loss": 0.0617, "step": 376800 }, { "epoch": 3.71, "grad_norm": 4.15688943862915, "learning_rate": 3.179570437010337e-06, "loss": 0.1628, "step": 376825 }, { "epoch": 3.71, "grad_norm": 5.358287811279297, "learning_rate": 3.179446314556089e-06, "loss": 0.0869, "step": 376850 }, { "epoch": 3.71, "grad_norm": 5.370694637298584, "learning_rate": 3.1793221921018403e-06, "loss": 0.1791, "step": 376875 }, { "epoch": 3.71, "grad_norm": 3.5537495613098145, "learning_rate": 3.1791980696475915e-06, "loss": 0.0875, "step": 376900 }, { "epoch": 3.71, "grad_norm": 3.8769092559814453, "learning_rate": 3.1790739471933436e-06, "loss": 0.1821, "step": 376925 }, { "epoch": 3.71, "grad_norm": 9.520648002624512, "learning_rate": 3.1789498247390948e-06, "loss": 0.1005, "step": 376950 }, { "epoch": 3.71, "grad_norm": 6.051070690155029, "learning_rate": 3.1788257022848464e-06, "loss": 0.1738, "step": 376975 }, { "epoch": 3.71, "grad_norm": 13.796953201293945, "learning_rate": 3.178701579830598e-06, "loss": 0.095, "step": 377000 }, { "epoch": 3.71, "grad_norm": 6.890003204345703, "learning_rate": 3.1785774573763497e-06, "loss": 0.23, "step": 377025 }, { "epoch": 3.71, "grad_norm": 11.090389251708984, "learning_rate": 3.178453334922101e-06, "loss": 0.0906, "step": 377050 }, { "epoch": 3.71, "grad_norm": 6.218894004821777, "learning_rate": 3.178329212467853e-06, "loss": 0.1803, "step": 377075 }, { "epoch": 3.71, "grad_norm": 9.181706428527832, "learning_rate": 3.178205090013604e-06, "loss": 0.0708, "step": 377100 }, { "epoch": 3.71, "grad_norm": 4.151653289794922, "learning_rate": 3.1780809675593554e-06, "loss": 0.1983, "step": 377125 }, { "epoch": 3.71, "grad_norm": 5.864317417144775, "learning_rate": 3.1779568451051074e-06, "loss": 0.0674, "step": 377150 }, { "epoch": 3.71, "grad_norm": 5.931983470916748, "learning_rate": 3.1778327226508586e-06, "loss": 0.172, "step": 377175 }, { "epoch": 3.71, "grad_norm": 7.8816070556640625, "learning_rate": 3.1777086001966103e-06, "loss": 0.1009, "step": 377200 }, { "epoch": 3.71, "grad_norm": 6.303245544433594, "learning_rate": 3.177584477742362e-06, "loss": 0.1902, "step": 377225 }, { "epoch": 3.71, "grad_norm": 19.844511032104492, "learning_rate": 3.1774603552881135e-06, "loss": 0.0684, "step": 377250 }, { "epoch": 3.71, "grad_norm": 4.437542915344238, "learning_rate": 3.1773362328338647e-06, "loss": 0.2387, "step": 377275 }, { "epoch": 3.71, "grad_norm": 11.06827449798584, "learning_rate": 3.177212110379617e-06, "loss": 0.0965, "step": 377300 }, { "epoch": 3.71, "grad_norm": 8.281641960144043, "learning_rate": 3.177087987925368e-06, "loss": 0.1746, "step": 377325 }, { "epoch": 3.71, "grad_norm": 4.6999711990356445, "learning_rate": 3.1769638654711192e-06, "loss": 0.0941, "step": 377350 }, { "epoch": 3.71, "grad_norm": 4.540987491607666, "learning_rate": 3.1768397430168713e-06, "loss": 0.1791, "step": 377375 }, { "epoch": 3.71, "grad_norm": 8.642876625061035, "learning_rate": 3.1767156205626225e-06, "loss": 0.0803, "step": 377400 }, { "epoch": 3.71, "grad_norm": 4.706755638122559, "learning_rate": 3.176591498108374e-06, "loss": 0.2298, "step": 377425 }, { "epoch": 3.71, "grad_norm": 4.64812707901001, "learning_rate": 3.1764673756541253e-06, "loss": 0.0692, "step": 377450 }, { "epoch": 3.71, "grad_norm": 3.1455981731414795, "learning_rate": 3.1763432531998774e-06, "loss": 0.1897, "step": 377475 }, { "epoch": 3.71, "grad_norm": 7.737857818603516, "learning_rate": 3.1762191307456286e-06, "loss": 0.0692, "step": 377500 }, { "epoch": 3.71, "grad_norm": 4.708184719085693, "learning_rate": 3.17609500829138e-06, "loss": 0.1438, "step": 377525 }, { "epoch": 3.71, "grad_norm": 8.499174118041992, "learning_rate": 3.175970885837132e-06, "loss": 0.0891, "step": 377550 }, { "epoch": 3.71, "grad_norm": 4.343175411224365, "learning_rate": 3.175846763382883e-06, "loss": 0.1958, "step": 377575 }, { "epoch": 3.71, "grad_norm": 4.495784759521484, "learning_rate": 3.1757226409286347e-06, "loss": 0.0804, "step": 377600 }, { "epoch": 3.71, "grad_norm": 4.444740295410156, "learning_rate": 3.1755985184743864e-06, "loss": 0.1487, "step": 377625 }, { "epoch": 3.71, "grad_norm": 5.372385025024414, "learning_rate": 3.175474396020138e-06, "loss": 0.0705, "step": 377650 }, { "epoch": 3.71, "grad_norm": 4.470526695251465, "learning_rate": 3.1753502735658892e-06, "loss": 0.2099, "step": 377675 }, { "epoch": 3.71, "grad_norm": 9.010573387145996, "learning_rate": 3.1752261511116413e-06, "loss": 0.0909, "step": 377700 }, { "epoch": 3.71, "grad_norm": 4.746087551116943, "learning_rate": 3.1751020286573925e-06, "loss": 0.1961, "step": 377725 }, { "epoch": 3.71, "grad_norm": 5.203190803527832, "learning_rate": 3.1749779062031437e-06, "loss": 0.0746, "step": 377750 }, { "epoch": 3.71, "grad_norm": 4.738870620727539, "learning_rate": 3.1748537837488958e-06, "loss": 0.1707, "step": 377775 }, { "epoch": 3.71, "grad_norm": 9.328588485717773, "learning_rate": 3.174729661294647e-06, "loss": 0.0913, "step": 377800 }, { "epoch": 3.71, "grad_norm": 4.113880634307861, "learning_rate": 3.1746055388403986e-06, "loss": 0.216, "step": 377825 }, { "epoch": 3.72, "grad_norm": 9.688103675842285, "learning_rate": 3.1744814163861502e-06, "loss": 0.0794, "step": 377850 }, { "epoch": 3.72, "grad_norm": 4.293564319610596, "learning_rate": 3.174357293931902e-06, "loss": 0.204, "step": 377875 }, { "epoch": 3.72, "grad_norm": 6.275993824005127, "learning_rate": 3.174233171477653e-06, "loss": 0.0758, "step": 377900 }, { "epoch": 3.72, "grad_norm": 4.941628932952881, "learning_rate": 3.174109049023405e-06, "loss": 0.1621, "step": 377925 }, { "epoch": 3.72, "grad_norm": 5.39150333404541, "learning_rate": 3.1739849265691563e-06, "loss": 0.0609, "step": 377950 }, { "epoch": 3.72, "grad_norm": 6.637380599975586, "learning_rate": 3.1738608041149076e-06, "loss": 0.1949, "step": 377975 }, { "epoch": 3.72, "grad_norm": 7.506618499755859, "learning_rate": 3.1737366816606596e-06, "loss": 0.0902, "step": 378000 }, { "epoch": 3.72, "grad_norm": 4.684253692626953, "learning_rate": 3.173612559206411e-06, "loss": 0.1678, "step": 378025 }, { "epoch": 3.72, "grad_norm": 11.304792404174805, "learning_rate": 3.1734884367521625e-06, "loss": 0.0566, "step": 378050 }, { "epoch": 3.72, "grad_norm": 4.134634017944336, "learning_rate": 3.173364314297914e-06, "loss": 0.1322, "step": 378075 }, { "epoch": 3.72, "grad_norm": 5.252292633056641, "learning_rate": 3.1732401918436657e-06, "loss": 0.0885, "step": 378100 }, { "epoch": 3.72, "grad_norm": 4.721242904663086, "learning_rate": 3.173116069389417e-06, "loss": 0.2008, "step": 378125 }, { "epoch": 3.72, "grad_norm": 5.323588848114014, "learning_rate": 3.172991946935169e-06, "loss": 0.0863, "step": 378150 }, { "epoch": 3.72, "grad_norm": 5.030515193939209, "learning_rate": 3.1728678244809202e-06, "loss": 0.215, "step": 378175 }, { "epoch": 3.72, "grad_norm": 9.857597351074219, "learning_rate": 3.1727437020266714e-06, "loss": 0.0767, "step": 378200 }, { "epoch": 3.72, "grad_norm": 5.152734279632568, "learning_rate": 3.1726245444705933e-06, "loss": 0.1866, "step": 378225 }, { "epoch": 3.72, "grad_norm": 1.2210438251495361, "learning_rate": 3.1725004220163445e-06, "loss": 0.0493, "step": 378250 }, { "epoch": 3.72, "grad_norm": 5.421521186828613, "learning_rate": 3.172376299562096e-06, "loss": 0.1455, "step": 378275 }, { "epoch": 3.72, "grad_norm": 8.5471773147583, "learning_rate": 3.1722521771078478e-06, "loss": 0.0673, "step": 378300 }, { "epoch": 3.72, "grad_norm": 5.065188407897949, "learning_rate": 3.1721280546535994e-06, "loss": 0.1856, "step": 378325 }, { "epoch": 3.72, "grad_norm": 7.349095821380615, "learning_rate": 3.1720039321993506e-06, "loss": 0.0902, "step": 378350 }, { "epoch": 3.72, "grad_norm": 6.228643417358398, "learning_rate": 3.1718798097451027e-06, "loss": 0.1454, "step": 378375 }, { "epoch": 3.72, "grad_norm": 13.269248008728027, "learning_rate": 3.171755687290854e-06, "loss": 0.0768, "step": 378400 }, { "epoch": 3.72, "grad_norm": 5.787652015686035, "learning_rate": 3.171631564836605e-06, "loss": 0.1403, "step": 378425 }, { "epoch": 3.72, "grad_norm": 12.746102333068848, "learning_rate": 3.171507442382357e-06, "loss": 0.087, "step": 378450 }, { "epoch": 3.72, "grad_norm": 2.2216920852661133, "learning_rate": 3.1713833199281084e-06, "loss": 0.1848, "step": 378475 }, { "epoch": 3.72, "grad_norm": 10.244314193725586, "learning_rate": 3.17125919747386e-06, "loss": 0.085, "step": 378500 }, { "epoch": 3.72, "grad_norm": 3.739882707595825, "learning_rate": 3.1711350750196117e-06, "loss": 0.1865, "step": 378525 }, { "epoch": 3.72, "grad_norm": 4.681274890899658, "learning_rate": 3.1710109525653633e-06, "loss": 0.0676, "step": 378550 }, { "epoch": 3.72, "grad_norm": 5.621308326721191, "learning_rate": 3.1708868301111145e-06, "loss": 0.1959, "step": 378575 }, { "epoch": 3.72, "grad_norm": 7.777324676513672, "learning_rate": 3.1707627076568666e-06, "loss": 0.0891, "step": 378600 }, { "epoch": 3.72, "grad_norm": 3.638152599334717, "learning_rate": 3.1706385852026178e-06, "loss": 0.1911, "step": 378625 }, { "epoch": 3.72, "grad_norm": 4.800470352172852, "learning_rate": 3.170514462748369e-06, "loss": 0.0759, "step": 378650 }, { "epoch": 3.72, "grad_norm": 4.319545745849609, "learning_rate": 3.170390340294121e-06, "loss": 0.2093, "step": 378675 }, { "epoch": 3.72, "grad_norm": 8.846278190612793, "learning_rate": 3.1702662178398723e-06, "loss": 0.0576, "step": 378700 }, { "epoch": 3.72, "grad_norm": 5.490782260894775, "learning_rate": 3.170142095385624e-06, "loss": 0.1528, "step": 378725 }, { "epoch": 3.72, "grad_norm": 7.691112518310547, "learning_rate": 3.1700179729313755e-06, "loss": 0.0618, "step": 378750 }, { "epoch": 3.72, "grad_norm": 6.8748579025268555, "learning_rate": 3.169893850477127e-06, "loss": 0.1744, "step": 378775 }, { "epoch": 3.72, "grad_norm": 8.228436470031738, "learning_rate": 3.1697697280228784e-06, "loss": 0.0816, "step": 378800 }, { "epoch": 3.72, "grad_norm": 5.3742475509643555, "learning_rate": 3.1696456055686304e-06, "loss": 0.192, "step": 378825 }, { "epoch": 3.72, "grad_norm": 10.782729148864746, "learning_rate": 3.1695214831143816e-06, "loss": 0.0858, "step": 378850 }, { "epoch": 3.73, "grad_norm": 4.3804497718811035, "learning_rate": 3.169397360660133e-06, "loss": 0.1646, "step": 378875 }, { "epoch": 3.73, "grad_norm": 8.015719413757324, "learning_rate": 3.1692732382058845e-06, "loss": 0.0615, "step": 378900 }, { "epoch": 3.73, "grad_norm": 5.357341289520264, "learning_rate": 3.169149115751636e-06, "loss": 0.1542, "step": 378925 }, { "epoch": 3.73, "grad_norm": 4.1049628257751465, "learning_rate": 3.1690249932973878e-06, "loss": 0.0579, "step": 378950 }, { "epoch": 3.73, "grad_norm": 3.261272430419922, "learning_rate": 3.168900870843139e-06, "loss": 0.1493, "step": 378975 }, { "epoch": 3.73, "grad_norm": 14.380586624145508, "learning_rate": 3.168776748388891e-06, "loss": 0.0554, "step": 379000 }, { "epoch": 3.73, "grad_norm": 5.064012050628662, "learning_rate": 3.1686526259346422e-06, "loss": 0.1663, "step": 379025 }, { "epoch": 3.73, "grad_norm": 9.73834228515625, "learning_rate": 3.1685285034803934e-06, "loss": 0.0688, "step": 379050 }, { "epoch": 3.73, "grad_norm": 4.804739475250244, "learning_rate": 3.1684043810261455e-06, "loss": 0.1508, "step": 379075 }, { "epoch": 3.73, "grad_norm": 9.762110710144043, "learning_rate": 3.1682802585718967e-06, "loss": 0.0683, "step": 379100 }, { "epoch": 3.73, "grad_norm": 4.251405715942383, "learning_rate": 3.1681561361176484e-06, "loss": 0.1903, "step": 379125 }, { "epoch": 3.73, "grad_norm": 10.081650733947754, "learning_rate": 3.1680320136634e-06, "loss": 0.0844, "step": 379150 }, { "epoch": 3.73, "grad_norm": 5.002716064453125, "learning_rate": 3.1679078912091516e-06, "loss": 0.1776, "step": 379175 }, { "epoch": 3.73, "grad_norm": 9.756464004516602, "learning_rate": 3.167783768754903e-06, "loss": 0.0668, "step": 379200 }, { "epoch": 3.73, "grad_norm": 6.122787952423096, "learning_rate": 3.167659646300655e-06, "loss": 0.1796, "step": 379225 }, { "epoch": 3.73, "grad_norm": 4.346596717834473, "learning_rate": 3.167535523846406e-06, "loss": 0.0663, "step": 379250 }, { "epoch": 3.73, "grad_norm": 14.029497146606445, "learning_rate": 3.1674114013921577e-06, "loss": 0.1998, "step": 379275 }, { "epoch": 3.73, "grad_norm": 17.67656707763672, "learning_rate": 3.1672872789379094e-06, "loss": 0.0755, "step": 379300 }, { "epoch": 3.73, "grad_norm": 3.9203040599823, "learning_rate": 3.1671631564836606e-06, "loss": 0.1596, "step": 379325 }, { "epoch": 3.73, "grad_norm": 8.931151390075684, "learning_rate": 3.1670390340294122e-06, "loss": 0.0737, "step": 379350 }, { "epoch": 3.73, "grad_norm": 7.343804359436035, "learning_rate": 3.166914911575164e-06, "loss": 0.1906, "step": 379375 }, { "epoch": 3.73, "grad_norm": 14.369952201843262, "learning_rate": 3.1667907891209155e-06, "loss": 0.0645, "step": 379400 }, { "epoch": 3.73, "grad_norm": 6.745563983917236, "learning_rate": 3.1666666666666667e-06, "loss": 0.175, "step": 379425 }, { "epoch": 3.73, "grad_norm": 10.612584114074707, "learning_rate": 3.1665425442124188e-06, "loss": 0.0574, "step": 379450 }, { "epoch": 3.73, "grad_norm": 5.53394079208374, "learning_rate": 3.16641842175817e-06, "loss": 0.1558, "step": 379475 }, { "epoch": 3.73, "grad_norm": 4.892654895782471, "learning_rate": 3.1662942993039216e-06, "loss": 0.0783, "step": 379500 }, { "epoch": 3.73, "grad_norm": 6.238353729248047, "learning_rate": 3.1661701768496732e-06, "loss": 0.1947, "step": 379525 }, { "epoch": 3.73, "grad_norm": 16.152206420898438, "learning_rate": 3.166046054395425e-06, "loss": 0.0747, "step": 379550 }, { "epoch": 3.73, "grad_norm": 4.188803195953369, "learning_rate": 3.165921931941176e-06, "loss": 0.1879, "step": 379575 }, { "epoch": 3.73, "grad_norm": 10.65503215789795, "learning_rate": 3.165797809486928e-06, "loss": 0.0746, "step": 379600 }, { "epoch": 3.73, "grad_norm": 5.838407039642334, "learning_rate": 3.1656736870326794e-06, "loss": 0.1954, "step": 379625 }, { "epoch": 3.73, "grad_norm": 13.367890357971191, "learning_rate": 3.1655495645784306e-06, "loss": 0.0696, "step": 379650 }, { "epoch": 3.73, "grad_norm": 6.605657577514648, "learning_rate": 3.1654254421241826e-06, "loss": 0.1829, "step": 379675 }, { "epoch": 3.73, "grad_norm": 8.893630981445312, "learning_rate": 3.165301319669934e-06, "loss": 0.067, "step": 379700 }, { "epoch": 3.73, "grad_norm": 3.606649398803711, "learning_rate": 3.1651771972156855e-06, "loss": 0.1742, "step": 379725 }, { "epoch": 3.73, "grad_norm": 3.774775981903076, "learning_rate": 3.1650530747614367e-06, "loss": 0.0628, "step": 379750 }, { "epoch": 3.73, "grad_norm": 4.235100269317627, "learning_rate": 3.1649289523071887e-06, "loss": 0.1777, "step": 379775 }, { "epoch": 3.73, "grad_norm": 9.738980293273926, "learning_rate": 3.16480482985294e-06, "loss": 0.0832, "step": 379800 }, { "epoch": 3.73, "grad_norm": 4.352625370025635, "learning_rate": 3.164680707398691e-06, "loss": 0.1919, "step": 379825 }, { "epoch": 3.73, "grad_norm": 0.9982786774635315, "learning_rate": 3.1645565849444432e-06, "loss": 0.0747, "step": 379850 }, { "epoch": 3.73, "grad_norm": 4.31815767288208, "learning_rate": 3.1644324624901944e-06, "loss": 0.1623, "step": 379875 }, { "epoch": 3.74, "grad_norm": 8.679617881774902, "learning_rate": 3.164308340035946e-06, "loss": 0.0766, "step": 379900 }, { "epoch": 3.74, "grad_norm": 6.348616123199463, "learning_rate": 3.1641842175816977e-06, "loss": 0.1959, "step": 379925 }, { "epoch": 3.74, "grad_norm": 6.532835483551025, "learning_rate": 3.1640600951274493e-06, "loss": 0.0734, "step": 379950 }, { "epoch": 3.74, "grad_norm": 5.269672870635986, "learning_rate": 3.1639359726732006e-06, "loss": 0.2027, "step": 379975 }, { "epoch": 3.74, "grad_norm": 8.189493179321289, "learning_rate": 3.1638118502189526e-06, "loss": 0.1029, "step": 380000 }, { "epoch": 3.74, "eval_loss": 0.6776847243309021, "eval_runtime": 6140.0471, "eval_samples_per_second": 1.542, "eval_steps_per_second": 0.193, "eval_wer": 0.11837700379836477, "step": 380000 }, { "epoch": 3.74, "grad_norm": 4.935797214508057, "learning_rate": 3.163687727764704e-06, "loss": 0.1935, "step": 380025 }, { "epoch": 3.74, "grad_norm": 3.3766226768493652, "learning_rate": 3.163563605310455e-06, "loss": 0.0657, "step": 380050 }, { "epoch": 3.74, "grad_norm": 3.9330410957336426, "learning_rate": 3.163439482856207e-06, "loss": 0.1836, "step": 380075 }, { "epoch": 3.74, "grad_norm": 4.968339443206787, "learning_rate": 3.1633153604019583e-06, "loss": 0.069, "step": 380100 }, { "epoch": 3.74, "grad_norm": 3.8364946842193604, "learning_rate": 3.16319123794771e-06, "loss": 0.1798, "step": 380125 }, { "epoch": 3.74, "grad_norm": 5.914059162139893, "learning_rate": 3.1630671154934616e-06, "loss": 0.0723, "step": 380150 }, { "epoch": 3.74, "grad_norm": 3.833575963973999, "learning_rate": 3.162942993039213e-06, "loss": 0.1647, "step": 380175 }, { "epoch": 3.74, "grad_norm": 14.354287147521973, "learning_rate": 3.1628188705849644e-06, "loss": 0.092, "step": 380200 }, { "epoch": 3.74, "grad_norm": 4.1605424880981445, "learning_rate": 3.1626947481307165e-06, "loss": 0.1848, "step": 380225 }, { "epoch": 3.74, "grad_norm": 4.882745742797852, "learning_rate": 3.1625706256764677e-06, "loss": 0.0492, "step": 380250 }, { "epoch": 3.74, "grad_norm": 3.985867500305176, "learning_rate": 3.162446503222219e-06, "loss": 0.1597, "step": 380275 }, { "epoch": 3.74, "grad_norm": 8.01522445678711, "learning_rate": 3.162322380767971e-06, "loss": 0.0726, "step": 380300 }, { "epoch": 3.74, "grad_norm": 5.2951436042785645, "learning_rate": 3.162198258313722e-06, "loss": 0.1644, "step": 380325 }, { "epoch": 3.74, "grad_norm": 6.4142985343933105, "learning_rate": 3.162074135859474e-06, "loss": 0.0983, "step": 380350 }, { "epoch": 3.74, "grad_norm": 4.449466705322266, "learning_rate": 3.1619500134052254e-06, "loss": 0.1634, "step": 380375 }, { "epoch": 3.74, "grad_norm": 9.473760604858398, "learning_rate": 3.161825890950977e-06, "loss": 0.0883, "step": 380400 }, { "epoch": 3.74, "grad_norm": 7.141627788543701, "learning_rate": 3.161706733394898e-06, "loss": 0.1824, "step": 380425 }, { "epoch": 3.74, "grad_norm": 10.722518920898438, "learning_rate": 3.16158261094065e-06, "loss": 0.0957, "step": 380450 }, { "epoch": 3.74, "grad_norm": 5.134609222412109, "learning_rate": 3.1614584884864014e-06, "loss": 0.1661, "step": 380475 }, { "epoch": 3.74, "grad_norm": 11.876351356506348, "learning_rate": 3.1613343660321526e-06, "loss": 0.096, "step": 380500 }, { "epoch": 3.74, "grad_norm": 4.584962844848633, "learning_rate": 3.1612102435779046e-06, "loss": 0.2253, "step": 380525 }, { "epoch": 3.74, "grad_norm": 6.647696018218994, "learning_rate": 3.161086121123656e-06, "loss": 0.0522, "step": 380550 }, { "epoch": 3.74, "grad_norm": 5.205676078796387, "learning_rate": 3.1609619986694075e-06, "loss": 0.1704, "step": 380575 }, { "epoch": 3.74, "grad_norm": 9.193876266479492, "learning_rate": 3.160837876215159e-06, "loss": 0.0612, "step": 380600 }, { "epoch": 3.74, "grad_norm": 5.469942092895508, "learning_rate": 3.1607137537609108e-06, "loss": 0.1681, "step": 380625 }, { "epoch": 3.74, "grad_norm": 7.103280067443848, "learning_rate": 3.160589631306662e-06, "loss": 0.0778, "step": 380650 }, { "epoch": 3.74, "grad_norm": 5.891807556152344, "learning_rate": 3.160465508852414e-06, "loss": 0.17, "step": 380675 }, { "epoch": 3.74, "grad_norm": 9.643655776977539, "learning_rate": 3.1603413863981652e-06, "loss": 0.0789, "step": 380700 }, { "epoch": 3.74, "grad_norm": 5.37190580368042, "learning_rate": 3.1602172639439165e-06, "loss": 0.1847, "step": 380725 }, { "epoch": 3.74, "grad_norm": 3.503967761993408, "learning_rate": 3.1600931414896685e-06, "loss": 0.0551, "step": 380750 }, { "epoch": 3.74, "grad_norm": 7.443155288696289, "learning_rate": 3.1599690190354197e-06, "loss": 0.1576, "step": 380775 }, { "epoch": 3.74, "grad_norm": 19.73896598815918, "learning_rate": 3.1598448965811714e-06, "loss": 0.0573, "step": 380800 }, { "epoch": 3.74, "grad_norm": 4.851698398590088, "learning_rate": 3.159720774126923e-06, "loss": 0.1727, "step": 380825 }, { "epoch": 3.74, "grad_norm": 12.300211906433105, "learning_rate": 3.1595966516726746e-06, "loss": 0.0851, "step": 380850 }, { "epoch": 3.74, "grad_norm": 5.675129413604736, "learning_rate": 3.159472529218426e-06, "loss": 0.1645, "step": 380875 }, { "epoch": 3.75, "grad_norm": 8.517940521240234, "learning_rate": 3.159348406764178e-06, "loss": 0.0871, "step": 380900 }, { "epoch": 3.75, "grad_norm": 4.56862735748291, "learning_rate": 3.159224284309929e-06, "loss": 0.1344, "step": 380925 }, { "epoch": 3.75, "grad_norm": 8.311985969543457, "learning_rate": 3.1591001618556803e-06, "loss": 0.0811, "step": 380950 }, { "epoch": 3.75, "grad_norm": 3.401073932647705, "learning_rate": 3.1589760394014324e-06, "loss": 0.1566, "step": 380975 }, { "epoch": 3.75, "grad_norm": 12.219694137573242, "learning_rate": 3.1588519169471836e-06, "loss": 0.0875, "step": 381000 }, { "epoch": 3.75, "grad_norm": 6.09790563583374, "learning_rate": 3.1587277944929352e-06, "loss": 0.2065, "step": 381025 }, { "epoch": 3.75, "grad_norm": 12.96845817565918, "learning_rate": 3.158603672038687e-06, "loss": 0.09, "step": 381050 }, { "epoch": 3.75, "grad_norm": 3.487609624862671, "learning_rate": 3.1584795495844385e-06, "loss": 0.1733, "step": 381075 }, { "epoch": 3.75, "grad_norm": 10.578936576843262, "learning_rate": 3.1583554271301897e-06, "loss": 0.0867, "step": 381100 }, { "epoch": 3.75, "grad_norm": 3.6582581996917725, "learning_rate": 3.158231304675941e-06, "loss": 0.1798, "step": 381125 }, { "epoch": 3.75, "grad_norm": 13.400033950805664, "learning_rate": 3.158107182221693e-06, "loss": 0.0551, "step": 381150 }, { "epoch": 3.75, "grad_norm": 4.580953598022461, "learning_rate": 3.157983059767444e-06, "loss": 0.1582, "step": 381175 }, { "epoch": 3.75, "grad_norm": 9.092623710632324, "learning_rate": 3.157858937313196e-06, "loss": 0.0837, "step": 381200 }, { "epoch": 3.75, "grad_norm": 4.648491859436035, "learning_rate": 3.1577348148589475e-06, "loss": 0.1882, "step": 381225 }, { "epoch": 3.75, "grad_norm": 10.0230073928833, "learning_rate": 3.157610692404699e-06, "loss": 0.0808, "step": 381250 }, { "epoch": 3.75, "grad_norm": 4.975394248962402, "learning_rate": 3.1574865699504503e-06, "loss": 0.1793, "step": 381275 }, { "epoch": 3.75, "grad_norm": 10.04568099975586, "learning_rate": 3.1573624474962024e-06, "loss": 0.0616, "step": 381300 }, { "epoch": 3.75, "grad_norm": 6.8655853271484375, "learning_rate": 3.1572383250419536e-06, "loss": 0.2097, "step": 381325 }, { "epoch": 3.75, "grad_norm": 10.58735466003418, "learning_rate": 3.1571142025877048e-06, "loss": 0.0703, "step": 381350 }, { "epoch": 3.75, "grad_norm": 2.960178852081299, "learning_rate": 3.156990080133457e-06, "loss": 0.2195, "step": 381375 }, { "epoch": 3.75, "grad_norm": 12.786284446716309, "learning_rate": 3.156865957679208e-06, "loss": 0.0868, "step": 381400 }, { "epoch": 3.75, "grad_norm": 4.017428398132324, "learning_rate": 3.1567418352249597e-06, "loss": 0.1814, "step": 381425 }, { "epoch": 3.75, "grad_norm": 9.564157485961914, "learning_rate": 3.1566177127707113e-06, "loss": 0.0453, "step": 381450 }, { "epoch": 3.75, "grad_norm": 4.907559871673584, "learning_rate": 3.156493590316463e-06, "loss": 0.1863, "step": 381475 }, { "epoch": 3.75, "grad_norm": 8.435483932495117, "learning_rate": 3.156369467862214e-06, "loss": 0.0815, "step": 381500 }, { "epoch": 3.75, "grad_norm": 4.963763236999512, "learning_rate": 3.1562453454079662e-06, "loss": 0.1521, "step": 381525 }, { "epoch": 3.75, "grad_norm": 9.263280868530273, "learning_rate": 3.1561212229537174e-06, "loss": 0.0699, "step": 381550 }, { "epoch": 3.75, "grad_norm": 4.420372009277344, "learning_rate": 3.1559971004994687e-06, "loss": 0.1569, "step": 381575 }, { "epoch": 3.75, "grad_norm": 9.93586540222168, "learning_rate": 3.1558729780452207e-06, "loss": 0.0697, "step": 381600 }, { "epoch": 3.75, "grad_norm": 5.340346336364746, "learning_rate": 3.155748855590972e-06, "loss": 0.1433, "step": 381625 }, { "epoch": 3.75, "grad_norm": 12.122894287109375, "learning_rate": 3.1556247331367236e-06, "loss": 0.0672, "step": 381650 }, { "epoch": 3.75, "grad_norm": 5.526989936828613, "learning_rate": 3.155500610682475e-06, "loss": 0.14, "step": 381675 }, { "epoch": 3.75, "grad_norm": 4.976221561431885, "learning_rate": 3.155376488228227e-06, "loss": 0.079, "step": 381700 }, { "epoch": 3.75, "grad_norm": 5.381195068359375, "learning_rate": 3.155252365773978e-06, "loss": 0.1613, "step": 381725 }, { "epoch": 3.75, "grad_norm": 9.099413871765137, "learning_rate": 3.15512824331973e-06, "loss": 0.0595, "step": 381750 }, { "epoch": 3.75, "grad_norm": 5.658264636993408, "learning_rate": 3.1550041208654813e-06, "loss": 0.1807, "step": 381775 }, { "epoch": 3.75, "grad_norm": 6.264033317565918, "learning_rate": 3.1548799984112325e-06, "loss": 0.068, "step": 381800 }, { "epoch": 3.75, "grad_norm": 2.8914473056793213, "learning_rate": 3.1547558759569846e-06, "loss": 0.177, "step": 381825 }, { "epoch": 3.75, "grad_norm": 11.365636825561523, "learning_rate": 3.154631753502736e-06, "loss": 0.0914, "step": 381850 }, { "epoch": 3.75, "grad_norm": 6.839111804962158, "learning_rate": 3.1545076310484874e-06, "loss": 0.1773, "step": 381875 }, { "epoch": 3.75, "grad_norm": 5.768272876739502, "learning_rate": 3.154383508594239e-06, "loss": 0.0983, "step": 381900 }, { "epoch": 3.76, "grad_norm": 9.142499923706055, "learning_rate": 3.1542593861399907e-06, "loss": 0.1702, "step": 381925 }, { "epoch": 3.76, "grad_norm": 0.13736850023269653, "learning_rate": 3.154135263685742e-06, "loss": 0.0781, "step": 381950 }, { "epoch": 3.76, "grad_norm": 4.839674472808838, "learning_rate": 3.154011141231493e-06, "loss": 0.156, "step": 381975 }, { "epoch": 3.76, "grad_norm": 8.855134010314941, "learning_rate": 3.153887018777245e-06, "loss": 0.0722, "step": 382000 }, { "epoch": 3.76, "grad_norm": 4.096193313598633, "learning_rate": 3.1537628963229964e-06, "loss": 0.1862, "step": 382025 }, { "epoch": 3.76, "grad_norm": 12.71381950378418, "learning_rate": 3.153638773868748e-06, "loss": 0.0814, "step": 382050 }, { "epoch": 3.76, "grad_norm": 5.651122093200684, "learning_rate": 3.1535146514144997e-06, "loss": 0.1806, "step": 382075 }, { "epoch": 3.76, "grad_norm": 12.793476104736328, "learning_rate": 3.1533905289602513e-06, "loss": 0.0804, "step": 382100 }, { "epoch": 3.76, "grad_norm": 4.571421146392822, "learning_rate": 3.1532664065060025e-06, "loss": 0.182, "step": 382125 }, { "epoch": 3.76, "grad_norm": 9.272418022155762, "learning_rate": 3.1531422840517546e-06, "loss": 0.0983, "step": 382150 }, { "epoch": 3.76, "grad_norm": 6.497931003570557, "learning_rate": 3.1530181615975058e-06, "loss": 0.1727, "step": 382175 }, { "epoch": 3.76, "grad_norm": 6.826585292816162, "learning_rate": 3.1528940391432574e-06, "loss": 0.0747, "step": 382200 }, { "epoch": 3.76, "grad_norm": 4.835493564605713, "learning_rate": 3.152769916689009e-06, "loss": 0.1722, "step": 382225 }, { "epoch": 3.76, "grad_norm": 14.485007286071777, "learning_rate": 3.1526457942347607e-06, "loss": 0.0899, "step": 382250 }, { "epoch": 3.76, "grad_norm": 7.514415740966797, "learning_rate": 3.152521671780512e-06, "loss": 0.1987, "step": 382275 }, { "epoch": 3.76, "grad_norm": 0.7664864659309387, "learning_rate": 3.152397549326264e-06, "loss": 0.0605, "step": 382300 }, { "epoch": 3.76, "grad_norm": 5.0540900230407715, "learning_rate": 3.152273426872015e-06, "loss": 0.1839, "step": 382325 }, { "epoch": 3.76, "grad_norm": 6.493685245513916, "learning_rate": 3.1521493044177664e-06, "loss": 0.0735, "step": 382350 }, { "epoch": 3.76, "grad_norm": 8.041630744934082, "learning_rate": 3.1520251819635184e-06, "loss": 0.1717, "step": 382375 }, { "epoch": 3.76, "grad_norm": 12.027840614318848, "learning_rate": 3.1519010595092696e-06, "loss": 0.0765, "step": 382400 }, { "epoch": 3.76, "grad_norm": 2.937252998352051, "learning_rate": 3.1517769370550213e-06, "loss": 0.1487, "step": 382425 }, { "epoch": 3.76, "grad_norm": 9.557974815368652, "learning_rate": 3.151652814600773e-06, "loss": 0.0996, "step": 382450 }, { "epoch": 3.76, "grad_norm": 3.1480207443237305, "learning_rate": 3.1515286921465245e-06, "loss": 0.1876, "step": 382475 }, { "epoch": 3.76, "grad_norm": 8.690702438354492, "learning_rate": 3.1514045696922758e-06, "loss": 0.0861, "step": 382500 }, { "epoch": 3.76, "grad_norm": 5.045421123504639, "learning_rate": 3.151280447238028e-06, "loss": 0.1642, "step": 382525 }, { "epoch": 3.76, "grad_norm": 4.174965858459473, "learning_rate": 3.151156324783779e-06, "loss": 0.0703, "step": 382550 }, { "epoch": 3.76, "grad_norm": 6.210142612457275, "learning_rate": 3.1510322023295302e-06, "loss": 0.1727, "step": 382575 }, { "epoch": 3.76, "grad_norm": 3.2475571632385254, "learning_rate": 3.1509080798752823e-06, "loss": 0.0781, "step": 382600 }, { "epoch": 3.76, "grad_norm": 5.382968425750732, "learning_rate": 3.1507839574210335e-06, "loss": 0.1747, "step": 382625 }, { "epoch": 3.76, "grad_norm": 7.829779148101807, "learning_rate": 3.150659834966785e-06, "loss": 0.065, "step": 382650 }, { "epoch": 3.76, "grad_norm": 3.79017972946167, "learning_rate": 3.1505406774107066e-06, "loss": 0.1856, "step": 382675 }, { "epoch": 3.76, "grad_norm": 7.688117980957031, "learning_rate": 3.1504165549564582e-06, "loss": 0.0637, "step": 382700 }, { "epoch": 3.76, "grad_norm": 4.127418041229248, "learning_rate": 3.1502924325022094e-06, "loss": 0.1606, "step": 382725 }, { "epoch": 3.76, "grad_norm": 11.881119728088379, "learning_rate": 3.1501683100479615e-06, "loss": 0.0738, "step": 382750 }, { "epoch": 3.76, "grad_norm": 4.257584571838379, "learning_rate": 3.1500441875937127e-06, "loss": 0.1902, "step": 382775 }, { "epoch": 3.76, "grad_norm": 8.368422508239746, "learning_rate": 3.149920065139464e-06, "loss": 0.0579, "step": 382800 }, { "epoch": 3.76, "grad_norm": 6.36440896987915, "learning_rate": 3.149795942685216e-06, "loss": 0.204, "step": 382825 }, { "epoch": 3.76, "grad_norm": 9.714106559753418, "learning_rate": 3.149671820230967e-06, "loss": 0.0892, "step": 382850 }, { "epoch": 3.76, "grad_norm": 4.5507588386535645, "learning_rate": 3.149547697776719e-06, "loss": 0.1857, "step": 382875 }, { "epoch": 3.76, "grad_norm": 5.3321027755737305, "learning_rate": 3.1494235753224705e-06, "loss": 0.0741, "step": 382900 }, { "epoch": 3.76, "grad_norm": 3.7158524990081787, "learning_rate": 3.149299452868222e-06, "loss": 0.1955, "step": 382925 }, { "epoch": 3.77, "grad_norm": 6.918338298797607, "learning_rate": 3.1491753304139733e-06, "loss": 0.1031, "step": 382950 }, { "epoch": 3.77, "grad_norm": 4.557845592498779, "learning_rate": 3.1490512079597254e-06, "loss": 0.1505, "step": 382975 }, { "epoch": 3.77, "grad_norm": 9.042116165161133, "learning_rate": 3.1489270855054766e-06, "loss": 0.0691, "step": 383000 }, { "epoch": 3.77, "grad_norm": 7.225375652313232, "learning_rate": 3.148802963051228e-06, "loss": 0.1692, "step": 383025 }, { "epoch": 3.77, "grad_norm": 10.044210433959961, "learning_rate": 3.14867884059698e-06, "loss": 0.0852, "step": 383050 }, { "epoch": 3.77, "grad_norm": 4.4645891189575195, "learning_rate": 3.148554718142731e-06, "loss": 0.2091, "step": 383075 }, { "epoch": 3.77, "grad_norm": 3.893132209777832, "learning_rate": 3.1484305956884827e-06, "loss": 0.0531, "step": 383100 }, { "epoch": 3.77, "grad_norm": 3.1663825511932373, "learning_rate": 3.1483064732342343e-06, "loss": 0.1794, "step": 383125 }, { "epoch": 3.77, "grad_norm": 7.775489330291748, "learning_rate": 3.148182350779986e-06, "loss": 0.104, "step": 383150 }, { "epoch": 3.77, "grad_norm": 2.8721282482147217, "learning_rate": 3.148058228325737e-06, "loss": 0.1765, "step": 383175 }, { "epoch": 3.77, "grad_norm": 6.1288604736328125, "learning_rate": 3.1479341058714892e-06, "loss": 0.0765, "step": 383200 }, { "epoch": 3.77, "grad_norm": 6.783116817474365, "learning_rate": 3.1478099834172404e-06, "loss": 0.1486, "step": 383225 }, { "epoch": 3.77, "grad_norm": 7.154374122619629, "learning_rate": 3.1476858609629917e-06, "loss": 0.0678, "step": 383250 }, { "epoch": 3.77, "grad_norm": 3.6951353549957275, "learning_rate": 3.1475617385087437e-06, "loss": 0.1927, "step": 383275 }, { "epoch": 3.77, "grad_norm": 7.814316749572754, "learning_rate": 3.147437616054495e-06, "loss": 0.0438, "step": 383300 }, { "epoch": 3.77, "grad_norm": 5.503223896026611, "learning_rate": 3.1473134936002466e-06, "loss": 0.193, "step": 383325 }, { "epoch": 3.77, "grad_norm": 9.185052871704102, "learning_rate": 3.147189371145998e-06, "loss": 0.0565, "step": 383350 }, { "epoch": 3.77, "grad_norm": 4.538112640380859, "learning_rate": 3.14706524869175e-06, "loss": 0.1938, "step": 383375 }, { "epoch": 3.77, "grad_norm": 4.748142242431641, "learning_rate": 3.146941126237501e-06, "loss": 0.0513, "step": 383400 }, { "epoch": 3.77, "grad_norm": 6.607964992523193, "learning_rate": 3.1468170037832523e-06, "loss": 0.183, "step": 383425 }, { "epoch": 3.77, "grad_norm": 7.635712623596191, "learning_rate": 3.1466928813290043e-06, "loss": 0.0713, "step": 383450 }, { "epoch": 3.77, "grad_norm": 7.374976634979248, "learning_rate": 3.1465687588747555e-06, "loss": 0.1795, "step": 383475 }, { "epoch": 3.77, "grad_norm": 2.5595085620880127, "learning_rate": 3.146444636420507e-06, "loss": 0.067, "step": 383500 }, { "epoch": 3.77, "grad_norm": 5.53636360168457, "learning_rate": 3.146320513966259e-06, "loss": 0.1788, "step": 383525 }, { "epoch": 3.77, "grad_norm": 9.430584907531738, "learning_rate": 3.1461963915120104e-06, "loss": 0.0592, "step": 383550 }, { "epoch": 3.77, "grad_norm": 5.424056529998779, "learning_rate": 3.1460722690577616e-06, "loss": 0.1755, "step": 383575 }, { "epoch": 3.77, "grad_norm": 14.441227912902832, "learning_rate": 3.1459481466035137e-06, "loss": 0.1149, "step": 383600 }, { "epoch": 3.77, "grad_norm": 4.624031066894531, "learning_rate": 3.145824024149265e-06, "loss": 0.1844, "step": 383625 }, { "epoch": 3.77, "grad_norm": 9.832578659057617, "learning_rate": 3.145699901695016e-06, "loss": 0.0674, "step": 383650 }, { "epoch": 3.77, "grad_norm": 7.40965461730957, "learning_rate": 3.145575779240768e-06, "loss": 0.187, "step": 383675 }, { "epoch": 3.77, "grad_norm": 10.611435890197754, "learning_rate": 3.1454516567865194e-06, "loss": 0.0743, "step": 383700 }, { "epoch": 3.77, "grad_norm": 4.852793216705322, "learning_rate": 3.145327534332271e-06, "loss": 0.2002, "step": 383725 }, { "epoch": 3.77, "grad_norm": 5.648773670196533, "learning_rate": 3.1452034118780227e-06, "loss": 0.0616, "step": 383750 }, { "epoch": 3.77, "grad_norm": 6.447330474853516, "learning_rate": 3.1450792894237743e-06, "loss": 0.2006, "step": 383775 }, { "epoch": 3.77, "grad_norm": 5.386742115020752, "learning_rate": 3.1449551669695255e-06, "loss": 0.0643, "step": 383800 }, { "epoch": 3.77, "grad_norm": 4.694815158843994, "learning_rate": 3.1448310445152776e-06, "loss": 0.19, "step": 383825 }, { "epoch": 3.77, "grad_norm": 2.6405036449432373, "learning_rate": 3.1447069220610288e-06, "loss": 0.0816, "step": 383850 }, { "epoch": 3.77, "grad_norm": 3.9305918216705322, "learning_rate": 3.14458279960678e-06, "loss": 0.1734, "step": 383875 }, { "epoch": 3.77, "grad_norm": 10.426668167114258, "learning_rate": 3.144458677152532e-06, "loss": 0.0815, "step": 383900 }, { "epoch": 3.77, "grad_norm": 6.354162216186523, "learning_rate": 3.1443345546982833e-06, "loss": 0.2032, "step": 383925 }, { "epoch": 3.78, "grad_norm": 8.331934928894043, "learning_rate": 3.144210432244035e-06, "loss": 0.0814, "step": 383950 }, { "epoch": 3.78, "grad_norm": 4.375941753387451, "learning_rate": 3.1440863097897865e-06, "loss": 0.1625, "step": 383975 }, { "epoch": 3.78, "grad_norm": 9.9966459274292, "learning_rate": 3.143962187335538e-06, "loss": 0.0753, "step": 384000 }, { "epoch": 3.78, "grad_norm": 4.532400608062744, "learning_rate": 3.1438380648812894e-06, "loss": 0.1793, "step": 384025 }, { "epoch": 3.78, "grad_norm": 11.123298645019531, "learning_rate": 3.1437139424270414e-06, "loss": 0.0831, "step": 384050 }, { "epoch": 3.78, "grad_norm": 6.358171463012695, "learning_rate": 3.1435898199727926e-06, "loss": 0.1848, "step": 384075 }, { "epoch": 3.78, "grad_norm": 12.206095695495605, "learning_rate": 3.143465697518544e-06, "loss": 0.0643, "step": 384100 }, { "epoch": 3.78, "grad_norm": 6.61733341217041, "learning_rate": 3.143341575064296e-06, "loss": 0.1891, "step": 384125 }, { "epoch": 3.78, "grad_norm": 3.7827930450439453, "learning_rate": 3.143217452610047e-06, "loss": 0.084, "step": 384150 }, { "epoch": 3.78, "grad_norm": 6.363836288452148, "learning_rate": 3.1430933301557988e-06, "loss": 0.1728, "step": 384175 }, { "epoch": 3.78, "grad_norm": 9.655852317810059, "learning_rate": 3.1429692077015504e-06, "loss": 0.0856, "step": 384200 }, { "epoch": 3.78, "grad_norm": 5.192920207977295, "learning_rate": 3.142845085247302e-06, "loss": 0.1727, "step": 384225 }, { "epoch": 3.78, "grad_norm": 12.412688255310059, "learning_rate": 3.1427209627930532e-06, "loss": 0.0865, "step": 384250 }, { "epoch": 3.78, "grad_norm": 14.11416244506836, "learning_rate": 3.1425968403388045e-06, "loss": 0.1845, "step": 384275 }, { "epoch": 3.78, "grad_norm": 6.414497375488281, "learning_rate": 3.1424727178845565e-06, "loss": 0.064, "step": 384300 }, { "epoch": 3.78, "grad_norm": 5.8853373527526855, "learning_rate": 3.1423485954303077e-06, "loss": 0.1633, "step": 384325 }, { "epoch": 3.78, "grad_norm": 3.382824420928955, "learning_rate": 3.1422244729760594e-06, "loss": 0.0776, "step": 384350 }, { "epoch": 3.78, "grad_norm": 4.636228561401367, "learning_rate": 3.142100350521811e-06, "loss": 0.1316, "step": 384375 }, { "epoch": 3.78, "grad_norm": 5.036632537841797, "learning_rate": 3.1419762280675626e-06, "loss": 0.0688, "step": 384400 }, { "epoch": 3.78, "grad_norm": 5.870255947113037, "learning_rate": 3.141852105613314e-06, "loss": 0.1863, "step": 384425 }, { "epoch": 3.78, "grad_norm": 10.096023559570312, "learning_rate": 3.141727983159066e-06, "loss": 0.0926, "step": 384450 }, { "epoch": 3.78, "grad_norm": 4.037776947021484, "learning_rate": 3.141603860704817e-06, "loss": 0.1998, "step": 384475 }, { "epoch": 3.78, "grad_norm": 7.267941951751709, "learning_rate": 3.1414797382505683e-06, "loss": 0.0536, "step": 384500 }, { "epoch": 3.78, "grad_norm": 6.649649143218994, "learning_rate": 3.1413556157963204e-06, "loss": 0.1773, "step": 384525 }, { "epoch": 3.78, "grad_norm": 3.9874324798583984, "learning_rate": 3.1412314933420716e-06, "loss": 0.0808, "step": 384550 }, { "epoch": 3.78, "grad_norm": 3.0367627143859863, "learning_rate": 3.1411073708878232e-06, "loss": 0.1526, "step": 384575 }, { "epoch": 3.78, "grad_norm": 0.4334578216075897, "learning_rate": 3.140983248433575e-06, "loss": 0.0943, "step": 384600 }, { "epoch": 3.78, "grad_norm": 4.201465129852295, "learning_rate": 3.1408591259793265e-06, "loss": 0.1737, "step": 384625 }, { "epoch": 3.78, "grad_norm": 1.0882744789123535, "learning_rate": 3.1407350035250777e-06, "loss": 0.0514, "step": 384650 }, { "epoch": 3.78, "grad_norm": 4.497485637664795, "learning_rate": 3.1406108810708298e-06, "loss": 0.1928, "step": 384675 }, { "epoch": 3.78, "grad_norm": 8.796998977661133, "learning_rate": 3.140486758616581e-06, "loss": 0.075, "step": 384700 }, { "epoch": 3.78, "grad_norm": 6.209630489349365, "learning_rate": 3.140362636162332e-06, "loss": 0.1817, "step": 384725 }, { "epoch": 3.78, "grad_norm": 13.438304901123047, "learning_rate": 3.1402385137080843e-06, "loss": 0.0689, "step": 384750 }, { "epoch": 3.78, "grad_norm": 5.372830867767334, "learning_rate": 3.1401193561520053e-06, "loss": 0.2064, "step": 384775 }, { "epoch": 3.78, "grad_norm": 6.420157432556152, "learning_rate": 3.1399952336977573e-06, "loss": 0.0486, "step": 384800 }, { "epoch": 3.78, "grad_norm": 5.040960788726807, "learning_rate": 3.1398711112435085e-06, "loss": 0.1477, "step": 384825 }, { "epoch": 3.78, "grad_norm": 6.559349536895752, "learning_rate": 3.13974698878926e-06, "loss": 0.0784, "step": 384850 }, { "epoch": 3.78, "grad_norm": 5.235971927642822, "learning_rate": 3.1396228663350114e-06, "loss": 0.2, "step": 384875 }, { "epoch": 3.78, "grad_norm": 5.767775058746338, "learning_rate": 3.1394987438807635e-06, "loss": 0.0706, "step": 384900 }, { "epoch": 3.78, "grad_norm": 4.238391399383545, "learning_rate": 3.1393746214265147e-06, "loss": 0.1925, "step": 384925 }, { "epoch": 3.78, "grad_norm": 12.101187705993652, "learning_rate": 3.139250498972266e-06, "loss": 0.0812, "step": 384950 }, { "epoch": 3.79, "grad_norm": 5.829641819000244, "learning_rate": 3.139126376518018e-06, "loss": 0.1969, "step": 384975 }, { "epoch": 3.79, "grad_norm": 5.268864631652832, "learning_rate": 3.139002254063769e-06, "loss": 0.0852, "step": 385000 }, { "epoch": 3.79, "grad_norm": 4.021063804626465, "learning_rate": 3.1388781316095208e-06, "loss": 0.2032, "step": 385025 }, { "epoch": 3.79, "grad_norm": 6.334819316864014, "learning_rate": 3.1387540091552724e-06, "loss": 0.0668, "step": 385050 }, { "epoch": 3.79, "grad_norm": 4.952171802520752, "learning_rate": 3.138629886701024e-06, "loss": 0.1982, "step": 385075 }, { "epoch": 3.79, "grad_norm": 7.162252426147461, "learning_rate": 3.1385057642467753e-06, "loss": 0.0924, "step": 385100 }, { "epoch": 3.79, "grad_norm": 7.404931545257568, "learning_rate": 3.1383816417925273e-06, "loss": 0.1953, "step": 385125 }, { "epoch": 3.79, "grad_norm": 13.080056190490723, "learning_rate": 3.1382575193382785e-06, "loss": 0.075, "step": 385150 }, { "epoch": 3.79, "grad_norm": 5.142355442047119, "learning_rate": 3.13813339688403e-06, "loss": 0.2107, "step": 385175 }, { "epoch": 3.79, "grad_norm": 9.31796932220459, "learning_rate": 3.138009274429782e-06, "loss": 0.0722, "step": 385200 }, { "epoch": 3.79, "grad_norm": 3.9594295024871826, "learning_rate": 3.1378851519755334e-06, "loss": 0.1722, "step": 385225 }, { "epoch": 3.79, "grad_norm": 13.095108985900879, "learning_rate": 3.1377610295212847e-06, "loss": 0.0851, "step": 385250 }, { "epoch": 3.79, "grad_norm": 7.062378883361816, "learning_rate": 3.1376369070670363e-06, "loss": 0.1719, "step": 385275 }, { "epoch": 3.79, "grad_norm": 8.72610092163086, "learning_rate": 3.137512784612788e-06, "loss": 0.0926, "step": 385300 }, { "epoch": 3.79, "grad_norm": 5.487646579742432, "learning_rate": 3.137388662158539e-06, "loss": 0.1972, "step": 385325 }, { "epoch": 3.79, "grad_norm": 9.092977523803711, "learning_rate": 3.137264539704291e-06, "loss": 0.081, "step": 385350 }, { "epoch": 3.79, "grad_norm": 7.385397911071777, "learning_rate": 3.1371404172500424e-06, "loss": 0.1956, "step": 385375 }, { "epoch": 3.79, "grad_norm": 9.385571479797363, "learning_rate": 3.137016294795794e-06, "loss": 0.0561, "step": 385400 }, { "epoch": 3.79, "grad_norm": 6.872321605682373, "learning_rate": 3.1368921723415457e-06, "loss": 0.1589, "step": 385425 }, { "epoch": 3.79, "grad_norm": 7.870985984802246, "learning_rate": 3.1367680498872973e-06, "loss": 0.0691, "step": 385450 }, { "epoch": 3.79, "grad_norm": 4.6871466636657715, "learning_rate": 3.1366439274330485e-06, "loss": 0.1772, "step": 385475 }, { "epoch": 3.79, "grad_norm": 1.5829381942749023, "learning_rate": 3.1365198049788006e-06, "loss": 0.0851, "step": 385500 }, { "epoch": 3.79, "grad_norm": 4.190202236175537, "learning_rate": 3.1363956825245518e-06, "loss": 0.2061, "step": 385525 }, { "epoch": 3.79, "grad_norm": 14.92074966430664, "learning_rate": 3.136271560070303e-06, "loss": 0.0845, "step": 385550 }, { "epoch": 3.79, "grad_norm": 4.665616035461426, "learning_rate": 3.136147437616055e-06, "loss": 0.1783, "step": 385575 }, { "epoch": 3.79, "grad_norm": 9.417333602905273, "learning_rate": 3.1360233151618063e-06, "loss": 0.0988, "step": 385600 }, { "epoch": 3.79, "grad_norm": 4.027190208435059, "learning_rate": 3.135899192707558e-06, "loss": 0.2096, "step": 385625 }, { "epoch": 3.79, "grad_norm": 5.10417366027832, "learning_rate": 3.1357750702533095e-06, "loss": 0.0544, "step": 385650 }, { "epoch": 3.79, "grad_norm": 5.89746618270874, "learning_rate": 3.135650947799061e-06, "loss": 0.1604, "step": 385675 }, { "epoch": 3.79, "grad_norm": 10.279914855957031, "learning_rate": 3.1355268253448124e-06, "loss": 0.0626, "step": 385700 }, { "epoch": 3.79, "grad_norm": 5.648159980773926, "learning_rate": 3.1354027028905636e-06, "loss": 0.1597, "step": 385725 }, { "epoch": 3.79, "grad_norm": 2.75486159324646, "learning_rate": 3.1352785804363157e-06, "loss": 0.0739, "step": 385750 }, { "epoch": 3.79, "grad_norm": 4.917194843292236, "learning_rate": 3.135154457982067e-06, "loss": 0.226, "step": 385775 }, { "epoch": 3.79, "grad_norm": 13.767590522766113, "learning_rate": 3.1350303355278185e-06, "loss": 0.0702, "step": 385800 }, { "epoch": 3.79, "grad_norm": 3.0655477046966553, "learning_rate": 3.13490621307357e-06, "loss": 0.1996, "step": 385825 }, { "epoch": 3.79, "grad_norm": 11.058534622192383, "learning_rate": 3.1347820906193218e-06, "loss": 0.0801, "step": 385850 }, { "epoch": 3.79, "grad_norm": 4.572622299194336, "learning_rate": 3.134657968165073e-06, "loss": 0.1861, "step": 385875 }, { "epoch": 3.79, "grad_norm": 5.900652885437012, "learning_rate": 3.134533845710825e-06, "loss": 0.0792, "step": 385900 }, { "epoch": 3.79, "grad_norm": 5.362847805023193, "learning_rate": 3.1344097232565763e-06, "loss": 0.1608, "step": 385925 }, { "epoch": 3.79, "grad_norm": 8.528621673583984, "learning_rate": 3.1342856008023275e-06, "loss": 0.0688, "step": 385950 }, { "epoch": 3.79, "grad_norm": 6.073687553405762, "learning_rate": 3.1341614783480795e-06, "loss": 0.1763, "step": 385975 }, { "epoch": 3.8, "grad_norm": 5.486950874328613, "learning_rate": 3.1340373558938307e-06, "loss": 0.0752, "step": 386000 }, { "epoch": 3.8, "grad_norm": 3.0168423652648926, "learning_rate": 3.1339132334395824e-06, "loss": 0.173, "step": 386025 }, { "epoch": 3.8, "grad_norm": 13.703452110290527, "learning_rate": 3.133789110985334e-06, "loss": 0.0701, "step": 386050 }, { "epoch": 3.8, "grad_norm": 4.855120658874512, "learning_rate": 3.1336649885310856e-06, "loss": 0.1728, "step": 386075 }, { "epoch": 3.8, "grad_norm": 7.099119186401367, "learning_rate": 3.133540866076837e-06, "loss": 0.0839, "step": 386100 }, { "epoch": 3.8, "grad_norm": 4.231453895568848, "learning_rate": 3.133416743622589e-06, "loss": 0.1731, "step": 386125 }, { "epoch": 3.8, "grad_norm": 8.80873966217041, "learning_rate": 3.13329262116834e-06, "loss": 0.0688, "step": 386150 }, { "epoch": 3.8, "grad_norm": 3.5597951412200928, "learning_rate": 3.1331684987140913e-06, "loss": 0.1508, "step": 386175 }, { "epoch": 3.8, "grad_norm": 9.252758026123047, "learning_rate": 3.1330443762598434e-06, "loss": 0.0919, "step": 386200 }, { "epoch": 3.8, "grad_norm": 3.7880029678344727, "learning_rate": 3.1329202538055946e-06, "loss": 0.1704, "step": 386225 }, { "epoch": 3.8, "grad_norm": 6.17334508895874, "learning_rate": 3.1327961313513462e-06, "loss": 0.0756, "step": 386250 }, { "epoch": 3.8, "grad_norm": 5.210205554962158, "learning_rate": 3.132672008897098e-06, "loss": 0.1628, "step": 386275 }, { "epoch": 3.8, "grad_norm": 13.125102043151855, "learning_rate": 3.1325478864428495e-06, "loss": 0.0569, "step": 386300 }, { "epoch": 3.8, "grad_norm": 4.3064069747924805, "learning_rate": 3.1324237639886007e-06, "loss": 0.155, "step": 386325 }, { "epoch": 3.8, "grad_norm": 3.3825061321258545, "learning_rate": 3.1322996415343528e-06, "loss": 0.0724, "step": 386350 }, { "epoch": 3.8, "grad_norm": 4.355198860168457, "learning_rate": 3.132175519080104e-06, "loss": 0.1701, "step": 386375 }, { "epoch": 3.8, "grad_norm": 9.057283401489258, "learning_rate": 3.132051396625855e-06, "loss": 0.0581, "step": 386400 }, { "epoch": 3.8, "grad_norm": 4.792356967926025, "learning_rate": 3.1319272741716073e-06, "loss": 0.1742, "step": 386425 }, { "epoch": 3.8, "grad_norm": 8.849298477172852, "learning_rate": 3.1318031517173585e-06, "loss": 0.0805, "step": 386450 }, { "epoch": 3.8, "grad_norm": 6.735045433044434, "learning_rate": 3.13167902926311e-06, "loss": 0.1627, "step": 386475 }, { "epoch": 3.8, "grad_norm": 8.340621948242188, "learning_rate": 3.1315549068088617e-06, "loss": 0.078, "step": 386500 }, { "epoch": 3.8, "grad_norm": 3.89011812210083, "learning_rate": 3.1314307843546134e-06, "loss": 0.1708, "step": 386525 }, { "epoch": 3.8, "grad_norm": 5.0428996086120605, "learning_rate": 3.1313066619003646e-06, "loss": 0.0775, "step": 386550 }, { "epoch": 3.8, "grad_norm": 8.813433647155762, "learning_rate": 3.131182539446116e-06, "loss": 0.1744, "step": 386575 }, { "epoch": 3.8, "grad_norm": 16.31949234008789, "learning_rate": 3.131058416991868e-06, "loss": 0.0767, "step": 386600 }, { "epoch": 3.8, "grad_norm": 5.0084228515625, "learning_rate": 3.130934294537619e-06, "loss": 0.2, "step": 386625 }, { "epoch": 3.8, "grad_norm": 11.83435344696045, "learning_rate": 3.1308101720833707e-06, "loss": 0.0851, "step": 386650 }, { "epoch": 3.8, "grad_norm": 5.06184196472168, "learning_rate": 3.1306860496291223e-06, "loss": 0.1961, "step": 386675 }, { "epoch": 3.8, "grad_norm": 0.9856312274932861, "learning_rate": 3.130561927174874e-06, "loss": 0.0641, "step": 386700 }, { "epoch": 3.8, "grad_norm": 4.289599895477295, "learning_rate": 3.130437804720625e-06, "loss": 0.2112, "step": 386725 }, { "epoch": 3.8, "grad_norm": 3.9592535495758057, "learning_rate": 3.1303136822663772e-06, "loss": 0.0627, "step": 386750 }, { "epoch": 3.8, "grad_norm": 5.317160606384277, "learning_rate": 3.1301895598121285e-06, "loss": 0.1758, "step": 386775 }, { "epoch": 3.8, "grad_norm": 5.3912224769592285, "learning_rate": 3.1300654373578797e-06, "loss": 0.0899, "step": 386800 }, { "epoch": 3.8, "grad_norm": 4.909718036651611, "learning_rate": 3.1299413149036317e-06, "loss": 0.2055, "step": 386825 }, { "epoch": 3.8, "grad_norm": 3.5076892375946045, "learning_rate": 3.129817192449383e-06, "loss": 0.0678, "step": 386850 }, { "epoch": 3.8, "grad_norm": 6.257256507873535, "learning_rate": 3.1296930699951346e-06, "loss": 0.1917, "step": 386875 }, { "epoch": 3.8, "grad_norm": 11.818492889404297, "learning_rate": 3.129568947540886e-06, "loss": 0.0613, "step": 386900 }, { "epoch": 3.8, "grad_norm": 4.142484664916992, "learning_rate": 3.129444825086638e-06, "loss": 0.1728, "step": 386925 }, { "epoch": 3.8, "grad_norm": 4.691737174987793, "learning_rate": 3.129320702632389e-06, "loss": 0.051, "step": 386950 }, { "epoch": 3.8, "grad_norm": 4.7374653816223145, "learning_rate": 3.129196580178141e-06, "loss": 0.1941, "step": 386975 }, { "epoch": 3.81, "grad_norm": 11.578852653503418, "learning_rate": 3.1290724577238923e-06, "loss": 0.0752, "step": 387000 }, { "epoch": 3.81, "grad_norm": 15.535133361816406, "learning_rate": 3.128953300167814e-06, "loss": 0.1922, "step": 387025 }, { "epoch": 3.81, "grad_norm": 4.935701370239258, "learning_rate": 3.1288291777135654e-06, "loss": 0.0703, "step": 387050 }, { "epoch": 3.81, "grad_norm": 6.660923004150391, "learning_rate": 3.1287050552593166e-06, "loss": 0.1906, "step": 387075 }, { "epoch": 3.81, "grad_norm": 6.593482971191406, "learning_rate": 3.1285809328050687e-06, "loss": 0.0922, "step": 387100 }, { "epoch": 3.81, "grad_norm": 4.528595924377441, "learning_rate": 3.12845681035082e-06, "loss": 0.144, "step": 387125 }, { "epoch": 3.81, "grad_norm": 5.423485279083252, "learning_rate": 3.1283326878965715e-06, "loss": 0.0517, "step": 387150 }, { "epoch": 3.81, "grad_norm": 6.168418884277344, "learning_rate": 3.1282085654423227e-06, "loss": 0.2035, "step": 387175 }, { "epoch": 3.81, "grad_norm": 5.83567476272583, "learning_rate": 3.128084442988075e-06, "loss": 0.0897, "step": 387200 }, { "epoch": 3.81, "grad_norm": 4.511355400085449, "learning_rate": 3.127960320533826e-06, "loss": 0.1921, "step": 387225 }, { "epoch": 3.81, "grad_norm": 4.138084888458252, "learning_rate": 3.1278361980795772e-06, "loss": 0.066, "step": 387250 }, { "epoch": 3.81, "grad_norm": 3.4511795043945312, "learning_rate": 3.1277120756253293e-06, "loss": 0.1876, "step": 387275 }, { "epoch": 3.81, "grad_norm": 8.828714370727539, "learning_rate": 3.1275879531710805e-06, "loss": 0.0848, "step": 387300 }, { "epoch": 3.81, "grad_norm": 4.670438289642334, "learning_rate": 3.127463830716832e-06, "loss": 0.2324, "step": 387325 }, { "epoch": 3.81, "grad_norm": 13.120875358581543, "learning_rate": 3.1273397082625838e-06, "loss": 0.0779, "step": 387350 }, { "epoch": 3.81, "grad_norm": 4.65997838973999, "learning_rate": 3.1272155858083354e-06, "loss": 0.1639, "step": 387375 }, { "epoch": 3.81, "grad_norm": 8.663178443908691, "learning_rate": 3.1270914633540866e-06, "loss": 0.0572, "step": 387400 }, { "epoch": 3.81, "grad_norm": 4.833852767944336, "learning_rate": 3.1269673408998387e-06, "loss": 0.1575, "step": 387425 }, { "epoch": 3.81, "grad_norm": 11.794718742370605, "learning_rate": 3.12684321844559e-06, "loss": 0.042, "step": 387450 }, { "epoch": 3.81, "grad_norm": 5.247845649719238, "learning_rate": 3.126719095991341e-06, "loss": 0.1419, "step": 387475 }, { "epoch": 3.81, "grad_norm": 9.140216827392578, "learning_rate": 3.126594973537093e-06, "loss": 0.1008, "step": 387500 }, { "epoch": 3.81, "grad_norm": 5.310014247894287, "learning_rate": 3.1264708510828444e-06, "loss": 0.1893, "step": 387525 }, { "epoch": 3.81, "grad_norm": 4.705896854400635, "learning_rate": 3.126346728628596e-06, "loss": 0.0622, "step": 387550 }, { "epoch": 3.81, "grad_norm": 5.989496231079102, "learning_rate": 3.1262226061743476e-06, "loss": 0.1865, "step": 387575 }, { "epoch": 3.81, "grad_norm": 4.301454544067383, "learning_rate": 3.1260984837200993e-06, "loss": 0.0526, "step": 387600 }, { "epoch": 3.81, "grad_norm": 6.103398323059082, "learning_rate": 3.1259743612658505e-06, "loss": 0.212, "step": 387625 }, { "epoch": 3.81, "grad_norm": 2.464378833770752, "learning_rate": 3.1258502388116025e-06, "loss": 0.0839, "step": 387650 }, { "epoch": 3.81, "grad_norm": 5.466170310974121, "learning_rate": 3.1257261163573537e-06, "loss": 0.1573, "step": 387675 }, { "epoch": 3.81, "grad_norm": 11.034626960754395, "learning_rate": 3.125601993903105e-06, "loss": 0.072, "step": 387700 }, { "epoch": 3.81, "grad_norm": 3.534634590148926, "learning_rate": 3.125477871448857e-06, "loss": 0.1994, "step": 387725 }, { "epoch": 3.81, "grad_norm": 5.794834136962891, "learning_rate": 3.1253537489946082e-06, "loss": 0.0868, "step": 387750 }, { "epoch": 3.81, "grad_norm": 5.216959476470947, "learning_rate": 3.12522962654036e-06, "loss": 0.1884, "step": 387775 }, { "epoch": 3.81, "grad_norm": 3.4727561473846436, "learning_rate": 3.1251055040861115e-06, "loss": 0.0922, "step": 387800 }, { "epoch": 3.81, "grad_norm": 6.591207027435303, "learning_rate": 3.124981381631863e-06, "loss": 0.1886, "step": 387825 }, { "epoch": 3.81, "grad_norm": 5.1294755935668945, "learning_rate": 3.1248572591776143e-06, "loss": 0.0493, "step": 387850 }, { "epoch": 3.81, "grad_norm": 4.362168788909912, "learning_rate": 3.1247331367233664e-06, "loss": 0.1891, "step": 387875 }, { "epoch": 3.81, "grad_norm": 6.02940559387207, "learning_rate": 3.1246090142691176e-06, "loss": 0.0817, "step": 387900 }, { "epoch": 3.81, "grad_norm": 4.04365873336792, "learning_rate": 3.124484891814869e-06, "loss": 0.1983, "step": 387925 }, { "epoch": 3.81, "grad_norm": 9.699912071228027, "learning_rate": 3.124360769360621e-06, "loss": 0.0634, "step": 387950 }, { "epoch": 3.81, "grad_norm": 9.03292179107666, "learning_rate": 3.124236646906372e-06, "loss": 0.1907, "step": 387975 }, { "epoch": 3.81, "grad_norm": 5.174799919128418, "learning_rate": 3.1241125244521237e-06, "loss": 0.0656, "step": 388000 }, { "epoch": 3.82, "grad_norm": 5.695305347442627, "learning_rate": 3.123988401997875e-06, "loss": 0.2035, "step": 388025 }, { "epoch": 3.82, "grad_norm": 10.665445327758789, "learning_rate": 3.123864279543627e-06, "loss": 0.0679, "step": 388050 }, { "epoch": 3.82, "grad_norm": 14.78122329711914, "learning_rate": 3.123740157089378e-06, "loss": 0.1585, "step": 388075 }, { "epoch": 3.82, "grad_norm": 8.125304222106934, "learning_rate": 3.12361603463513e-06, "loss": 0.0926, "step": 388100 }, { "epoch": 3.82, "grad_norm": 4.885652542114258, "learning_rate": 3.1234919121808815e-06, "loss": 0.2116, "step": 388125 }, { "epoch": 3.82, "grad_norm": 5.389511585235596, "learning_rate": 3.123367789726633e-06, "loss": 0.0688, "step": 388150 }, { "epoch": 3.82, "grad_norm": 4.62799596786499, "learning_rate": 3.1232436672723843e-06, "loss": 0.1434, "step": 388175 }, { "epoch": 3.82, "grad_norm": 9.879510879516602, "learning_rate": 3.1231195448181364e-06, "loss": 0.0685, "step": 388200 }, { "epoch": 3.82, "grad_norm": 5.489658832550049, "learning_rate": 3.1229954223638876e-06, "loss": 0.1822, "step": 388225 }, { "epoch": 3.82, "grad_norm": 8.87153148651123, "learning_rate": 3.122871299909639e-06, "loss": 0.0689, "step": 388250 }, { "epoch": 3.82, "grad_norm": 7.1177473068237305, "learning_rate": 3.122747177455391e-06, "loss": 0.1872, "step": 388275 }, { "epoch": 3.82, "grad_norm": 7.560352802276611, "learning_rate": 3.122623055001142e-06, "loss": 0.0737, "step": 388300 }, { "epoch": 3.82, "grad_norm": 4.085369110107422, "learning_rate": 3.1224989325468937e-06, "loss": 0.2033, "step": 388325 }, { "epoch": 3.82, "grad_norm": 22.27159309387207, "learning_rate": 3.1223748100926453e-06, "loss": 0.0744, "step": 388350 }, { "epoch": 3.82, "grad_norm": 5.109860420227051, "learning_rate": 3.122250687638397e-06, "loss": 0.2239, "step": 388375 }, { "epoch": 3.82, "grad_norm": 8.350436210632324, "learning_rate": 3.122126565184148e-06, "loss": 0.0681, "step": 388400 }, { "epoch": 3.82, "grad_norm": 6.1785173416137695, "learning_rate": 3.1220024427299002e-06, "loss": 0.1476, "step": 388425 }, { "epoch": 3.82, "grad_norm": 2.7245724201202393, "learning_rate": 3.1218783202756515e-06, "loss": 0.0696, "step": 388450 }, { "epoch": 3.82, "grad_norm": 5.418659687042236, "learning_rate": 3.1217541978214027e-06, "loss": 0.1951, "step": 388475 }, { "epoch": 3.82, "grad_norm": 4.242021560668945, "learning_rate": 3.1216300753671547e-06, "loss": 0.07, "step": 388500 }, { "epoch": 3.82, "grad_norm": 4.359046459197998, "learning_rate": 3.121505952912906e-06, "loss": 0.2028, "step": 388525 }, { "epoch": 3.82, "grad_norm": 0.6112359166145325, "learning_rate": 3.1213818304586576e-06, "loss": 0.0651, "step": 388550 }, { "epoch": 3.82, "grad_norm": 5.721122741699219, "learning_rate": 3.1212577080044092e-06, "loss": 0.1851, "step": 388575 }, { "epoch": 3.82, "grad_norm": 6.583786964416504, "learning_rate": 3.121133585550161e-06, "loss": 0.0795, "step": 388600 }, { "epoch": 3.82, "grad_norm": 4.528454780578613, "learning_rate": 3.121009463095912e-06, "loss": 0.1876, "step": 388625 }, { "epoch": 3.82, "grad_norm": 13.496177673339844, "learning_rate": 3.120885340641664e-06, "loss": 0.0912, "step": 388650 }, { "epoch": 3.82, "grad_norm": 3.845844268798828, "learning_rate": 3.1207612181874153e-06, "loss": 0.2102, "step": 388675 }, { "epoch": 3.82, "grad_norm": 2.8955860137939453, "learning_rate": 3.1206370957331665e-06, "loss": 0.0795, "step": 388700 }, { "epoch": 3.82, "grad_norm": 5.421259880065918, "learning_rate": 3.1205129732789186e-06, "loss": 0.1949, "step": 388725 }, { "epoch": 3.82, "grad_norm": 10.70284366607666, "learning_rate": 3.12038885082467e-06, "loss": 0.0773, "step": 388750 }, { "epoch": 3.82, "grad_norm": 10.048968315124512, "learning_rate": 3.1202647283704214e-06, "loss": 0.1784, "step": 388775 }, { "epoch": 3.82, "grad_norm": 8.01482105255127, "learning_rate": 3.120140605916173e-06, "loss": 0.082, "step": 388800 }, { "epoch": 3.82, "grad_norm": 5.478668212890625, "learning_rate": 3.1200164834619247e-06, "loss": 0.1993, "step": 388825 }, { "epoch": 3.82, "grad_norm": 8.214729309082031, "learning_rate": 3.119892361007676e-06, "loss": 0.0806, "step": 388850 }, { "epoch": 3.82, "grad_norm": 3.5494630336761475, "learning_rate": 3.119768238553427e-06, "loss": 0.1984, "step": 388875 }, { "epoch": 3.82, "grad_norm": 8.125401496887207, "learning_rate": 3.119644116099179e-06, "loss": 0.0667, "step": 388900 }, { "epoch": 3.82, "grad_norm": 5.395101547241211, "learning_rate": 3.1195199936449304e-06, "loss": 0.2064, "step": 388925 }, { "epoch": 3.82, "grad_norm": 3.867551803588867, "learning_rate": 3.119395871190682e-06, "loss": 0.0711, "step": 388950 }, { "epoch": 3.82, "grad_norm": 7.178216457366943, "learning_rate": 3.1192717487364337e-06, "loss": 0.2048, "step": 388975 }, { "epoch": 3.82, "grad_norm": 10.560683250427246, "learning_rate": 3.1191476262821853e-06, "loss": 0.0749, "step": 389000 }, { "epoch": 3.82, "grad_norm": 5.705696105957031, "learning_rate": 3.1190235038279365e-06, "loss": 0.1718, "step": 389025 }, { "epoch": 3.83, "grad_norm": 8.597506523132324, "learning_rate": 3.1188993813736886e-06, "loss": 0.0631, "step": 389050 }, { "epoch": 3.83, "grad_norm": 4.5994391441345215, "learning_rate": 3.11877525891944e-06, "loss": 0.1869, "step": 389075 }, { "epoch": 3.83, "grad_norm": 9.677616119384766, "learning_rate": 3.118651136465191e-06, "loss": 0.0696, "step": 389100 }, { "epoch": 3.83, "grad_norm": 3.8020622730255127, "learning_rate": 3.118527014010943e-06, "loss": 0.1812, "step": 389125 }, { "epoch": 3.83, "grad_norm": 11.449740409851074, "learning_rate": 3.1184028915566943e-06, "loss": 0.0859, "step": 389150 }, { "epoch": 3.83, "grad_norm": 4.1201066970825195, "learning_rate": 3.118278769102446e-06, "loss": 0.2045, "step": 389175 }, { "epoch": 3.83, "grad_norm": 0.2306024134159088, "learning_rate": 3.1181546466481975e-06, "loss": 0.0588, "step": 389200 }, { "epoch": 3.83, "grad_norm": 8.646291732788086, "learning_rate": 3.118030524193949e-06, "loss": 0.1688, "step": 389225 }, { "epoch": 3.83, "grad_norm": 17.64397621154785, "learning_rate": 3.1179064017397004e-06, "loss": 0.0766, "step": 389250 }, { "epoch": 3.83, "grad_norm": 4.856414794921875, "learning_rate": 3.1177822792854525e-06, "loss": 0.1769, "step": 389275 }, { "epoch": 3.83, "grad_norm": 5.698276042938232, "learning_rate": 3.1176581568312037e-06, "loss": 0.0619, "step": 389300 }, { "epoch": 3.83, "grad_norm": 7.085045337677002, "learning_rate": 3.117534034376955e-06, "loss": 0.1713, "step": 389325 }, { "epoch": 3.83, "grad_norm": 8.655935287475586, "learning_rate": 3.117409911922707e-06, "loss": 0.0854, "step": 389350 }, { "epoch": 3.83, "grad_norm": 4.410304069519043, "learning_rate": 3.117285789468458e-06, "loss": 0.1904, "step": 389375 }, { "epoch": 3.83, "grad_norm": 6.511792182922363, "learning_rate": 3.1171616670142098e-06, "loss": 0.086, "step": 389400 }, { "epoch": 3.83, "grad_norm": 6.520842552185059, "learning_rate": 3.1170375445599614e-06, "loss": 0.1945, "step": 389425 }, { "epoch": 3.83, "grad_norm": 4.988446235656738, "learning_rate": 3.116913422105713e-06, "loss": 0.0782, "step": 389450 }, { "epoch": 3.83, "grad_norm": 4.435455799102783, "learning_rate": 3.1167892996514643e-06, "loss": 0.1785, "step": 389475 }, { "epoch": 3.83, "grad_norm": 10.091157913208008, "learning_rate": 3.1166651771972163e-06, "loss": 0.0678, "step": 389500 }, { "epoch": 3.83, "grad_norm": 3.816373825073242, "learning_rate": 3.1165410547429675e-06, "loss": 0.143, "step": 389525 }, { "epoch": 3.83, "grad_norm": 12.777115821838379, "learning_rate": 3.1164169322887187e-06, "loss": 0.0862, "step": 389550 }, { "epoch": 3.83, "grad_norm": 5.478546142578125, "learning_rate": 3.116292809834471e-06, "loss": 0.2226, "step": 389575 }, { "epoch": 3.83, "grad_norm": 8.474282264709473, "learning_rate": 3.116168687380222e-06, "loss": 0.0748, "step": 389600 }, { "epoch": 3.83, "grad_norm": 6.422624588012695, "learning_rate": 3.1160445649259736e-06, "loss": 0.171, "step": 389625 }, { "epoch": 3.83, "grad_norm": 11.364516258239746, "learning_rate": 3.1159204424717253e-06, "loss": 0.0587, "step": 389650 }, { "epoch": 3.83, "grad_norm": 6.242223262786865, "learning_rate": 3.1158012849156467e-06, "loss": 0.2014, "step": 389675 }, { "epoch": 3.83, "grad_norm": 5.851877212524414, "learning_rate": 3.115677162461398e-06, "loss": 0.063, "step": 389700 }, { "epoch": 3.83, "grad_norm": 5.341370582580566, "learning_rate": 3.11555304000715e-06, "loss": 0.1815, "step": 389725 }, { "epoch": 3.83, "grad_norm": 8.378212928771973, "learning_rate": 3.1154289175529012e-06, "loss": 0.0602, "step": 389750 }, { "epoch": 3.83, "grad_norm": 4.698573112487793, "learning_rate": 3.1153047950986524e-06, "loss": 0.1961, "step": 389775 }, { "epoch": 3.83, "grad_norm": 15.793007850646973, "learning_rate": 3.1151806726444045e-06, "loss": 0.0755, "step": 389800 }, { "epoch": 3.83, "grad_norm": 4.549736022949219, "learning_rate": 3.1150565501901557e-06, "loss": 0.1556, "step": 389825 }, { "epoch": 3.83, "grad_norm": 8.349804878234863, "learning_rate": 3.1149324277359073e-06, "loss": 0.0828, "step": 389850 }, { "epoch": 3.83, "grad_norm": 4.782054424285889, "learning_rate": 3.114808305281659e-06, "loss": 0.1724, "step": 389875 }, { "epoch": 3.83, "grad_norm": 6.473759174346924, "learning_rate": 3.1146841828274106e-06, "loss": 0.0787, "step": 389900 }, { "epoch": 3.83, "grad_norm": 5.007660865783691, "learning_rate": 3.114560060373162e-06, "loss": 0.1828, "step": 389925 }, { "epoch": 3.83, "grad_norm": 5.187551021575928, "learning_rate": 3.114435937918914e-06, "loss": 0.0843, "step": 389950 }, { "epoch": 3.83, "grad_norm": 7.897454261779785, "learning_rate": 3.114311815464665e-06, "loss": 0.2256, "step": 389975 }, { "epoch": 3.83, "grad_norm": 4.035530090332031, "learning_rate": 3.1141876930104163e-06, "loss": 0.0568, "step": 390000 }, { "epoch": 3.83, "grad_norm": 5.487347602844238, "learning_rate": 3.1140635705561684e-06, "loss": 0.1787, "step": 390025 }, { "epoch": 3.84, "grad_norm": 11.714007377624512, "learning_rate": 3.1139394481019196e-06, "loss": 0.075, "step": 390050 }, { "epoch": 3.84, "grad_norm": 6.019669532775879, "learning_rate": 3.113815325647671e-06, "loss": 0.1946, "step": 390075 }, { "epoch": 3.84, "grad_norm": 4.805275917053223, "learning_rate": 3.113691203193423e-06, "loss": 0.0736, "step": 390100 }, { "epoch": 3.84, "grad_norm": 5.278816223144531, "learning_rate": 3.1135670807391745e-06, "loss": 0.1854, "step": 390125 }, { "epoch": 3.84, "grad_norm": 0.6323800683021545, "learning_rate": 3.1134429582849257e-06, "loss": 0.0577, "step": 390150 }, { "epoch": 3.84, "grad_norm": 4.042634963989258, "learning_rate": 3.1133188358306777e-06, "loss": 0.154, "step": 390175 }, { "epoch": 3.84, "grad_norm": 9.24535846710205, "learning_rate": 3.113194713376429e-06, "loss": 0.0726, "step": 390200 }, { "epoch": 3.84, "grad_norm": 4.229316234588623, "learning_rate": 3.11307059092218e-06, "loss": 0.1839, "step": 390225 }, { "epoch": 3.84, "grad_norm": 3.0951645374298096, "learning_rate": 3.1129464684679322e-06, "loss": 0.0889, "step": 390250 }, { "epoch": 3.84, "grad_norm": 6.340314865112305, "learning_rate": 3.1128223460136834e-06, "loss": 0.1299, "step": 390275 }, { "epoch": 3.84, "grad_norm": 12.844826698303223, "learning_rate": 3.112698223559435e-06, "loss": 0.0993, "step": 390300 }, { "epoch": 3.84, "grad_norm": 5.11911153793335, "learning_rate": 3.1125741011051863e-06, "loss": 0.2183, "step": 390325 }, { "epoch": 3.84, "grad_norm": 8.084732055664062, "learning_rate": 3.1124499786509383e-06, "loss": 0.0999, "step": 390350 }, { "epoch": 3.84, "grad_norm": 9.698199272155762, "learning_rate": 3.1123258561966895e-06, "loss": 0.1801, "step": 390375 }, { "epoch": 3.84, "grad_norm": 6.218752384185791, "learning_rate": 3.1122017337424408e-06, "loss": 0.0654, "step": 390400 }, { "epoch": 3.84, "grad_norm": 3.1390531063079834, "learning_rate": 3.112077611288193e-06, "loss": 0.1832, "step": 390425 }, { "epoch": 3.84, "grad_norm": 13.940661430358887, "learning_rate": 3.111953488833944e-06, "loss": 0.0961, "step": 390450 }, { "epoch": 3.84, "grad_norm": 4.818326473236084, "learning_rate": 3.1118293663796957e-06, "loss": 0.2126, "step": 390475 }, { "epoch": 3.84, "grad_norm": 6.663196086883545, "learning_rate": 3.1117052439254473e-06, "loss": 0.0774, "step": 390500 }, { "epoch": 3.84, "grad_norm": 4.820048809051514, "learning_rate": 3.111581121471199e-06, "loss": 0.1559, "step": 390525 }, { "epoch": 3.84, "grad_norm": 7.821446418762207, "learning_rate": 3.11145699901695e-06, "loss": 0.0725, "step": 390550 }, { "epoch": 3.84, "grad_norm": 5.998834609985352, "learning_rate": 3.111332876562702e-06, "loss": 0.2145, "step": 390575 }, { "epoch": 3.84, "grad_norm": 6.565832138061523, "learning_rate": 3.1112087541084534e-06, "loss": 0.0831, "step": 390600 }, { "epoch": 3.84, "grad_norm": 4.819139003753662, "learning_rate": 3.1110846316542046e-06, "loss": 0.1545, "step": 390625 }, { "epoch": 3.84, "grad_norm": 8.463109016418457, "learning_rate": 3.1109605091999567e-06, "loss": 0.0817, "step": 390650 }, { "epoch": 3.84, "grad_norm": 6.764618396759033, "learning_rate": 3.110836386745708e-06, "loss": 0.1838, "step": 390675 }, { "epoch": 3.84, "grad_norm": 2.7696402072906494, "learning_rate": 3.1107122642914595e-06, "loss": 0.0526, "step": 390700 }, { "epoch": 3.84, "grad_norm": 3.4859018325805664, "learning_rate": 3.110588141837211e-06, "loss": 0.1809, "step": 390725 }, { "epoch": 3.84, "grad_norm": 10.756189346313477, "learning_rate": 3.110464019382963e-06, "loss": 0.0703, "step": 390750 }, { "epoch": 3.84, "grad_norm": 6.163197040557861, "learning_rate": 3.110339896928714e-06, "loss": 0.1862, "step": 390775 }, { "epoch": 3.84, "grad_norm": 10.112724304199219, "learning_rate": 3.110215774474466e-06, "loss": 0.0815, "step": 390800 }, { "epoch": 3.84, "grad_norm": 5.090561866760254, "learning_rate": 3.1100916520202173e-06, "loss": 0.1527, "step": 390825 }, { "epoch": 3.84, "grad_norm": 7.5519118309021, "learning_rate": 3.1099675295659685e-06, "loss": 0.079, "step": 390850 }, { "epoch": 3.84, "grad_norm": 6.591720104217529, "learning_rate": 3.1098434071117206e-06, "loss": 0.1998, "step": 390875 }, { "epoch": 3.84, "grad_norm": 5.037619590759277, "learning_rate": 3.1097192846574718e-06, "loss": 0.0725, "step": 390900 }, { "epoch": 3.84, "grad_norm": 5.525399208068848, "learning_rate": 3.1095951622032234e-06, "loss": 0.1916, "step": 390925 }, { "epoch": 3.84, "grad_norm": 16.517131805419922, "learning_rate": 3.109471039748975e-06, "loss": 0.0848, "step": 390950 }, { "epoch": 3.84, "grad_norm": 5.217976093292236, "learning_rate": 3.1093469172947267e-06, "loss": 0.1944, "step": 390975 }, { "epoch": 3.84, "grad_norm": 9.927956581115723, "learning_rate": 3.109222794840478e-06, "loss": 0.0601, "step": 391000 }, { "epoch": 3.84, "grad_norm": 4.534893989562988, "learning_rate": 3.10909867238623e-06, "loss": 0.1476, "step": 391025 }, { "epoch": 3.84, "grad_norm": 4.314038276672363, "learning_rate": 3.108974549931981e-06, "loss": 0.1127, "step": 391050 }, { "epoch": 3.85, "grad_norm": 4.310606956481934, "learning_rate": 3.1088504274777328e-06, "loss": 0.1892, "step": 391075 }, { "epoch": 3.85, "grad_norm": 12.52126693725586, "learning_rate": 3.1087263050234844e-06, "loss": 0.0994, "step": 391100 }, { "epoch": 3.85, "grad_norm": 3.855036497116089, "learning_rate": 3.108602182569236e-06, "loss": 0.1881, "step": 391125 }, { "epoch": 3.85, "grad_norm": 5.930612087249756, "learning_rate": 3.1084780601149873e-06, "loss": 0.0777, "step": 391150 }, { "epoch": 3.85, "grad_norm": 4.246500015258789, "learning_rate": 3.1083539376607385e-06, "loss": 0.1663, "step": 391175 }, { "epoch": 3.85, "grad_norm": 11.226313591003418, "learning_rate": 3.1082298152064905e-06, "loss": 0.0522, "step": 391200 }, { "epoch": 3.85, "grad_norm": 5.929023742675781, "learning_rate": 3.1081056927522417e-06, "loss": 0.1523, "step": 391225 }, { "epoch": 3.85, "grad_norm": 7.717013359069824, "learning_rate": 3.1079815702979934e-06, "loss": 0.066, "step": 391250 }, { "epoch": 3.85, "grad_norm": 4.232765197753906, "learning_rate": 3.107857447843745e-06, "loss": 0.1754, "step": 391275 }, { "epoch": 3.85, "grad_norm": 7.5883049964904785, "learning_rate": 3.1077333253894967e-06, "loss": 0.0865, "step": 391300 }, { "epoch": 3.85, "grad_norm": 5.455419540405273, "learning_rate": 3.107609202935248e-06, "loss": 0.1869, "step": 391325 }, { "epoch": 3.85, "grad_norm": 5.62143611907959, "learning_rate": 3.107485080481e-06, "loss": 0.0873, "step": 391350 }, { "epoch": 3.85, "grad_norm": 4.654149055480957, "learning_rate": 3.107360958026751e-06, "loss": 0.139, "step": 391375 }, { "epoch": 3.85, "grad_norm": 8.237573623657227, "learning_rate": 3.1072368355725023e-06, "loss": 0.0766, "step": 391400 }, { "epoch": 3.85, "grad_norm": 13.475412368774414, "learning_rate": 3.1071127131182544e-06, "loss": 0.145, "step": 391425 }, { "epoch": 3.85, "grad_norm": 5.983034133911133, "learning_rate": 3.1069885906640056e-06, "loss": 0.0814, "step": 391450 }, { "epoch": 3.85, "grad_norm": 6.080432891845703, "learning_rate": 3.1068644682097573e-06, "loss": 0.1526, "step": 391475 }, { "epoch": 3.85, "grad_norm": 13.604430198669434, "learning_rate": 3.106740345755509e-06, "loss": 0.0886, "step": 391500 }, { "epoch": 3.85, "grad_norm": 5.600903511047363, "learning_rate": 3.1066162233012605e-06, "loss": 0.1726, "step": 391525 }, { "epoch": 3.85, "grad_norm": 4.958635330200195, "learning_rate": 3.1064921008470117e-06, "loss": 0.071, "step": 391550 }, { "epoch": 3.85, "grad_norm": 4.892131328582764, "learning_rate": 3.106367978392764e-06, "loss": 0.1871, "step": 391575 }, { "epoch": 3.85, "grad_norm": 12.49624252319336, "learning_rate": 3.106243855938515e-06, "loss": 0.0734, "step": 391600 }, { "epoch": 3.85, "grad_norm": 4.539218902587891, "learning_rate": 3.1061197334842662e-06, "loss": 0.1698, "step": 391625 }, { "epoch": 3.85, "grad_norm": 8.234764099121094, "learning_rate": 3.1059956110300183e-06, "loss": 0.077, "step": 391650 }, { "epoch": 3.85, "grad_norm": 5.641556739807129, "learning_rate": 3.1058714885757695e-06, "loss": 0.2004, "step": 391675 }, { "epoch": 3.85, "grad_norm": 8.813301086425781, "learning_rate": 3.105747366121521e-06, "loss": 0.0878, "step": 391700 }, { "epoch": 3.85, "grad_norm": 3.6124427318573, "learning_rate": 3.1056232436672728e-06, "loss": 0.168, "step": 391725 }, { "epoch": 3.85, "grad_norm": 2.977829933166504, "learning_rate": 3.1054991212130244e-06, "loss": 0.086, "step": 391750 }, { "epoch": 3.85, "grad_norm": 4.134501934051514, "learning_rate": 3.1053749987587756e-06, "loss": 0.1676, "step": 391775 }, { "epoch": 3.85, "grad_norm": 9.01533317565918, "learning_rate": 3.1052508763045277e-06, "loss": 0.0646, "step": 391800 }, { "epoch": 3.85, "grad_norm": 4.408246040344238, "learning_rate": 3.105126753850279e-06, "loss": 0.2251, "step": 391825 }, { "epoch": 3.85, "grad_norm": 8.545998573303223, "learning_rate": 3.10500263139603e-06, "loss": 0.0893, "step": 391850 }, { "epoch": 3.85, "grad_norm": 4.835101127624512, "learning_rate": 3.104883473839952e-06, "loss": 0.1943, "step": 391875 }, { "epoch": 3.85, "grad_norm": 8.455462455749512, "learning_rate": 3.104759351385703e-06, "loss": 0.0669, "step": 391900 }, { "epoch": 3.85, "grad_norm": 3.569695472717285, "learning_rate": 3.104635228931455e-06, "loss": 0.1857, "step": 391925 }, { "epoch": 3.85, "grad_norm": 13.476670265197754, "learning_rate": 3.1045111064772064e-06, "loss": 0.0641, "step": 391950 }, { "epoch": 3.85, "grad_norm": 3.811229705810547, "learning_rate": 3.104386984022958e-06, "loss": 0.1958, "step": 391975 }, { "epoch": 3.85, "grad_norm": 3.7935373783111572, "learning_rate": 3.1042628615687093e-06, "loss": 0.0818, "step": 392000 }, { "epoch": 3.85, "grad_norm": 4.169443607330322, "learning_rate": 3.1041387391144613e-06, "loss": 0.148, "step": 392025 }, { "epoch": 3.85, "grad_norm": 5.256955623626709, "learning_rate": 3.1040146166602126e-06, "loss": 0.0656, "step": 392050 }, { "epoch": 3.85, "grad_norm": 4.800978183746338, "learning_rate": 3.1038904942059638e-06, "loss": 0.1908, "step": 392075 }, { "epoch": 3.86, "grad_norm": 11.872097969055176, "learning_rate": 3.103766371751716e-06, "loss": 0.0582, "step": 392100 }, { "epoch": 3.86, "grad_norm": 4.233293056488037, "learning_rate": 3.103642249297467e-06, "loss": 0.1795, "step": 392125 }, { "epoch": 3.86, "grad_norm": 16.925817489624023, "learning_rate": 3.1035181268432187e-06, "loss": 0.0699, "step": 392150 }, { "epoch": 3.86, "grad_norm": 4.926158905029297, "learning_rate": 3.1033940043889703e-06, "loss": 0.1931, "step": 392175 }, { "epoch": 3.86, "grad_norm": 6.571911811828613, "learning_rate": 3.103269881934722e-06, "loss": 0.0857, "step": 392200 }, { "epoch": 3.86, "grad_norm": 6.537509441375732, "learning_rate": 3.103145759480473e-06, "loss": 0.1676, "step": 392225 }, { "epoch": 3.86, "grad_norm": 9.675765991210938, "learning_rate": 3.103021637026225e-06, "loss": 0.0537, "step": 392250 }, { "epoch": 3.86, "grad_norm": 5.581658363342285, "learning_rate": 3.1028975145719764e-06, "loss": 0.1756, "step": 392275 }, { "epoch": 3.86, "grad_norm": 6.548645973205566, "learning_rate": 3.1027733921177276e-06, "loss": 0.0829, "step": 392300 }, { "epoch": 3.86, "grad_norm": 3.1850409507751465, "learning_rate": 3.1026492696634797e-06, "loss": 0.1694, "step": 392325 }, { "epoch": 3.86, "grad_norm": 7.600452423095703, "learning_rate": 3.102525147209231e-06, "loss": 0.0743, "step": 392350 }, { "epoch": 3.86, "grad_norm": 3.1468403339385986, "learning_rate": 3.1024010247549825e-06, "loss": 0.1963, "step": 392375 }, { "epoch": 3.86, "grad_norm": 13.339334487915039, "learning_rate": 3.102276902300734e-06, "loss": 0.0909, "step": 392400 }, { "epoch": 3.86, "grad_norm": 5.162622928619385, "learning_rate": 3.102152779846486e-06, "loss": 0.167, "step": 392425 }, { "epoch": 3.86, "grad_norm": 7.566372871398926, "learning_rate": 3.102028657392237e-06, "loss": 0.0644, "step": 392450 }, { "epoch": 3.86, "grad_norm": 8.077232360839844, "learning_rate": 3.101904534937989e-06, "loss": 0.2074, "step": 392475 }, { "epoch": 3.86, "grad_norm": 2.6525492668151855, "learning_rate": 3.1017804124837403e-06, "loss": 0.0691, "step": 392500 }, { "epoch": 3.86, "grad_norm": 5.400857925415039, "learning_rate": 3.1016562900294915e-06, "loss": 0.1912, "step": 392525 }, { "epoch": 3.86, "grad_norm": 8.055712699890137, "learning_rate": 3.1015321675752436e-06, "loss": 0.0653, "step": 392550 }, { "epoch": 3.86, "grad_norm": 3.8945975303649902, "learning_rate": 3.1014080451209948e-06, "loss": 0.1885, "step": 392575 }, { "epoch": 3.86, "grad_norm": 1.4584938287734985, "learning_rate": 3.1012839226667464e-06, "loss": 0.0608, "step": 392600 }, { "epoch": 3.86, "grad_norm": 3.8206489086151123, "learning_rate": 3.1011598002124976e-06, "loss": 0.1743, "step": 392625 }, { "epoch": 3.86, "grad_norm": 5.84746789932251, "learning_rate": 3.1010356777582497e-06, "loss": 0.0597, "step": 392650 }, { "epoch": 3.86, "grad_norm": 4.724675178527832, "learning_rate": 3.100911555304001e-06, "loss": 0.2038, "step": 392675 }, { "epoch": 3.86, "grad_norm": 10.800891876220703, "learning_rate": 3.100787432849752e-06, "loss": 0.0779, "step": 392700 }, { "epoch": 3.86, "grad_norm": 4.163821220397949, "learning_rate": 3.100663310395504e-06, "loss": 0.2174, "step": 392725 }, { "epoch": 3.86, "grad_norm": 12.793859481811523, "learning_rate": 3.1005391879412554e-06, "loss": 0.0545, "step": 392750 }, { "epoch": 3.86, "grad_norm": 8.856325149536133, "learning_rate": 3.100415065487007e-06, "loss": 0.1648, "step": 392775 }, { "epoch": 3.86, "grad_norm": 6.878993988037109, "learning_rate": 3.1002909430327586e-06, "loss": 0.0801, "step": 392800 }, { "epoch": 3.86, "grad_norm": 4.091322898864746, "learning_rate": 3.1001668205785103e-06, "loss": 0.178, "step": 392825 }, { "epoch": 3.86, "grad_norm": 12.90689754486084, "learning_rate": 3.1000426981242615e-06, "loss": 0.0778, "step": 392850 }, { "epoch": 3.86, "grad_norm": 4.875079154968262, "learning_rate": 3.0999185756700135e-06, "loss": 0.1904, "step": 392875 }, { "epoch": 3.86, "grad_norm": 25.721467971801758, "learning_rate": 3.0997944532157648e-06, "loss": 0.0932, "step": 392900 }, { "epoch": 3.86, "grad_norm": 4.780977725982666, "learning_rate": 3.099670330761516e-06, "loss": 0.1747, "step": 392925 }, { "epoch": 3.86, "grad_norm": 9.184414863586426, "learning_rate": 3.099546208307268e-06, "loss": 0.0827, "step": 392950 }, { "epoch": 3.86, "grad_norm": 4.694855690002441, "learning_rate": 3.0994220858530192e-06, "loss": 0.1798, "step": 392975 }, { "epoch": 3.86, "grad_norm": 5.967635154724121, "learning_rate": 3.099297963398771e-06, "loss": 0.062, "step": 393000 }, { "epoch": 3.86, "grad_norm": 5.578059196472168, "learning_rate": 3.0991738409445225e-06, "loss": 0.1837, "step": 393025 }, { "epoch": 3.86, "grad_norm": 7.538774490356445, "learning_rate": 3.099049718490274e-06, "loss": 0.0992, "step": 393050 }, { "epoch": 3.86, "grad_norm": 5.805355548858643, "learning_rate": 3.0989255960360254e-06, "loss": 0.2122, "step": 393075 }, { "epoch": 3.87, "grad_norm": 6.243109703063965, "learning_rate": 3.0988014735817774e-06, "loss": 0.0617, "step": 393100 }, { "epoch": 3.87, "grad_norm": 4.074704647064209, "learning_rate": 3.0986773511275286e-06, "loss": 0.1677, "step": 393125 }, { "epoch": 3.87, "grad_norm": 3.2047958374023438, "learning_rate": 3.09855322867328e-06, "loss": 0.057, "step": 393150 }, { "epoch": 3.87, "grad_norm": 9.485058784484863, "learning_rate": 3.098429106219032e-06, "loss": 0.1897, "step": 393175 }, { "epoch": 3.87, "grad_norm": 8.387953758239746, "learning_rate": 3.098304983764783e-06, "loss": 0.066, "step": 393200 }, { "epoch": 3.87, "grad_norm": 4.385645389556885, "learning_rate": 3.0981808613105347e-06, "loss": 0.2173, "step": 393225 }, { "epoch": 3.87, "grad_norm": 4.378373622894287, "learning_rate": 3.0980567388562864e-06, "loss": 0.0663, "step": 393250 }, { "epoch": 3.87, "grad_norm": 5.906747817993164, "learning_rate": 3.097932616402038e-06, "loss": 0.2005, "step": 393275 }, { "epoch": 3.87, "grad_norm": 5.755321979522705, "learning_rate": 3.0978084939477892e-06, "loss": 0.0698, "step": 393300 }, { "epoch": 3.87, "grad_norm": 4.792512893676758, "learning_rate": 3.0976843714935413e-06, "loss": 0.199, "step": 393325 }, { "epoch": 3.87, "grad_norm": 14.238938331604004, "learning_rate": 3.0975602490392925e-06, "loss": 0.0664, "step": 393350 }, { "epoch": 3.87, "grad_norm": 3.6404917240142822, "learning_rate": 3.0974361265850437e-06, "loss": 0.1802, "step": 393375 }, { "epoch": 3.87, "grad_norm": 7.724837779998779, "learning_rate": 3.0973120041307958e-06, "loss": 0.0855, "step": 393400 }, { "epoch": 3.87, "grad_norm": 4.594176769256592, "learning_rate": 3.097187881676547e-06, "loss": 0.1949, "step": 393425 }, { "epoch": 3.87, "grad_norm": 9.395286560058594, "learning_rate": 3.0970637592222986e-06, "loss": 0.0731, "step": 393450 }, { "epoch": 3.87, "grad_norm": 3.56353497505188, "learning_rate": 3.09693963676805e-06, "loss": 0.1587, "step": 393475 }, { "epoch": 3.87, "grad_norm": 19.922245025634766, "learning_rate": 3.096815514313802e-06, "loss": 0.0883, "step": 393500 }, { "epoch": 3.87, "grad_norm": 5.00341272354126, "learning_rate": 3.096691391859553e-06, "loss": 0.2062, "step": 393525 }, { "epoch": 3.87, "grad_norm": 14.296542167663574, "learning_rate": 3.0965672694053043e-06, "loss": 0.0578, "step": 393550 }, { "epoch": 3.87, "grad_norm": 2.8616154193878174, "learning_rate": 3.0964431469510564e-06, "loss": 0.1785, "step": 393575 }, { "epoch": 3.87, "grad_norm": 6.540980339050293, "learning_rate": 3.0963190244968076e-06, "loss": 0.0666, "step": 393600 }, { "epoch": 3.87, "grad_norm": 6.816886901855469, "learning_rate": 3.096194902042559e-06, "loss": 0.1586, "step": 393625 }, { "epoch": 3.87, "grad_norm": 15.649012565612793, "learning_rate": 3.096070779588311e-06, "loss": 0.0684, "step": 393650 }, { "epoch": 3.87, "grad_norm": 3.545579671859741, "learning_rate": 3.0959466571340625e-06, "loss": 0.1691, "step": 393675 }, { "epoch": 3.87, "grad_norm": 6.6993818283081055, "learning_rate": 3.0958225346798137e-06, "loss": 0.0732, "step": 393700 }, { "epoch": 3.87, "grad_norm": 5.554410457611084, "learning_rate": 3.0956984122255657e-06, "loss": 0.174, "step": 393725 }, { "epoch": 3.87, "grad_norm": 4.733217716217041, "learning_rate": 3.095574289771317e-06, "loss": 0.0506, "step": 393750 }, { "epoch": 3.87, "grad_norm": 5.329948425292969, "learning_rate": 3.095450167317068e-06, "loss": 0.1891, "step": 393775 }, { "epoch": 3.87, "grad_norm": 14.562947273254395, "learning_rate": 3.0953260448628202e-06, "loss": 0.0866, "step": 393800 }, { "epoch": 3.87, "grad_norm": 5.994865894317627, "learning_rate": 3.0952019224085714e-06, "loss": 0.201, "step": 393825 }, { "epoch": 3.87, "grad_norm": 10.74074935913086, "learning_rate": 3.095077799954323e-06, "loss": 0.0809, "step": 393850 }, { "epoch": 3.87, "grad_norm": 4.779068946838379, "learning_rate": 3.0949586423982445e-06, "loss": 0.1567, "step": 393875 }, { "epoch": 3.87, "grad_norm": 15.484197616577148, "learning_rate": 3.094834519943996e-06, "loss": 0.0815, "step": 393900 }, { "epoch": 3.87, "grad_norm": 17.407535552978516, "learning_rate": 3.094710397489748e-06, "loss": 0.1623, "step": 393925 }, { "epoch": 3.87, "grad_norm": 3.5460283756256104, "learning_rate": 3.0945862750354994e-06, "loss": 0.0715, "step": 393950 }, { "epoch": 3.87, "grad_norm": 4.263329982757568, "learning_rate": 3.0944621525812506e-06, "loss": 0.2192, "step": 393975 }, { "epoch": 3.87, "grad_norm": 8.894806861877441, "learning_rate": 3.0943380301270027e-06, "loss": 0.1002, "step": 394000 }, { "epoch": 3.87, "grad_norm": 8.722495079040527, "learning_rate": 3.094213907672754e-06, "loss": 0.2087, "step": 394025 }, { "epoch": 3.87, "grad_norm": 9.666182518005371, "learning_rate": 3.0940897852185055e-06, "loss": 0.0768, "step": 394050 }, { "epoch": 3.87, "grad_norm": 5.16067361831665, "learning_rate": 3.0939656627642568e-06, "loss": 0.2289, "step": 394075 }, { "epoch": 3.87, "grad_norm": 9.253955841064453, "learning_rate": 3.093841540310009e-06, "loss": 0.0754, "step": 394100 }, { "epoch": 3.88, "grad_norm": 2.0500402450561523, "learning_rate": 3.09371741785576e-06, "loss": 0.1751, "step": 394125 }, { "epoch": 3.88, "grad_norm": 4.415120601654053, "learning_rate": 3.0935932954015112e-06, "loss": 0.0464, "step": 394150 }, { "epoch": 3.88, "grad_norm": 8.106776237487793, "learning_rate": 3.0934691729472633e-06, "loss": 0.1855, "step": 394175 }, { "epoch": 3.88, "grad_norm": 3.5110280513763428, "learning_rate": 3.0933450504930145e-06, "loss": 0.0672, "step": 394200 }, { "epoch": 3.88, "grad_norm": 6.099782466888428, "learning_rate": 3.093220928038766e-06, "loss": 0.1936, "step": 394225 }, { "epoch": 3.88, "grad_norm": 5.325457572937012, "learning_rate": 3.0930968055845178e-06, "loss": 0.0738, "step": 394250 }, { "epoch": 3.88, "grad_norm": 8.527777671813965, "learning_rate": 3.0929726831302694e-06, "loss": 0.232, "step": 394275 }, { "epoch": 3.88, "grad_norm": 8.624693870544434, "learning_rate": 3.0928485606760206e-06, "loss": 0.0935, "step": 394300 }, { "epoch": 3.88, "grad_norm": 4.133591651916504, "learning_rate": 3.0927244382217727e-06, "loss": 0.1788, "step": 394325 }, { "epoch": 3.88, "grad_norm": 7.267733097076416, "learning_rate": 3.092600315767524e-06, "loss": 0.0702, "step": 394350 }, { "epoch": 3.88, "grad_norm": 10.771632194519043, "learning_rate": 3.092476193313275e-06, "loss": 0.1888, "step": 394375 }, { "epoch": 3.88, "grad_norm": 8.567148208618164, "learning_rate": 3.092352070859027e-06, "loss": 0.1024, "step": 394400 }, { "epoch": 3.88, "grad_norm": 6.333133220672607, "learning_rate": 3.0922279484047784e-06, "loss": 0.1886, "step": 394425 }, { "epoch": 3.88, "grad_norm": 5.8841753005981445, "learning_rate": 3.09210382595053e-06, "loss": 0.0699, "step": 394450 }, { "epoch": 3.88, "grad_norm": 4.242857456207275, "learning_rate": 3.0919797034962816e-06, "loss": 0.1525, "step": 394475 }, { "epoch": 3.88, "grad_norm": 8.343807220458984, "learning_rate": 3.0918555810420333e-06, "loss": 0.0598, "step": 394500 }, { "epoch": 3.88, "grad_norm": 5.0850396156311035, "learning_rate": 3.0917314585877845e-06, "loss": 0.176, "step": 394525 }, { "epoch": 3.88, "grad_norm": 6.5889763832092285, "learning_rate": 3.0916073361335365e-06, "loss": 0.0624, "step": 394550 }, { "epoch": 3.88, "grad_norm": 4.020398139953613, "learning_rate": 3.0914832136792878e-06, "loss": 0.2109, "step": 394575 }, { "epoch": 3.88, "grad_norm": 10.373992919921875, "learning_rate": 3.091359091225039e-06, "loss": 0.0839, "step": 394600 }, { "epoch": 3.88, "grad_norm": 4.345200538635254, "learning_rate": 3.091234968770791e-06, "loss": 0.1806, "step": 394625 }, { "epoch": 3.88, "grad_norm": 1.8705930709838867, "learning_rate": 3.0911108463165422e-06, "loss": 0.0756, "step": 394650 }, { "epoch": 3.88, "grad_norm": 22.965606689453125, "learning_rate": 3.090986723862294e-06, "loss": 0.1692, "step": 394675 }, { "epoch": 3.88, "grad_norm": 5.956781387329102, "learning_rate": 3.0908626014080455e-06, "loss": 0.0805, "step": 394700 }, { "epoch": 3.88, "grad_norm": 4.784816265106201, "learning_rate": 3.090738478953797e-06, "loss": 0.1937, "step": 394725 }, { "epoch": 3.88, "grad_norm": 6.923928260803223, "learning_rate": 3.0906143564995484e-06, "loss": 0.0888, "step": 394750 }, { "epoch": 3.88, "grad_norm": 4.210282325744629, "learning_rate": 3.0904902340453004e-06, "loss": 0.169, "step": 394775 }, { "epoch": 3.88, "grad_norm": 6.884840488433838, "learning_rate": 3.0903661115910516e-06, "loss": 0.0849, "step": 394800 }, { "epoch": 3.88, "grad_norm": 3.950047492980957, "learning_rate": 3.090241989136803e-06, "loss": 0.2089, "step": 394825 }, { "epoch": 3.88, "grad_norm": 13.149582862854004, "learning_rate": 3.090117866682555e-06, "loss": 0.0626, "step": 394850 }, { "epoch": 3.88, "grad_norm": 4.5118608474731445, "learning_rate": 3.089993744228306e-06, "loss": 0.1987, "step": 394875 }, { "epoch": 3.88, "grad_norm": 10.113042831420898, "learning_rate": 3.0898696217740577e-06, "loss": 0.0841, "step": 394900 }, { "epoch": 3.88, "grad_norm": 4.486703872680664, "learning_rate": 3.089745499319809e-06, "loss": 0.1909, "step": 394925 }, { "epoch": 3.88, "grad_norm": 6.814149856567383, "learning_rate": 3.089621376865561e-06, "loss": 0.0558, "step": 394950 }, { "epoch": 3.88, "grad_norm": 5.974793434143066, "learning_rate": 3.0894972544113122e-06, "loss": 0.2037, "step": 394975 }, { "epoch": 3.88, "grad_norm": 5.698663234710693, "learning_rate": 3.0893731319570634e-06, "loss": 0.1069, "step": 395000 }, { "epoch": 3.88, "grad_norm": 3.953885316848755, "learning_rate": 3.0892490095028155e-06, "loss": 0.1662, "step": 395025 }, { "epoch": 3.88, "grad_norm": 6.846494197845459, "learning_rate": 3.0891248870485667e-06, "loss": 0.0625, "step": 395050 }, { "epoch": 3.88, "grad_norm": 4.696717262268066, "learning_rate": 3.0890007645943183e-06, "loss": 0.1875, "step": 395075 }, { "epoch": 3.88, "grad_norm": 4.322030067443848, "learning_rate": 3.08887664214007e-06, "loss": 0.0707, "step": 395100 }, { "epoch": 3.88, "grad_norm": 3.8499999046325684, "learning_rate": 3.0887525196858216e-06, "loss": 0.1924, "step": 395125 }, { "epoch": 3.89, "grad_norm": 10.157114028930664, "learning_rate": 3.088628397231573e-06, "loss": 0.0764, "step": 395150 }, { "epoch": 3.89, "grad_norm": 3.6392877101898193, "learning_rate": 3.088504274777325e-06, "loss": 0.1737, "step": 395175 }, { "epoch": 3.89, "grad_norm": 13.00044059753418, "learning_rate": 3.088380152323076e-06, "loss": 0.0562, "step": 395200 }, { "epoch": 3.89, "grad_norm": 7.045124530792236, "learning_rate": 3.0882560298688273e-06, "loss": 0.2095, "step": 395225 }, { "epoch": 3.89, "grad_norm": 5.546828269958496, "learning_rate": 3.0881319074145794e-06, "loss": 0.0633, "step": 395250 }, { "epoch": 3.89, "grad_norm": 3.9045839309692383, "learning_rate": 3.0880077849603306e-06, "loss": 0.1826, "step": 395275 }, { "epoch": 3.89, "grad_norm": 6.22730827331543, "learning_rate": 3.087883662506082e-06, "loss": 0.0766, "step": 395300 }, { "epoch": 3.89, "grad_norm": 4.547641754150391, "learning_rate": 3.087759540051834e-06, "loss": 0.1631, "step": 395325 }, { "epoch": 3.89, "grad_norm": 10.159828186035156, "learning_rate": 3.0876354175975855e-06, "loss": 0.0632, "step": 395350 }, { "epoch": 3.89, "grad_norm": 3.545048952102661, "learning_rate": 3.0875112951433367e-06, "loss": 0.1657, "step": 395375 }, { "epoch": 3.89, "grad_norm": 10.184060096740723, "learning_rate": 3.0873871726890888e-06, "loss": 0.0955, "step": 395400 }, { "epoch": 3.89, "grad_norm": 4.647834777832031, "learning_rate": 3.08726305023484e-06, "loss": 0.1629, "step": 395425 }, { "epoch": 3.89, "grad_norm": 4.236319065093994, "learning_rate": 3.087138927780591e-06, "loss": 0.0485, "step": 395450 }, { "epoch": 3.89, "grad_norm": 5.853058815002441, "learning_rate": 3.0870148053263432e-06, "loss": 0.1612, "step": 395475 }, { "epoch": 3.89, "grad_norm": 6.2960615158081055, "learning_rate": 3.0868906828720944e-06, "loss": 0.0728, "step": 395500 }, { "epoch": 3.89, "grad_norm": 3.944958209991455, "learning_rate": 3.086766560417846e-06, "loss": 0.1471, "step": 395525 }, { "epoch": 3.89, "grad_norm": 4.253376483917236, "learning_rate": 3.0866424379635977e-06, "loss": 0.0822, "step": 395550 }, { "epoch": 3.89, "grad_norm": 3.5604989528656006, "learning_rate": 3.0865183155093493e-06, "loss": 0.2079, "step": 395575 }, { "epoch": 3.89, "grad_norm": 8.33820915222168, "learning_rate": 3.0863941930551006e-06, "loss": 0.0823, "step": 395600 }, { "epoch": 3.89, "grad_norm": 3.8768768310546875, "learning_rate": 3.0862700706008526e-06, "loss": 0.1825, "step": 395625 }, { "epoch": 3.89, "grad_norm": 3.1276466846466064, "learning_rate": 3.086145948146604e-06, "loss": 0.0609, "step": 395650 }, { "epoch": 3.89, "grad_norm": 5.217674255371094, "learning_rate": 3.086021825692355e-06, "loss": 0.1788, "step": 395675 }, { "epoch": 3.89, "grad_norm": 12.41965103149414, "learning_rate": 3.085897703238107e-06, "loss": 0.0933, "step": 395700 }, { "epoch": 3.89, "grad_norm": 7.159239768981934, "learning_rate": 3.0857735807838583e-06, "loss": 0.1767, "step": 395725 }, { "epoch": 3.89, "grad_norm": 7.6710076332092285, "learning_rate": 3.08564945832961e-06, "loss": 0.0505, "step": 395750 }, { "epoch": 3.89, "grad_norm": 11.789334297180176, "learning_rate": 3.085525335875361e-06, "loss": 0.2081, "step": 395775 }, { "epoch": 3.89, "grad_norm": 7.629930019378662, "learning_rate": 3.0854012134211132e-06, "loss": 0.086, "step": 395800 }, { "epoch": 3.89, "grad_norm": 4.716104984283447, "learning_rate": 3.0852770909668644e-06, "loss": 0.1696, "step": 395825 }, { "epoch": 3.89, "grad_norm": 8.489553451538086, "learning_rate": 3.0851529685126156e-06, "loss": 0.0616, "step": 395850 }, { "epoch": 3.89, "grad_norm": 5.848435878753662, "learning_rate": 3.0850288460583677e-06, "loss": 0.1836, "step": 395875 }, { "epoch": 3.89, "grad_norm": 18.116985321044922, "learning_rate": 3.084904723604119e-06, "loss": 0.0833, "step": 395900 }, { "epoch": 3.89, "grad_norm": 6.0433831214904785, "learning_rate": 3.0847806011498705e-06, "loss": 0.2036, "step": 395925 }, { "epoch": 3.89, "grad_norm": 10.519112586975098, "learning_rate": 3.084656478695622e-06, "loss": 0.0816, "step": 395950 }, { "epoch": 3.89, "grad_norm": 4.725085735321045, "learning_rate": 3.084532356241374e-06, "loss": 0.1688, "step": 395975 }, { "epoch": 3.89, "grad_norm": 11.211997032165527, "learning_rate": 3.084408233787125e-06, "loss": 0.094, "step": 396000 }, { "epoch": 3.89, "grad_norm": 5.245209693908691, "learning_rate": 3.084284111332877e-06, "loss": 0.1732, "step": 396025 }, { "epoch": 3.89, "grad_norm": 6.069971084594727, "learning_rate": 3.0841599888786283e-06, "loss": 0.0785, "step": 396050 }, { "epoch": 3.89, "grad_norm": 5.076736927032471, "learning_rate": 3.0840358664243795e-06, "loss": 0.1749, "step": 396075 }, { "epoch": 3.89, "grad_norm": 11.75646686553955, "learning_rate": 3.0839117439701316e-06, "loss": 0.0828, "step": 396100 }, { "epoch": 3.89, "grad_norm": 4.240447521209717, "learning_rate": 3.0837925864140526e-06, "loss": 0.2044, "step": 396125 }, { "epoch": 3.9, "grad_norm": 10.769010543823242, "learning_rate": 3.0836684639598047e-06, "loss": 0.0848, "step": 396150 }, { "epoch": 3.9, "grad_norm": 4.590118408203125, "learning_rate": 3.083544341505556e-06, "loss": 0.193, "step": 396175 }, { "epoch": 3.9, "grad_norm": 8.754886627197266, "learning_rate": 3.0834202190513075e-06, "loss": 0.0839, "step": 396200 }, { "epoch": 3.9, "grad_norm": 2.700096368789673, "learning_rate": 3.083296096597059e-06, "loss": 0.1926, "step": 396225 }, { "epoch": 3.9, "grad_norm": 12.304741859436035, "learning_rate": 3.0831719741428108e-06, "loss": 0.0739, "step": 396250 }, { "epoch": 3.9, "grad_norm": 4.696753025054932, "learning_rate": 3.083047851688562e-06, "loss": 0.2197, "step": 396275 }, { "epoch": 3.9, "grad_norm": 5.3845319747924805, "learning_rate": 3.082923729234314e-06, "loss": 0.0669, "step": 396300 }, { "epoch": 3.9, "grad_norm": 6.149573802947998, "learning_rate": 3.0827996067800652e-06, "loss": 0.1846, "step": 396325 }, { "epoch": 3.9, "grad_norm": 16.060855865478516, "learning_rate": 3.0826754843258165e-06, "loss": 0.0729, "step": 396350 }, { "epoch": 3.9, "grad_norm": 4.327937602996826, "learning_rate": 3.082551361871568e-06, "loss": 0.1611, "step": 396375 }, { "epoch": 3.9, "grad_norm": 6.981261730194092, "learning_rate": 3.0824272394173197e-06, "loss": 0.0603, "step": 396400 }, { "epoch": 3.9, "grad_norm": 4.771718502044678, "learning_rate": 3.0823031169630714e-06, "loss": 0.1912, "step": 396425 }, { "epoch": 3.9, "grad_norm": 1.83173668384552, "learning_rate": 3.0821789945088226e-06, "loss": 0.0748, "step": 396450 }, { "epoch": 3.9, "grad_norm": 3.3660411834716797, "learning_rate": 3.0820548720545746e-06, "loss": 0.1914, "step": 396475 }, { "epoch": 3.9, "grad_norm": 0.15090025961399078, "learning_rate": 3.081930749600326e-06, "loss": 0.0665, "step": 396500 }, { "epoch": 3.9, "grad_norm": 10.748024940490723, "learning_rate": 3.081806627146077e-06, "loss": 0.1563, "step": 396525 }, { "epoch": 3.9, "grad_norm": 3.4858109951019287, "learning_rate": 3.081682504691829e-06, "loss": 0.0684, "step": 396550 }, { "epoch": 3.9, "grad_norm": 4.844398498535156, "learning_rate": 3.0815583822375803e-06, "loss": 0.1823, "step": 396575 }, { "epoch": 3.9, "grad_norm": 7.7092108726501465, "learning_rate": 3.081434259783332e-06, "loss": 0.0578, "step": 396600 }, { "epoch": 3.9, "grad_norm": 5.980508804321289, "learning_rate": 3.0813101373290836e-06, "loss": 0.1792, "step": 396625 }, { "epoch": 3.9, "grad_norm": 8.248918533325195, "learning_rate": 3.0811860148748352e-06, "loss": 0.0768, "step": 396650 }, { "epoch": 3.9, "grad_norm": 5.576404571533203, "learning_rate": 3.0810618924205864e-06, "loss": 0.1922, "step": 396675 }, { "epoch": 3.9, "grad_norm": 8.9847993850708, "learning_rate": 3.0809377699663385e-06, "loss": 0.0948, "step": 396700 }, { "epoch": 3.9, "grad_norm": 4.295513153076172, "learning_rate": 3.0808136475120897e-06, "loss": 0.2024, "step": 396725 }, { "epoch": 3.9, "grad_norm": 7.688586235046387, "learning_rate": 3.080689525057841e-06, "loss": 0.0766, "step": 396750 }, { "epoch": 3.9, "grad_norm": 4.951910972595215, "learning_rate": 3.080565402603593e-06, "loss": 0.2366, "step": 396775 }, { "epoch": 3.9, "grad_norm": 6.389841556549072, "learning_rate": 3.080441280149344e-06, "loss": 0.0764, "step": 396800 }, { "epoch": 3.9, "grad_norm": 4.498983860015869, "learning_rate": 3.080317157695096e-06, "loss": 0.1706, "step": 396825 }, { "epoch": 3.9, "grad_norm": 8.199517250061035, "learning_rate": 3.0801930352408475e-06, "loss": 0.0804, "step": 396850 }, { "epoch": 3.9, "grad_norm": 6.665314674377441, "learning_rate": 3.080068912786599e-06, "loss": 0.1999, "step": 396875 }, { "epoch": 3.9, "grad_norm": 10.690201759338379, "learning_rate": 3.0799447903323503e-06, "loss": 0.0764, "step": 396900 }, { "epoch": 3.9, "grad_norm": 10.851581573486328, "learning_rate": 3.0798206678781024e-06, "loss": 0.1687, "step": 396925 }, { "epoch": 3.9, "grad_norm": 9.115705490112305, "learning_rate": 3.0796965454238536e-06, "loss": 0.0749, "step": 396950 }, { "epoch": 3.9, "grad_norm": 4.334133148193359, "learning_rate": 3.0795724229696052e-06, "loss": 0.1965, "step": 396975 }, { "epoch": 3.9, "grad_norm": 4.444189548492432, "learning_rate": 3.079448300515357e-06, "loss": 0.0539, "step": 397000 }, { "epoch": 3.9, "grad_norm": 5.385648250579834, "learning_rate": 3.0793241780611085e-06, "loss": 0.2014, "step": 397025 }, { "epoch": 3.9, "grad_norm": 11.87536334991455, "learning_rate": 3.0792000556068597e-06, "loss": 0.0772, "step": 397050 }, { "epoch": 3.9, "grad_norm": 5.675532341003418, "learning_rate": 3.0790759331526118e-06, "loss": 0.181, "step": 397075 }, { "epoch": 3.9, "grad_norm": 9.643973350524902, "learning_rate": 3.078951810698363e-06, "loss": 0.0575, "step": 397100 }, { "epoch": 3.9, "grad_norm": 8.359944343566895, "learning_rate": 3.078827688244114e-06, "loss": 0.1881, "step": 397125 }, { "epoch": 3.9, "grad_norm": 6.027689456939697, "learning_rate": 3.0787035657898662e-06, "loss": 0.0678, "step": 397150 }, { "epoch": 3.91, "grad_norm": 4.468578815460205, "learning_rate": 3.0785794433356174e-06, "loss": 0.1961, "step": 397175 }, { "epoch": 3.91, "grad_norm": 6.637608051300049, "learning_rate": 3.078455320881369e-06, "loss": 0.0744, "step": 397200 }, { "epoch": 3.91, "grad_norm": 6.187272071838379, "learning_rate": 3.0783311984271203e-06, "loss": 0.1633, "step": 397225 }, { "epoch": 3.91, "grad_norm": 10.489646911621094, "learning_rate": 3.0782070759728724e-06, "loss": 0.0733, "step": 397250 }, { "epoch": 3.91, "grad_norm": 4.952552795410156, "learning_rate": 3.0780829535186236e-06, "loss": 0.1798, "step": 397275 }, { "epoch": 3.91, "grad_norm": 21.420276641845703, "learning_rate": 3.0779588310643748e-06, "loss": 0.1054, "step": 397300 }, { "epoch": 3.91, "grad_norm": 3.9382851123809814, "learning_rate": 3.077834708610127e-06, "loss": 0.1958, "step": 397325 }, { "epoch": 3.91, "grad_norm": 3.888315200805664, "learning_rate": 3.077710586155878e-06, "loss": 0.0736, "step": 397350 }, { "epoch": 3.91, "grad_norm": 4.2316460609436035, "learning_rate": 3.0775864637016297e-06, "loss": 0.2249, "step": 397375 }, { "epoch": 3.91, "grad_norm": 5.191118240356445, "learning_rate": 3.0774623412473813e-06, "loss": 0.0578, "step": 397400 }, { "epoch": 3.91, "grad_norm": 4.596349239349365, "learning_rate": 3.077338218793133e-06, "loss": 0.1547, "step": 397425 }, { "epoch": 3.91, "grad_norm": 3.8324177265167236, "learning_rate": 3.077214096338884e-06, "loss": 0.0828, "step": 397450 }, { "epoch": 3.91, "grad_norm": 5.3985276222229, "learning_rate": 3.0770899738846362e-06, "loss": 0.1925, "step": 397475 }, { "epoch": 3.91, "grad_norm": 4.586624622344971, "learning_rate": 3.0769658514303874e-06, "loss": 0.0761, "step": 397500 }, { "epoch": 3.91, "grad_norm": 3.58725643157959, "learning_rate": 3.0768417289761386e-06, "loss": 0.1614, "step": 397525 }, { "epoch": 3.91, "grad_norm": 8.303885459899902, "learning_rate": 3.0767176065218907e-06, "loss": 0.0671, "step": 397550 }, { "epoch": 3.91, "grad_norm": 5.899742603302002, "learning_rate": 3.076593484067642e-06, "loss": 0.1786, "step": 397575 }, { "epoch": 3.91, "grad_norm": 9.03346061706543, "learning_rate": 3.0764693616133936e-06, "loss": 0.0619, "step": 397600 }, { "epoch": 3.91, "grad_norm": 5.6699934005737305, "learning_rate": 3.076345239159145e-06, "loss": 0.1473, "step": 397625 }, { "epoch": 3.91, "grad_norm": 6.358608245849609, "learning_rate": 3.076221116704897e-06, "loss": 0.0807, "step": 397650 }, { "epoch": 3.91, "grad_norm": 7.660702705383301, "learning_rate": 3.076096994250648e-06, "loss": 0.1991, "step": 397675 }, { "epoch": 3.91, "grad_norm": 11.86819839477539, "learning_rate": 3.0759728717964e-06, "loss": 0.0826, "step": 397700 }, { "epoch": 3.91, "grad_norm": 4.786874771118164, "learning_rate": 3.0758487493421513e-06, "loss": 0.2051, "step": 397725 }, { "epoch": 3.91, "grad_norm": 7.462764263153076, "learning_rate": 3.0757246268879025e-06, "loss": 0.0588, "step": 397750 }, { "epoch": 3.91, "grad_norm": 4.203281879425049, "learning_rate": 3.0756005044336546e-06, "loss": 0.1832, "step": 397775 }, { "epoch": 3.91, "grad_norm": 9.438854217529297, "learning_rate": 3.0754763819794058e-06, "loss": 0.0653, "step": 397800 }, { "epoch": 3.91, "grad_norm": 6.19537353515625, "learning_rate": 3.0753522595251574e-06, "loss": 0.169, "step": 397825 }, { "epoch": 3.91, "grad_norm": 6.586461544036865, "learning_rate": 3.075228137070909e-06, "loss": 0.0619, "step": 397850 }, { "epoch": 3.91, "grad_norm": 11.010497093200684, "learning_rate": 3.0751040146166607e-06, "loss": 0.1593, "step": 397875 }, { "epoch": 3.91, "grad_norm": 17.317922592163086, "learning_rate": 3.074979892162412e-06, "loss": 0.0652, "step": 397900 }, { "epoch": 3.91, "grad_norm": 6.202341556549072, "learning_rate": 3.074855769708164e-06, "loss": 0.2022, "step": 397925 }, { "epoch": 3.91, "grad_norm": 9.889803886413574, "learning_rate": 3.074731647253915e-06, "loss": 0.0782, "step": 397950 }, { "epoch": 3.91, "grad_norm": 5.153050899505615, "learning_rate": 3.0746075247996664e-06, "loss": 0.1819, "step": 397975 }, { "epoch": 3.91, "grad_norm": 11.704794883728027, "learning_rate": 3.0744834023454184e-06, "loss": 0.0737, "step": 398000 }, { "epoch": 3.91, "grad_norm": 4.916092395782471, "learning_rate": 3.0743592798911697e-06, "loss": 0.1968, "step": 398025 }, { "epoch": 3.91, "grad_norm": 9.24295425415039, "learning_rate": 3.0742351574369213e-06, "loss": 0.0884, "step": 398050 }, { "epoch": 3.91, "grad_norm": 5.470932483673096, "learning_rate": 3.0741110349826725e-06, "loss": 0.1759, "step": 398075 }, { "epoch": 3.91, "grad_norm": 11.758462905883789, "learning_rate": 3.0739869125284246e-06, "loss": 0.0908, "step": 398100 }, { "epoch": 3.91, "grad_norm": 4.004223823547363, "learning_rate": 3.0738677549723456e-06, "loss": 0.1827, "step": 398125 }, { "epoch": 3.91, "grad_norm": 11.00682544708252, "learning_rate": 3.0737436325180976e-06, "loss": 0.0639, "step": 398150 }, { "epoch": 3.91, "grad_norm": 5.252964973449707, "learning_rate": 3.073619510063849e-06, "loss": 0.176, "step": 398175 }, { "epoch": 3.92, "grad_norm": 13.133939743041992, "learning_rate": 3.0734953876096e-06, "loss": 0.102, "step": 398200 }, { "epoch": 3.92, "grad_norm": 4.189673900604248, "learning_rate": 3.073371265155352e-06, "loss": 0.2026, "step": 398225 }, { "epoch": 3.92, "grad_norm": 4.744934558868408, "learning_rate": 3.0732471427011033e-06, "loss": 0.0644, "step": 398250 }, { "epoch": 3.92, "grad_norm": 17.181177139282227, "learning_rate": 3.073123020246855e-06, "loss": 0.2162, "step": 398275 }, { "epoch": 3.92, "grad_norm": 6.688724994659424, "learning_rate": 3.0729988977926066e-06, "loss": 0.0767, "step": 398300 }, { "epoch": 3.92, "grad_norm": 4.308200836181641, "learning_rate": 3.0728747753383582e-06, "loss": 0.2001, "step": 398325 }, { "epoch": 3.92, "grad_norm": 5.689128875732422, "learning_rate": 3.0727506528841095e-06, "loss": 0.0563, "step": 398350 }, { "epoch": 3.92, "grad_norm": 8.028482437133789, "learning_rate": 3.0726265304298615e-06, "loss": 0.1705, "step": 398375 }, { "epoch": 3.92, "grad_norm": 10.406371116638184, "learning_rate": 3.0725024079756127e-06, "loss": 0.0819, "step": 398400 }, { "epoch": 3.92, "grad_norm": 3.268895387649536, "learning_rate": 3.072378285521364e-06, "loss": 0.1697, "step": 398425 }, { "epoch": 3.92, "grad_norm": 6.46976375579834, "learning_rate": 3.072254163067116e-06, "loss": 0.0807, "step": 398450 }, { "epoch": 3.92, "grad_norm": 4.401222229003906, "learning_rate": 3.072130040612867e-06, "loss": 0.1666, "step": 398475 }, { "epoch": 3.92, "grad_norm": 5.818552017211914, "learning_rate": 3.072005918158619e-06, "loss": 0.0771, "step": 398500 }, { "epoch": 3.92, "grad_norm": 6.792208194732666, "learning_rate": 3.0718817957043705e-06, "loss": 0.1745, "step": 398525 }, { "epoch": 3.92, "grad_norm": 5.920064449310303, "learning_rate": 3.071757673250122e-06, "loss": 0.0661, "step": 398550 }, { "epoch": 3.92, "grad_norm": 4.981492519378662, "learning_rate": 3.0716335507958733e-06, "loss": 0.1673, "step": 398575 }, { "epoch": 3.92, "grad_norm": 9.799210548400879, "learning_rate": 3.0715094283416245e-06, "loss": 0.085, "step": 398600 }, { "epoch": 3.92, "grad_norm": 4.097463607788086, "learning_rate": 3.0713853058873766e-06, "loss": 0.1963, "step": 398625 }, { "epoch": 3.92, "grad_norm": 8.068235397338867, "learning_rate": 3.071261183433128e-06, "loss": 0.0826, "step": 398650 }, { "epoch": 3.92, "grad_norm": 6.459956169128418, "learning_rate": 3.0711370609788794e-06, "loss": 0.1701, "step": 398675 }, { "epoch": 3.92, "grad_norm": 9.755017280578613, "learning_rate": 3.071012938524631e-06, "loss": 0.0679, "step": 398700 }, { "epoch": 3.92, "grad_norm": 5.33136510848999, "learning_rate": 3.0708888160703827e-06, "loss": 0.1721, "step": 398725 }, { "epoch": 3.92, "grad_norm": 4.628294467926025, "learning_rate": 3.070764693616134e-06, "loss": 0.0753, "step": 398750 }, { "epoch": 3.92, "grad_norm": 6.070278167724609, "learning_rate": 3.070640571161886e-06, "loss": 0.1915, "step": 398775 }, { "epoch": 3.92, "grad_norm": 13.216646194458008, "learning_rate": 3.070516448707637e-06, "loss": 0.0762, "step": 398800 }, { "epoch": 3.92, "grad_norm": 7.82775354385376, "learning_rate": 3.0703923262533884e-06, "loss": 0.2127, "step": 398825 }, { "epoch": 3.92, "grad_norm": 14.243528366088867, "learning_rate": 3.0702682037991405e-06, "loss": 0.0705, "step": 398850 }, { "epoch": 3.92, "grad_norm": 2.091398239135742, "learning_rate": 3.0701440813448917e-06, "loss": 0.1774, "step": 398875 }, { "epoch": 3.92, "grad_norm": 6.286144733428955, "learning_rate": 3.0700199588906433e-06, "loss": 0.0465, "step": 398900 }, { "epoch": 3.92, "grad_norm": 8.154318809509277, "learning_rate": 3.069895836436395e-06, "loss": 0.1922, "step": 398925 }, { "epoch": 3.92, "grad_norm": 7.086320400238037, "learning_rate": 3.0697717139821466e-06, "loss": 0.0585, "step": 398950 }, { "epoch": 3.92, "grad_norm": 6.2934250831604, "learning_rate": 3.0696475915278978e-06, "loss": 0.1493, "step": 398975 }, { "epoch": 3.92, "grad_norm": 8.132938385009766, "learning_rate": 3.06952346907365e-06, "loss": 0.0603, "step": 399000 }, { "epoch": 3.92, "grad_norm": 3.1559603214263916, "learning_rate": 3.069399346619401e-06, "loss": 0.1889, "step": 399025 }, { "epoch": 3.92, "grad_norm": 9.075135231018066, "learning_rate": 3.0692752241651523e-06, "loss": 0.0591, "step": 399050 }, { "epoch": 3.92, "grad_norm": 8.978163719177246, "learning_rate": 3.0691511017109043e-06, "loss": 0.1498, "step": 399075 }, { "epoch": 3.92, "grad_norm": 5.0914764404296875, "learning_rate": 3.0690269792566555e-06, "loss": 0.0688, "step": 399100 }, { "epoch": 3.92, "grad_norm": 3.6029560565948486, "learning_rate": 3.068902856802407e-06, "loss": 0.1767, "step": 399125 }, { "epoch": 3.92, "grad_norm": 8.613367080688477, "learning_rate": 3.068778734348159e-06, "loss": 0.0603, "step": 399150 }, { "epoch": 3.92, "grad_norm": 10.910828590393066, "learning_rate": 3.0686546118939104e-06, "loss": 0.1915, "step": 399175 }, { "epoch": 3.93, "grad_norm": 10.645758628845215, "learning_rate": 3.0685304894396617e-06, "loss": 0.0912, "step": 399200 }, { "epoch": 3.93, "grad_norm": 4.076060771942139, "learning_rate": 3.0684063669854137e-06, "loss": 0.1834, "step": 399225 }, { "epoch": 3.93, "grad_norm": 7.63289213180542, "learning_rate": 3.068282244531165e-06, "loss": 0.0702, "step": 399250 }, { "epoch": 3.93, "grad_norm": 3.7169575691223145, "learning_rate": 3.068158122076916e-06, "loss": 0.1975, "step": 399275 }, { "epoch": 3.93, "grad_norm": 11.314000129699707, "learning_rate": 3.068033999622668e-06, "loss": 0.075, "step": 399300 }, { "epoch": 3.93, "grad_norm": 5.256716251373291, "learning_rate": 3.0679098771684194e-06, "loss": 0.1658, "step": 399325 }, { "epoch": 3.93, "grad_norm": 4.922446250915527, "learning_rate": 3.067785754714171e-06, "loss": 0.061, "step": 399350 }, { "epoch": 3.93, "grad_norm": 4.281105041503906, "learning_rate": 3.0676616322599227e-06, "loss": 0.1875, "step": 399375 }, { "epoch": 3.93, "grad_norm": 7.809100151062012, "learning_rate": 3.0675375098056743e-06, "loss": 0.0774, "step": 399400 }, { "epoch": 3.93, "grad_norm": 12.683107376098633, "learning_rate": 3.0674133873514255e-06, "loss": 0.1785, "step": 399425 }, { "epoch": 3.93, "grad_norm": 7.830026149749756, "learning_rate": 3.0672892648971767e-06, "loss": 0.0705, "step": 399450 }, { "epoch": 3.93, "grad_norm": 7.488847255706787, "learning_rate": 3.067165142442929e-06, "loss": 0.1572, "step": 399475 }, { "epoch": 3.93, "grad_norm": 5.756078720092773, "learning_rate": 3.06704101998868e-06, "loss": 0.0639, "step": 399500 }, { "epoch": 3.93, "grad_norm": 7.249074459075928, "learning_rate": 3.0669168975344316e-06, "loss": 0.183, "step": 399525 }, { "epoch": 3.93, "grad_norm": 9.774046897888184, "learning_rate": 3.0667927750801833e-06, "loss": 0.0602, "step": 399550 }, { "epoch": 3.93, "grad_norm": 4.984583377838135, "learning_rate": 3.066668652625935e-06, "loss": 0.1759, "step": 399575 }, { "epoch": 3.93, "grad_norm": 6.377261638641357, "learning_rate": 3.066544530171686e-06, "loss": 0.0701, "step": 399600 }, { "epoch": 3.93, "grad_norm": 3.608868360519409, "learning_rate": 3.066420407717438e-06, "loss": 0.1797, "step": 399625 }, { "epoch": 3.93, "grad_norm": 14.042760848999023, "learning_rate": 3.0662962852631894e-06, "loss": 0.0645, "step": 399650 }, { "epoch": 3.93, "grad_norm": 4.7870073318481445, "learning_rate": 3.066172162808941e-06, "loss": 0.2073, "step": 399675 }, { "epoch": 3.93, "grad_norm": 14.418966293334961, "learning_rate": 3.0660480403546927e-06, "loss": 0.0567, "step": 399700 }, { "epoch": 3.93, "grad_norm": 3.992077112197876, "learning_rate": 3.065923917900444e-06, "loss": 0.1595, "step": 399725 }, { "epoch": 3.93, "grad_norm": 7.495055675506592, "learning_rate": 3.0657997954461955e-06, "loss": 0.0592, "step": 399750 }, { "epoch": 3.93, "grad_norm": 5.887655258178711, "learning_rate": 3.065675672991947e-06, "loss": 0.2038, "step": 399775 }, { "epoch": 3.93, "grad_norm": 25.463268280029297, "learning_rate": 3.0655515505376988e-06, "loss": 0.0777, "step": 399800 }, { "epoch": 3.93, "grad_norm": 5.467955112457275, "learning_rate": 3.06542742808345e-06, "loss": 0.1569, "step": 399825 }, { "epoch": 3.93, "grad_norm": 4.7823710441589355, "learning_rate": 3.065303305629202e-06, "loss": 0.0739, "step": 399850 }, { "epoch": 3.93, "grad_norm": 4.315609455108643, "learning_rate": 3.0651791831749533e-06, "loss": 0.1788, "step": 399875 }, { "epoch": 3.93, "grad_norm": 9.883551597595215, "learning_rate": 3.065055060720705e-06, "loss": 0.0739, "step": 399900 }, { "epoch": 3.93, "grad_norm": 7.810389995574951, "learning_rate": 3.0649309382664565e-06, "loss": 0.1826, "step": 399925 }, { "epoch": 3.93, "grad_norm": 8.305685997009277, "learning_rate": 3.064806815812208e-06, "loss": 0.0765, "step": 399950 }, { "epoch": 3.93, "grad_norm": 4.380582332611084, "learning_rate": 3.0646826933579594e-06, "loss": 0.1868, "step": 399975 }, { "epoch": 3.93, "grad_norm": 6.1352057456970215, "learning_rate": 3.0645585709037114e-06, "loss": 0.0664, "step": 400000 }, { "epoch": 3.93, "eval_loss": 0.6367394924163818, "eval_runtime": 6103.2135, "eval_samples_per_second": 1.551, "eval_steps_per_second": 0.194, "eval_wer": 0.1160754522629241, "step": 400000 }, { "epoch": 3.93, "grad_norm": 6.635791778564453, "learning_rate": 3.0644344484494626e-06, "loss": 0.1814, "step": 400025 }, { "epoch": 3.93, "grad_norm": 6.310611724853516, "learning_rate": 3.064310325995214e-06, "loss": 0.0702, "step": 400050 }, { "epoch": 3.93, "grad_norm": 5.432407379150391, "learning_rate": 3.064186203540966e-06, "loss": 0.1972, "step": 400075 }, { "epoch": 3.93, "grad_norm": 4.301867485046387, "learning_rate": 3.064062081086717e-06, "loss": 0.0975, "step": 400100 }, { "epoch": 3.93, "grad_norm": 4.866554260253906, "learning_rate": 3.0639379586324688e-06, "loss": 0.1568, "step": 400125 }, { "epoch": 3.93, "grad_norm": 6.753674507141113, "learning_rate": 3.0638138361782204e-06, "loss": 0.0686, "step": 400150 }, { "epoch": 3.93, "grad_norm": 3.466841697692871, "learning_rate": 3.063689713723972e-06, "loss": 0.2116, "step": 400175 }, { "epoch": 3.93, "grad_norm": 11.183147430419922, "learning_rate": 3.0635655912697232e-06, "loss": 0.0689, "step": 400200 }, { "epoch": 3.94, "grad_norm": 5.903531074523926, "learning_rate": 3.0634414688154753e-06, "loss": 0.1823, "step": 400225 }, { "epoch": 3.94, "grad_norm": 6.4138383865356445, "learning_rate": 3.0633173463612265e-06, "loss": 0.0739, "step": 400250 }, { "epoch": 3.94, "grad_norm": 4.432861328125, "learning_rate": 3.0631932239069777e-06, "loss": 0.1648, "step": 400275 }, { "epoch": 3.94, "grad_norm": 11.481328964233398, "learning_rate": 3.0630691014527298e-06, "loss": 0.0732, "step": 400300 }, { "epoch": 3.94, "grad_norm": 4.373838424682617, "learning_rate": 3.062944978998481e-06, "loss": 0.163, "step": 400325 }, { "epoch": 3.94, "grad_norm": 17.167043685913086, "learning_rate": 3.0628208565442326e-06, "loss": 0.0649, "step": 400350 }, { "epoch": 3.94, "grad_norm": 4.817312240600586, "learning_rate": 3.062701698988154e-06, "loss": 0.2039, "step": 400375 }, { "epoch": 3.94, "grad_norm": 9.161885261535645, "learning_rate": 3.0625775765339057e-06, "loss": 0.0621, "step": 400400 }, { "epoch": 3.94, "grad_norm": 6.31585168838501, "learning_rate": 3.062453454079657e-06, "loss": 0.1811, "step": 400425 }, { "epoch": 3.94, "grad_norm": 10.019025802612305, "learning_rate": 3.062329331625409e-06, "loss": 0.0676, "step": 400450 }, { "epoch": 3.94, "grad_norm": 4.083001136779785, "learning_rate": 3.06220520917116e-06, "loss": 0.173, "step": 400475 }, { "epoch": 3.94, "grad_norm": 7.293359756469727, "learning_rate": 3.0620810867169114e-06, "loss": 0.0599, "step": 400500 }, { "epoch": 3.94, "grad_norm": 5.332977294921875, "learning_rate": 3.0619569642626635e-06, "loss": 0.1974, "step": 400525 }, { "epoch": 3.94, "grad_norm": 10.871908187866211, "learning_rate": 3.0618328418084147e-06, "loss": 0.0785, "step": 400550 }, { "epoch": 3.94, "grad_norm": 6.599825382232666, "learning_rate": 3.0617087193541663e-06, "loss": 0.1938, "step": 400575 }, { "epoch": 3.94, "grad_norm": 8.902300834655762, "learning_rate": 3.061584596899918e-06, "loss": 0.0721, "step": 400600 }, { "epoch": 3.94, "grad_norm": 5.079383850097656, "learning_rate": 3.0614604744456696e-06, "loss": 0.2063, "step": 400625 }, { "epoch": 3.94, "grad_norm": 13.35377025604248, "learning_rate": 3.061336351991421e-06, "loss": 0.0829, "step": 400650 }, { "epoch": 3.94, "grad_norm": 3.9162826538085938, "learning_rate": 3.061212229537173e-06, "loss": 0.1653, "step": 400675 }, { "epoch": 3.94, "grad_norm": 5.122238636016846, "learning_rate": 3.061088107082924e-06, "loss": 0.0739, "step": 400700 }, { "epoch": 3.94, "grad_norm": 5.26242733001709, "learning_rate": 3.0609639846286753e-06, "loss": 0.1775, "step": 400725 }, { "epoch": 3.94, "grad_norm": 8.722290992736816, "learning_rate": 3.0608398621744273e-06, "loss": 0.0555, "step": 400750 }, { "epoch": 3.94, "grad_norm": 2.8084115982055664, "learning_rate": 3.0607157397201785e-06, "loss": 0.182, "step": 400775 }, { "epoch": 3.94, "grad_norm": 7.668212413787842, "learning_rate": 3.06059161726593e-06, "loss": 0.0594, "step": 400800 }, { "epoch": 3.94, "grad_norm": 6.279974937438965, "learning_rate": 3.060467494811682e-06, "loss": 0.2034, "step": 400825 }, { "epoch": 3.94, "grad_norm": 4.662548542022705, "learning_rate": 3.0603433723574334e-06, "loss": 0.0783, "step": 400850 }, { "epoch": 3.94, "grad_norm": 6.200741291046143, "learning_rate": 3.0602192499031847e-06, "loss": 0.1867, "step": 400875 }, { "epoch": 3.94, "grad_norm": 7.851552486419678, "learning_rate": 3.060095127448936e-06, "loss": 0.0479, "step": 400900 }, { "epoch": 3.94, "grad_norm": 4.316497802734375, "learning_rate": 3.059971004994688e-06, "loss": 0.2003, "step": 400925 }, { "epoch": 3.94, "grad_norm": 13.03852653503418, "learning_rate": 3.059846882540439e-06, "loss": 0.1013, "step": 400950 }, { "epoch": 3.94, "grad_norm": 4.385216236114502, "learning_rate": 3.0597227600861908e-06, "loss": 0.1722, "step": 400975 }, { "epoch": 3.94, "grad_norm": 14.728869438171387, "learning_rate": 3.0595986376319424e-06, "loss": 0.082, "step": 401000 }, { "epoch": 3.94, "grad_norm": 6.654936790466309, "learning_rate": 3.059474515177694e-06, "loss": 0.1722, "step": 401025 }, { "epoch": 3.94, "grad_norm": 10.847572326660156, "learning_rate": 3.0593503927234453e-06, "loss": 0.0831, "step": 401050 }, { "epoch": 3.94, "grad_norm": 4.471545696258545, "learning_rate": 3.0592262702691973e-06, "loss": 0.1767, "step": 401075 }, { "epoch": 3.94, "grad_norm": 3.386657953262329, "learning_rate": 3.0591021478149485e-06, "loss": 0.0537, "step": 401100 }, { "epoch": 3.94, "grad_norm": 3.6852896213531494, "learning_rate": 3.0589780253606997e-06, "loss": 0.1615, "step": 401125 }, { "epoch": 3.94, "grad_norm": 9.261883735656738, "learning_rate": 3.058853902906452e-06, "loss": 0.095, "step": 401150 }, { "epoch": 3.94, "grad_norm": 3.9616551399230957, "learning_rate": 3.058729780452203e-06, "loss": 0.1744, "step": 401175 }, { "epoch": 3.94, "grad_norm": 0.5150530338287354, "learning_rate": 3.0586056579979546e-06, "loss": 0.0795, "step": 401200 }, { "epoch": 3.94, "grad_norm": 3.579968214035034, "learning_rate": 3.0584815355437063e-06, "loss": 0.1862, "step": 401225 }, { "epoch": 3.95, "grad_norm": 10.249654769897461, "learning_rate": 3.058357413089458e-06, "loss": 0.0777, "step": 401250 }, { "epoch": 3.95, "grad_norm": 3.9285049438476562, "learning_rate": 3.058233290635209e-06, "loss": 0.1731, "step": 401275 }, { "epoch": 3.95, "grad_norm": 9.747489929199219, "learning_rate": 3.058109168180961e-06, "loss": 0.052, "step": 401300 }, { "epoch": 3.95, "grad_norm": 4.601944446563721, "learning_rate": 3.0579850457267124e-06, "loss": 0.2092, "step": 401325 }, { "epoch": 3.95, "grad_norm": 5.446873664855957, "learning_rate": 3.0578609232724636e-06, "loss": 0.0603, "step": 401350 }, { "epoch": 3.95, "grad_norm": 7.189175128936768, "learning_rate": 3.0577368008182157e-06, "loss": 0.1876, "step": 401375 }, { "epoch": 3.95, "grad_norm": 17.64849853515625, "learning_rate": 3.057612678363967e-06, "loss": 0.1105, "step": 401400 }, { "epoch": 3.95, "grad_norm": 4.286383628845215, "learning_rate": 3.0574885559097185e-06, "loss": 0.2088, "step": 401425 }, { "epoch": 3.95, "grad_norm": 8.35367202758789, "learning_rate": 3.05736443345547e-06, "loss": 0.0756, "step": 401450 }, { "epoch": 3.95, "grad_norm": 3.8771302700042725, "learning_rate": 3.0572403110012218e-06, "loss": 0.1538, "step": 401475 }, { "epoch": 3.95, "grad_norm": 9.98357105255127, "learning_rate": 3.057116188546973e-06, "loss": 0.0613, "step": 401500 }, { "epoch": 3.95, "grad_norm": 6.091207981109619, "learning_rate": 3.056992066092725e-06, "loss": 0.1713, "step": 401525 }, { "epoch": 3.95, "grad_norm": 4.878608703613281, "learning_rate": 3.0568679436384763e-06, "loss": 0.0818, "step": 401550 }, { "epoch": 3.95, "grad_norm": 6.252301216125488, "learning_rate": 3.0567438211842275e-06, "loss": 0.2057, "step": 401575 }, { "epoch": 3.95, "grad_norm": 10.114486694335938, "learning_rate": 3.0566196987299795e-06, "loss": 0.06, "step": 401600 }, { "epoch": 3.95, "grad_norm": 14.016427040100098, "learning_rate": 3.0564955762757307e-06, "loss": 0.1971, "step": 401625 }, { "epoch": 3.95, "grad_norm": 15.156930923461914, "learning_rate": 3.0563714538214824e-06, "loss": 0.0771, "step": 401650 }, { "epoch": 3.95, "grad_norm": 5.9617018699646, "learning_rate": 3.056247331367234e-06, "loss": 0.1553, "step": 401675 }, { "epoch": 3.95, "grad_norm": 12.531167984008789, "learning_rate": 3.0561232089129856e-06, "loss": 0.0864, "step": 401700 }, { "epoch": 3.95, "grad_norm": 4.231363296508789, "learning_rate": 3.055999086458737e-06, "loss": 0.223, "step": 401725 }, { "epoch": 3.95, "grad_norm": 13.096756935119629, "learning_rate": 3.055874964004488e-06, "loss": 0.0644, "step": 401750 }, { "epoch": 3.95, "grad_norm": 5.335269927978516, "learning_rate": 3.05575084155024e-06, "loss": 0.1931, "step": 401775 }, { "epoch": 3.95, "grad_norm": 9.278995513916016, "learning_rate": 3.0556267190959913e-06, "loss": 0.0752, "step": 401800 }, { "epoch": 3.95, "grad_norm": 7.164768218994141, "learning_rate": 3.055502596641743e-06, "loss": 0.1927, "step": 401825 }, { "epoch": 3.95, "grad_norm": 9.67735767364502, "learning_rate": 3.0553784741874946e-06, "loss": 0.0725, "step": 401850 }, { "epoch": 3.95, "grad_norm": 6.400874137878418, "learning_rate": 3.0552543517332462e-06, "loss": 0.1632, "step": 401875 }, { "epoch": 3.95, "grad_norm": 6.811487674713135, "learning_rate": 3.0551302292789975e-06, "loss": 0.0849, "step": 401900 }, { "epoch": 3.95, "grad_norm": 4.181429386138916, "learning_rate": 3.0550061068247495e-06, "loss": 0.2023, "step": 401925 }, { "epoch": 3.95, "grad_norm": 6.713500022888184, "learning_rate": 3.0548819843705007e-06, "loss": 0.0775, "step": 401950 }, { "epoch": 3.95, "grad_norm": 6.295383930206299, "learning_rate": 3.054757861916252e-06, "loss": 0.1833, "step": 401975 }, { "epoch": 3.95, "grad_norm": 10.533601760864258, "learning_rate": 3.054633739462004e-06, "loss": 0.0652, "step": 402000 }, { "epoch": 3.95, "grad_norm": 5.107059955596924, "learning_rate": 3.054509617007755e-06, "loss": 0.156, "step": 402025 }, { "epoch": 3.95, "grad_norm": 8.900022506713867, "learning_rate": 3.054385494553507e-06, "loss": 0.0837, "step": 402050 }, { "epoch": 3.95, "grad_norm": 5.892040252685547, "learning_rate": 3.0542613720992585e-06, "loss": 0.1861, "step": 402075 }, { "epoch": 3.95, "grad_norm": 8.370549201965332, "learning_rate": 3.05413724964501e-06, "loss": 0.0902, "step": 402100 }, { "epoch": 3.95, "grad_norm": 6.757467746734619, "learning_rate": 3.0540131271907613e-06, "loss": 0.1612, "step": 402125 }, { "epoch": 3.95, "grad_norm": 7.598698616027832, "learning_rate": 3.0538890047365134e-06, "loss": 0.0596, "step": 402150 }, { "epoch": 3.95, "grad_norm": 5.319530010223389, "learning_rate": 3.0537648822822646e-06, "loss": 0.1608, "step": 402175 }, { "epoch": 3.95, "grad_norm": 6.206918239593506, "learning_rate": 3.053640759828016e-06, "loss": 0.0769, "step": 402200 }, { "epoch": 3.95, "grad_norm": 5.495988368988037, "learning_rate": 3.053516637373768e-06, "loss": 0.1914, "step": 402225 }, { "epoch": 3.95, "grad_norm": 8.012052536010742, "learning_rate": 3.053392514919519e-06, "loss": 0.07, "step": 402250 }, { "epoch": 3.96, "grad_norm": 6.4330267906188965, "learning_rate": 3.0532683924652707e-06, "loss": 0.208, "step": 402275 }, { "epoch": 3.96, "grad_norm": 11.975768089294434, "learning_rate": 3.0531442700110223e-06, "loss": 0.0532, "step": 402300 }, { "epoch": 3.96, "grad_norm": 7.048717975616455, "learning_rate": 3.053020147556774e-06, "loss": 0.1632, "step": 402325 }, { "epoch": 3.96, "grad_norm": 13.06116771697998, "learning_rate": 3.052896025102525e-06, "loss": 0.0679, "step": 402350 }, { "epoch": 3.96, "grad_norm": 5.742613792419434, "learning_rate": 3.0527719026482773e-06, "loss": 0.2154, "step": 402375 }, { "epoch": 3.96, "grad_norm": 8.609309196472168, "learning_rate": 3.0526477801940285e-06, "loss": 0.0884, "step": 402400 }, { "epoch": 3.96, "grad_norm": 6.094275951385498, "learning_rate": 3.0525236577397797e-06, "loss": 0.1841, "step": 402425 }, { "epoch": 3.96, "grad_norm": 7.677504062652588, "learning_rate": 3.0523995352855317e-06, "loss": 0.0755, "step": 402450 }, { "epoch": 3.96, "grad_norm": 4.405211448669434, "learning_rate": 3.052275412831283e-06, "loss": 0.1883, "step": 402475 }, { "epoch": 3.96, "grad_norm": 4.97147274017334, "learning_rate": 3.0521512903770346e-06, "loss": 0.0838, "step": 402500 }, { "epoch": 3.96, "grad_norm": 3.5421626567840576, "learning_rate": 3.0520271679227862e-06, "loss": 0.1779, "step": 402525 }, { "epoch": 3.96, "grad_norm": 14.672065734863281, "learning_rate": 3.051903045468538e-06, "loss": 0.0666, "step": 402550 }, { "epoch": 3.96, "grad_norm": 4.88598108291626, "learning_rate": 3.051778923014289e-06, "loss": 0.1794, "step": 402575 }, { "epoch": 3.96, "grad_norm": 23.624197006225586, "learning_rate": 3.0516548005600407e-06, "loss": 0.0664, "step": 402600 }, { "epoch": 3.96, "grad_norm": 5.132523536682129, "learning_rate": 3.0515306781057923e-06, "loss": 0.2013, "step": 402625 }, { "epoch": 3.96, "grad_norm": 10.36640739440918, "learning_rate": 3.051406555651544e-06, "loss": 0.0587, "step": 402650 }, { "epoch": 3.96, "grad_norm": 4.244257926940918, "learning_rate": 3.051282433197295e-06, "loss": 0.2356, "step": 402675 }, { "epoch": 3.96, "grad_norm": 4.576515197753906, "learning_rate": 3.0511583107430472e-06, "loss": 0.0686, "step": 402700 }, { "epoch": 3.96, "grad_norm": 4.10715389251709, "learning_rate": 3.0510341882887984e-06, "loss": 0.1682, "step": 402725 }, { "epoch": 3.96, "grad_norm": 9.106789588928223, "learning_rate": 3.0509100658345497e-06, "loss": 0.0776, "step": 402750 }, { "epoch": 3.96, "grad_norm": 6.26800012588501, "learning_rate": 3.0507909082784715e-06, "loss": 0.189, "step": 402775 }, { "epoch": 3.96, "grad_norm": 8.771150588989258, "learning_rate": 3.0506667858242227e-06, "loss": 0.087, "step": 402800 }, { "epoch": 3.96, "grad_norm": 4.050158977508545, "learning_rate": 3.050542663369975e-06, "loss": 0.1461, "step": 402825 }, { "epoch": 3.96, "grad_norm": 3.153073310852051, "learning_rate": 3.050418540915726e-06, "loss": 0.0548, "step": 402850 }, { "epoch": 3.96, "grad_norm": 3.9375877380371094, "learning_rate": 3.0502944184614776e-06, "loss": 0.1785, "step": 402875 }, { "epoch": 3.96, "grad_norm": 12.364887237548828, "learning_rate": 3.0501702960072293e-06, "loss": 0.0809, "step": 402900 }, { "epoch": 3.96, "grad_norm": 5.327844142913818, "learning_rate": 3.050046173552981e-06, "loss": 0.1966, "step": 402925 }, { "epoch": 3.96, "grad_norm": 6.053534984588623, "learning_rate": 3.049922051098732e-06, "loss": 0.0755, "step": 402950 }, { "epoch": 3.96, "grad_norm": 5.044287204742432, "learning_rate": 3.049797928644484e-06, "loss": 0.1949, "step": 402975 }, { "epoch": 3.96, "grad_norm": 8.290472030639648, "learning_rate": 3.0496738061902354e-06, "loss": 0.0649, "step": 403000 }, { "epoch": 3.96, "grad_norm": 4.777218818664551, "learning_rate": 3.0495496837359866e-06, "loss": 0.1832, "step": 403025 }, { "epoch": 3.96, "grad_norm": 6.298468112945557, "learning_rate": 3.0494255612817387e-06, "loss": 0.0747, "step": 403050 }, { "epoch": 3.96, "grad_norm": 3.0516698360443115, "learning_rate": 3.04930143882749e-06, "loss": 0.1916, "step": 403075 }, { "epoch": 3.96, "grad_norm": 14.533482551574707, "learning_rate": 3.0491773163732415e-06, "loss": 0.0739, "step": 403100 }, { "epoch": 3.96, "grad_norm": 3.5237269401550293, "learning_rate": 3.049053193918993e-06, "loss": 0.2039, "step": 403125 }, { "epoch": 3.96, "grad_norm": 5.548145771026611, "learning_rate": 3.0489290714647448e-06, "loss": 0.0776, "step": 403150 }, { "epoch": 3.96, "grad_norm": 4.418071269989014, "learning_rate": 3.048804949010496e-06, "loss": 0.1921, "step": 403175 }, { "epoch": 3.96, "grad_norm": 6.121677875518799, "learning_rate": 3.048680826556247e-06, "loss": 0.0752, "step": 403200 }, { "epoch": 3.96, "grad_norm": 5.091963768005371, "learning_rate": 3.0485567041019993e-06, "loss": 0.1762, "step": 403225 }, { "epoch": 3.96, "grad_norm": 3.1904821395874023, "learning_rate": 3.0484325816477505e-06, "loss": 0.0601, "step": 403250 }, { "epoch": 3.97, "grad_norm": 5.209019660949707, "learning_rate": 3.048308459193502e-06, "loss": 0.1715, "step": 403275 }, { "epoch": 3.97, "grad_norm": 9.461724281311035, "learning_rate": 3.0481843367392537e-06, "loss": 0.0689, "step": 403300 }, { "epoch": 3.97, "grad_norm": 3.5806047916412354, "learning_rate": 3.0480602142850054e-06, "loss": 0.153, "step": 403325 }, { "epoch": 3.97, "grad_norm": 18.48643684387207, "learning_rate": 3.0479360918307566e-06, "loss": 0.07, "step": 403350 }, { "epoch": 3.97, "grad_norm": 5.634657382965088, "learning_rate": 3.0478119693765087e-06, "loss": 0.1998, "step": 403375 }, { "epoch": 3.97, "grad_norm": 9.927624702453613, "learning_rate": 3.04768784692226e-06, "loss": 0.061, "step": 403400 }, { "epoch": 3.97, "grad_norm": 7.123123645782471, "learning_rate": 3.047563724468011e-06, "loss": 0.1621, "step": 403425 }, { "epoch": 3.97, "grad_norm": 4.143581867218018, "learning_rate": 3.047439602013763e-06, "loss": 0.0605, "step": 403450 }, { "epoch": 3.97, "grad_norm": 6.004348278045654, "learning_rate": 3.0473154795595143e-06, "loss": 0.1844, "step": 403475 }, { "epoch": 3.97, "grad_norm": 8.383162498474121, "learning_rate": 3.047191357105266e-06, "loss": 0.0697, "step": 403500 }, { "epoch": 3.97, "grad_norm": 19.99358367919922, "learning_rate": 3.0470672346510176e-06, "loss": 0.1728, "step": 403525 }, { "epoch": 3.97, "grad_norm": 13.918105125427246, "learning_rate": 3.0469431121967693e-06, "loss": 0.0826, "step": 403550 }, { "epoch": 3.97, "grad_norm": 5.6434712409973145, "learning_rate": 3.0468189897425205e-06, "loss": 0.1488, "step": 403575 }, { "epoch": 3.97, "grad_norm": 11.724132537841797, "learning_rate": 3.0466948672882725e-06, "loss": 0.0666, "step": 403600 }, { "epoch": 3.97, "grad_norm": 6.247593402862549, "learning_rate": 3.0465707448340237e-06, "loss": 0.1818, "step": 403625 }, { "epoch": 3.97, "grad_norm": 4.881746768951416, "learning_rate": 3.046446622379775e-06, "loss": 0.0975, "step": 403650 }, { "epoch": 3.97, "grad_norm": 3.8000893592834473, "learning_rate": 3.046322499925527e-06, "loss": 0.2059, "step": 403675 }, { "epoch": 3.97, "grad_norm": 5.789217472076416, "learning_rate": 3.0461983774712782e-06, "loss": 0.0892, "step": 403700 }, { "epoch": 3.97, "grad_norm": 4.323719501495361, "learning_rate": 3.04607425501703e-06, "loss": 0.2397, "step": 403725 }, { "epoch": 3.97, "grad_norm": 6.765284538269043, "learning_rate": 3.0459501325627815e-06, "loss": 0.0898, "step": 403750 }, { "epoch": 3.97, "grad_norm": 4.661753177642822, "learning_rate": 3.045826010108533e-06, "loss": 0.1837, "step": 403775 }, { "epoch": 3.97, "grad_norm": 10.461748123168945, "learning_rate": 3.0457018876542843e-06, "loss": 0.0849, "step": 403800 }, { "epoch": 3.97, "grad_norm": 5.805319786071777, "learning_rate": 3.0455777652000364e-06, "loss": 0.1765, "step": 403825 }, { "epoch": 3.97, "grad_norm": 5.078740119934082, "learning_rate": 3.0454536427457876e-06, "loss": 0.054, "step": 403850 }, { "epoch": 3.97, "grad_norm": 5.383565425872803, "learning_rate": 3.045329520291539e-06, "loss": 0.1688, "step": 403875 }, { "epoch": 3.97, "grad_norm": 9.166556358337402, "learning_rate": 3.045205397837291e-06, "loss": 0.0807, "step": 403900 }, { "epoch": 3.97, "grad_norm": 4.1161370277404785, "learning_rate": 3.045081275383042e-06, "loss": 0.1773, "step": 403925 }, { "epoch": 3.97, "grad_norm": 5.52871036529541, "learning_rate": 3.0449571529287937e-06, "loss": 0.1049, "step": 403950 }, { "epoch": 3.97, "grad_norm": 5.903719902038574, "learning_rate": 3.0448330304745454e-06, "loss": 0.1733, "step": 403975 }, { "epoch": 3.97, "grad_norm": 18.62611961364746, "learning_rate": 3.044708908020297e-06, "loss": 0.0455, "step": 404000 }, { "epoch": 3.97, "grad_norm": 4.517467498779297, "learning_rate": 3.044584785566048e-06, "loss": 0.1669, "step": 404025 }, { "epoch": 3.97, "grad_norm": 5.816644191741943, "learning_rate": 3.0444606631117994e-06, "loss": 0.0686, "step": 404050 }, { "epoch": 3.97, "grad_norm": 4.670510292053223, "learning_rate": 3.0443365406575515e-06, "loss": 0.2071, "step": 404075 }, { "epoch": 3.97, "grad_norm": 5.807093143463135, "learning_rate": 3.0442124182033027e-06, "loss": 0.0822, "step": 404100 }, { "epoch": 3.97, "grad_norm": 3.64601731300354, "learning_rate": 3.0440882957490543e-06, "loss": 0.1933, "step": 404125 }, { "epoch": 3.97, "grad_norm": 10.737324714660645, "learning_rate": 3.043964173294806e-06, "loss": 0.0711, "step": 404150 }, { "epoch": 3.97, "grad_norm": 6.199014663696289, "learning_rate": 3.0438400508405576e-06, "loss": 0.2085, "step": 404175 }, { "epoch": 3.97, "grad_norm": 2.8380696773529053, "learning_rate": 3.043715928386309e-06, "loss": 0.0764, "step": 404200 }, { "epoch": 3.97, "grad_norm": 5.898951530456543, "learning_rate": 3.043591805932061e-06, "loss": 0.1315, "step": 404225 }, { "epoch": 3.97, "grad_norm": 4.922440528869629, "learning_rate": 3.043467683477812e-06, "loss": 0.0782, "step": 404250 }, { "epoch": 3.97, "grad_norm": 6.493924617767334, "learning_rate": 3.0433435610235633e-06, "loss": 0.1704, "step": 404275 }, { "epoch": 3.98, "grad_norm": 9.037717819213867, "learning_rate": 3.0432194385693153e-06, "loss": 0.0717, "step": 404300 }, { "epoch": 3.98, "grad_norm": 5.872896194458008, "learning_rate": 3.0430953161150665e-06, "loss": 0.1621, "step": 404325 }, { "epoch": 3.98, "grad_norm": 8.745183944702148, "learning_rate": 3.042971193660818e-06, "loss": 0.0882, "step": 404350 }, { "epoch": 3.98, "grad_norm": 4.796828269958496, "learning_rate": 3.04284707120657e-06, "loss": 0.1985, "step": 404375 }, { "epoch": 3.98, "grad_norm": 10.170427322387695, "learning_rate": 3.0427229487523215e-06, "loss": 0.0742, "step": 404400 }, { "epoch": 3.98, "grad_norm": 7.535851955413818, "learning_rate": 3.0425988262980727e-06, "loss": 0.202, "step": 404425 }, { "epoch": 3.98, "grad_norm": 10.31161880493164, "learning_rate": 3.0424747038438247e-06, "loss": 0.0634, "step": 404450 }, { "epoch": 3.98, "grad_norm": 3.2111310958862305, "learning_rate": 3.042350581389576e-06, "loss": 0.1624, "step": 404475 }, { "epoch": 3.98, "grad_norm": 6.783591270446777, "learning_rate": 3.042226458935327e-06, "loss": 0.07, "step": 404500 }, { "epoch": 3.98, "grad_norm": 6.849042892456055, "learning_rate": 3.042102336481079e-06, "loss": 0.167, "step": 404525 }, { "epoch": 3.98, "grad_norm": 2.124903440475464, "learning_rate": 3.0419782140268304e-06, "loss": 0.0488, "step": 404550 }, { "epoch": 3.98, "grad_norm": 3.934069871902466, "learning_rate": 3.041854091572582e-06, "loss": 0.1944, "step": 404575 }, { "epoch": 3.98, "grad_norm": 11.932424545288086, "learning_rate": 3.0417299691183337e-06, "loss": 0.0763, "step": 404600 }, { "epoch": 3.98, "grad_norm": 2.949303388595581, "learning_rate": 3.0416058466640853e-06, "loss": 0.1824, "step": 404625 }, { "epoch": 3.98, "grad_norm": 3.9796762466430664, "learning_rate": 3.0414817242098365e-06, "loss": 0.0672, "step": 404650 }, { "epoch": 3.98, "grad_norm": 7.339446067810059, "learning_rate": 3.0413576017555886e-06, "loss": 0.1978, "step": 404675 }, { "epoch": 3.98, "grad_norm": 6.278001308441162, "learning_rate": 3.04123347930134e-06, "loss": 0.0697, "step": 404700 }, { "epoch": 3.98, "grad_norm": 7.523995876312256, "learning_rate": 3.041109356847091e-06, "loss": 0.1887, "step": 404725 }, { "epoch": 3.98, "grad_norm": 10.283955574035645, "learning_rate": 3.040985234392843e-06, "loss": 0.0991, "step": 404750 }, { "epoch": 3.98, "grad_norm": 6.121203422546387, "learning_rate": 3.0408611119385943e-06, "loss": 0.2002, "step": 404775 }, { "epoch": 3.98, "grad_norm": 7.9586501121521, "learning_rate": 3.040736989484346e-06, "loss": 0.0744, "step": 404800 }, { "epoch": 3.98, "grad_norm": 5.002898693084717, "learning_rate": 3.0406128670300976e-06, "loss": 0.172, "step": 404825 }, { "epoch": 3.98, "grad_norm": 6.0147929191589355, "learning_rate": 3.040488744575849e-06, "loss": 0.0754, "step": 404850 }, { "epoch": 3.98, "grad_norm": 4.181313991546631, "learning_rate": 3.0403646221216004e-06, "loss": 0.2023, "step": 404875 }, { "epoch": 3.98, "grad_norm": 0.248905211687088, "learning_rate": 3.0402404996673516e-06, "loss": 0.0733, "step": 404900 }, { "epoch": 3.98, "grad_norm": 5.219127655029297, "learning_rate": 3.0401163772131037e-06, "loss": 0.1466, "step": 404925 }, { "epoch": 3.98, "grad_norm": 7.699726581573486, "learning_rate": 3.039992254758855e-06, "loss": 0.1015, "step": 404950 }, { "epoch": 3.98, "grad_norm": 3.879802703857422, "learning_rate": 3.0398681323046065e-06, "loss": 0.1689, "step": 404975 }, { "epoch": 3.98, "grad_norm": 11.959163665771484, "learning_rate": 3.039744009850358e-06, "loss": 0.0764, "step": 405000 }, { "epoch": 3.98, "grad_norm": 6.071841239929199, "learning_rate": 3.0396198873961098e-06, "loss": 0.134, "step": 405025 }, { "epoch": 3.98, "grad_norm": 5.168472766876221, "learning_rate": 3.039495764941861e-06, "loss": 0.065, "step": 405050 }, { "epoch": 3.98, "grad_norm": 4.937386512756348, "learning_rate": 3.039371642487613e-06, "loss": 0.1953, "step": 405075 }, { "epoch": 3.98, "grad_norm": 3.272421360015869, "learning_rate": 3.0392475200333643e-06, "loss": 0.0537, "step": 405100 }, { "epoch": 3.98, "grad_norm": 5.919389247894287, "learning_rate": 3.0391233975791155e-06, "loss": 0.1578, "step": 405125 }, { "epoch": 3.98, "grad_norm": 1.531130075454712, "learning_rate": 3.0389992751248675e-06, "loss": 0.0644, "step": 405150 }, { "epoch": 3.98, "grad_norm": 4.536112308502197, "learning_rate": 3.0388751526706187e-06, "loss": 0.2005, "step": 405175 }, { "epoch": 3.98, "grad_norm": 6.100564002990723, "learning_rate": 3.0387510302163704e-06, "loss": 0.0965, "step": 405200 }, { "epoch": 3.98, "grad_norm": 3.861431837081909, "learning_rate": 3.038626907762122e-06, "loss": 0.2196, "step": 405225 }, { "epoch": 3.98, "grad_norm": 9.875743865966797, "learning_rate": 3.0385027853078737e-06, "loss": 0.0604, "step": 405250 }, { "epoch": 3.98, "grad_norm": 7.556149005889893, "learning_rate": 3.038378662853625e-06, "loss": 0.2114, "step": 405275 }, { "epoch": 3.98, "grad_norm": 4.476739406585693, "learning_rate": 3.038254540399377e-06, "loss": 0.0731, "step": 405300 }, { "epoch": 3.99, "grad_norm": 5.659483432769775, "learning_rate": 3.038130417945128e-06, "loss": 0.1913, "step": 405325 }, { "epoch": 3.99, "grad_norm": 2.438382625579834, "learning_rate": 3.0380062954908793e-06, "loss": 0.078, "step": 405350 }, { "epoch": 3.99, "grad_norm": 4.5868754386901855, "learning_rate": 3.0378821730366314e-06, "loss": 0.2068, "step": 405375 }, { "epoch": 3.99, "grad_norm": 7.472979545593262, "learning_rate": 3.0377580505823826e-06, "loss": 0.0762, "step": 405400 }, { "epoch": 3.99, "grad_norm": 4.872584819793701, "learning_rate": 3.0376339281281343e-06, "loss": 0.1679, "step": 405425 }, { "epoch": 3.99, "grad_norm": 10.01265811920166, "learning_rate": 3.037509805673886e-06, "loss": 0.0921, "step": 405450 }, { "epoch": 3.99, "grad_norm": 12.744234085083008, "learning_rate": 3.0373856832196375e-06, "loss": 0.2084, "step": 405475 }, { "epoch": 3.99, "grad_norm": 4.185946941375732, "learning_rate": 3.0372615607653887e-06, "loss": 0.0683, "step": 405500 }, { "epoch": 3.99, "grad_norm": 9.560279846191406, "learning_rate": 3.037137438311141e-06, "loss": 0.1935, "step": 405525 }, { "epoch": 3.99, "grad_norm": 3.752656936645508, "learning_rate": 3.037013315856892e-06, "loss": 0.0705, "step": 405550 }, { "epoch": 3.99, "grad_norm": 6.19281005859375, "learning_rate": 3.0368891934026436e-06, "loss": 0.1507, "step": 405575 }, { "epoch": 3.99, "grad_norm": 8.51445484161377, "learning_rate": 3.0367650709483953e-06, "loss": 0.0761, "step": 405600 }, { "epoch": 3.99, "grad_norm": 3.78305721282959, "learning_rate": 3.036640948494147e-06, "loss": 0.1824, "step": 405625 }, { "epoch": 3.99, "grad_norm": 5.797223091125488, "learning_rate": 3.036516826039898e-06, "loss": 0.0731, "step": 405650 }, { "epoch": 3.99, "grad_norm": 4.484222412109375, "learning_rate": 3.03639270358565e-06, "loss": 0.1848, "step": 405675 }, { "epoch": 3.99, "grad_norm": 8.637869834899902, "learning_rate": 3.0362685811314014e-06, "loss": 0.1083, "step": 405700 }, { "epoch": 3.99, "grad_norm": 3.8344767093658447, "learning_rate": 3.0361444586771526e-06, "loss": 0.1726, "step": 405725 }, { "epoch": 3.99, "grad_norm": 8.388551712036133, "learning_rate": 3.0360203362229042e-06, "loss": 0.0802, "step": 405750 }, { "epoch": 3.99, "grad_norm": 3.2294375896453857, "learning_rate": 3.035896213768656e-06, "loss": 0.1587, "step": 405775 }, { "epoch": 3.99, "grad_norm": 8.308241844177246, "learning_rate": 3.0357720913144075e-06, "loss": 0.0657, "step": 405800 }, { "epoch": 3.99, "grad_norm": 5.233386516571045, "learning_rate": 3.0356479688601587e-06, "loss": 0.1645, "step": 405825 }, { "epoch": 3.99, "grad_norm": 6.19605827331543, "learning_rate": 3.0355238464059108e-06, "loss": 0.1006, "step": 405850 }, { "epoch": 3.99, "grad_norm": 9.206116676330566, "learning_rate": 3.035399723951662e-06, "loss": 0.1794, "step": 405875 }, { "epoch": 3.99, "grad_norm": 9.611442565917969, "learning_rate": 3.035275601497413e-06, "loss": 0.0738, "step": 405900 }, { "epoch": 3.99, "grad_norm": 4.354119300842285, "learning_rate": 3.0351514790431653e-06, "loss": 0.1632, "step": 405925 }, { "epoch": 3.99, "grad_norm": 5.55330228805542, "learning_rate": 3.0350273565889165e-06, "loss": 0.0741, "step": 405950 }, { "epoch": 3.99, "grad_norm": 5.850188255310059, "learning_rate": 3.0349081990328383e-06, "loss": 0.1563, "step": 405975 }, { "epoch": 3.99, "grad_norm": 6.840007781982422, "learning_rate": 3.0347840765785896e-06, "loss": 0.0516, "step": 406000 }, { "epoch": 3.99, "grad_norm": 5.849987506866455, "learning_rate": 3.034659954124341e-06, "loss": 0.2022, "step": 406025 }, { "epoch": 3.99, "grad_norm": 10.077184677124023, "learning_rate": 3.034535831670093e-06, "loss": 0.0732, "step": 406050 }, { "epoch": 3.99, "grad_norm": 3.410684108734131, "learning_rate": 3.0344117092158445e-06, "loss": 0.1916, "step": 406075 }, { "epoch": 3.99, "grad_norm": 7.6145758628845215, "learning_rate": 3.0342875867615957e-06, "loss": 0.0727, "step": 406100 }, { "epoch": 3.99, "grad_norm": 6.161559104919434, "learning_rate": 3.0341634643073477e-06, "loss": 0.215, "step": 406125 }, { "epoch": 3.99, "grad_norm": 6.543083190917969, "learning_rate": 3.034039341853099e-06, "loss": 0.0843, "step": 406150 }, { "epoch": 3.99, "grad_norm": 5.985695838928223, "learning_rate": 3.03391521939885e-06, "loss": 0.1863, "step": 406175 }, { "epoch": 3.99, "grad_norm": 5.870121479034424, "learning_rate": 3.033791096944602e-06, "loss": 0.076, "step": 406200 }, { "epoch": 3.99, "grad_norm": 4.3254876136779785, "learning_rate": 3.0336669744903534e-06, "loss": 0.1903, "step": 406225 }, { "epoch": 3.99, "grad_norm": 9.319151878356934, "learning_rate": 3.033542852036105e-06, "loss": 0.0566, "step": 406250 }, { "epoch": 3.99, "grad_norm": 4.638073444366455, "learning_rate": 3.0334187295818567e-06, "loss": 0.1519, "step": 406275 }, { "epoch": 3.99, "grad_norm": 4.297248840332031, "learning_rate": 3.0332946071276083e-06, "loss": 0.0743, "step": 406300 }, { "epoch": 4.0, "grad_norm": 4.558462619781494, "learning_rate": 3.0331704846733595e-06, "loss": 0.1537, "step": 406325 }, { "epoch": 4.0, "grad_norm": 8.338096618652344, "learning_rate": 3.0330463622191108e-06, "loss": 0.063, "step": 406350 }, { "epoch": 4.0, "grad_norm": 5.888991355895996, "learning_rate": 3.032922239764863e-06, "loss": 0.2165, "step": 406375 }, { "epoch": 4.0, "grad_norm": 10.674240112304688, "learning_rate": 3.032798117310614e-06, "loss": 0.0731, "step": 406400 }, { "epoch": 4.0, "grad_norm": 9.359487533569336, "learning_rate": 3.0326739948563657e-06, "loss": 0.1576, "step": 406425 }, { "epoch": 4.0, "grad_norm": 5.483623027801514, "learning_rate": 3.0325498724021173e-06, "loss": 0.0593, "step": 406450 }, { "epoch": 4.0, "grad_norm": 7.506251811981201, "learning_rate": 3.032425749947869e-06, "loss": 0.1685, "step": 406475 }, { "epoch": 4.0, "grad_norm": 19.034069061279297, "learning_rate": 3.03230162749362e-06, "loss": 0.0888, "step": 406500 }, { "epoch": 4.0, "grad_norm": 5.059454917907715, "learning_rate": 3.032177505039372e-06, "loss": 0.1852, "step": 406525 }, { "epoch": 4.0, "grad_norm": 9.333645820617676, "learning_rate": 3.0320533825851234e-06, "loss": 0.0642, "step": 406550 }, { "epoch": 4.0, "grad_norm": 6.476823806762695, "learning_rate": 3.0319292601308746e-06, "loss": 0.1899, "step": 406575 }, { "epoch": 4.0, "grad_norm": 14.064518928527832, "learning_rate": 3.0318051376766267e-06, "loss": 0.0803, "step": 406600 }, { "epoch": 4.0, "grad_norm": 8.563210487365723, "learning_rate": 3.031681015222378e-06, "loss": 0.2011, "step": 406625 }, { "epoch": 4.0, "grad_norm": 2.085192918777466, "learning_rate": 3.0315568927681295e-06, "loss": 0.0718, "step": 406650 }, { "epoch": 4.0, "grad_norm": 3.8181138038635254, "learning_rate": 3.031432770313881e-06, "loss": 0.1715, "step": 406675 }, { "epoch": 4.0, "grad_norm": 9.317447662353516, "learning_rate": 3.031308647859633e-06, "loss": 0.0627, "step": 406700 }, { "epoch": 4.0, "grad_norm": 7.828896522521973, "learning_rate": 3.031184525405384e-06, "loss": 0.2177, "step": 406725 }, { "epoch": 4.0, "grad_norm": 14.503403663635254, "learning_rate": 3.031060402951136e-06, "loss": 0.079, "step": 406750 }, { "epoch": 4.0, "grad_norm": 3.265061378479004, "learning_rate": 3.0309362804968873e-06, "loss": 0.1917, "step": 406775 }, { "epoch": 4.0, "grad_norm": 10.16165542602539, "learning_rate": 3.0308121580426385e-06, "loss": 0.0776, "step": 406800 }, { "epoch": 4.0, "grad_norm": 22.83427619934082, "learning_rate": 3.0306880355883905e-06, "loss": 0.2023, "step": 406825 }, { "epoch": 4.0, "grad_norm": 4.518238544464111, "learning_rate": 3.0305639131341418e-06, "loss": 0.0611, "step": 406850 }, { "epoch": 4.0, "grad_norm": 13.829261779785156, "learning_rate": 3.0304397906798934e-06, "loss": 0.081, "step": 406875 }, { "epoch": 4.0, "grad_norm": 1.7729133367538452, "learning_rate": 3.030315668225645e-06, "loss": 0.0602, "step": 406900 }, { "epoch": 4.0, "grad_norm": 9.93199634552002, "learning_rate": 3.0301915457713967e-06, "loss": 0.0938, "step": 406925 }, { "epoch": 4.0, "grad_norm": 4.2636799812316895, "learning_rate": 3.030067423317148e-06, "loss": 0.0739, "step": 406950 }, { "epoch": 4.0, "grad_norm": 10.39355754852295, "learning_rate": 3.0299433008629e-06, "loss": 0.0918, "step": 406975 }, { "epoch": 4.0, "grad_norm": 0.2939576804637909, "learning_rate": 3.029819178408651e-06, "loss": 0.0674, "step": 407000 }, { "epoch": 4.0, "grad_norm": 13.195528984069824, "learning_rate": 3.0296950559544024e-06, "loss": 0.1021, "step": 407025 }, { "epoch": 4.0, "grad_norm": 4.246133327484131, "learning_rate": 3.0295709335001544e-06, "loss": 0.0855, "step": 407050 }, { "epoch": 4.0, "grad_norm": 20.464284896850586, "learning_rate": 3.0294468110459056e-06, "loss": 0.1189, "step": 407075 }, { "epoch": 4.0, "grad_norm": 3.6849255561828613, "learning_rate": 3.0293226885916573e-06, "loss": 0.086, "step": 407100 }, { "epoch": 4.0, "grad_norm": 16.491548538208008, "learning_rate": 3.029198566137409e-06, "loss": 0.1007, "step": 407125 }, { "epoch": 4.0, "grad_norm": 4.708064079284668, "learning_rate": 3.0290744436831605e-06, "loss": 0.0675, "step": 407150 }, { "epoch": 4.0, "grad_norm": 10.963067054748535, "learning_rate": 3.0289503212289117e-06, "loss": 0.1156, "step": 407175 }, { "epoch": 4.0, "grad_norm": 3.547715663909912, "learning_rate": 3.028826198774663e-06, "loss": 0.0631, "step": 407200 }, { "epoch": 4.0, "grad_norm": 20.554187774658203, "learning_rate": 3.028702076320415e-06, "loss": 0.1351, "step": 407225 }, { "epoch": 4.0, "grad_norm": 0.26737359166145325, "learning_rate": 3.0285779538661662e-06, "loss": 0.0777, "step": 407250 }, { "epoch": 4.0, "grad_norm": 9.886842727661133, "learning_rate": 3.028453831411918e-06, "loss": 0.0878, "step": 407275 }, { "epoch": 4.0, "grad_norm": 4.3530683517456055, "learning_rate": 3.0283297089576695e-06, "loss": 0.0511, "step": 407300 }, { "epoch": 4.0, "grad_norm": 13.008816719055176, "learning_rate": 3.028205586503421e-06, "loss": 0.0959, "step": 407325 }, { "epoch": 4.01, "grad_norm": 2.5250415802001953, "learning_rate": 3.0280814640491723e-06, "loss": 0.0584, "step": 407350 }, { "epoch": 4.01, "grad_norm": 13.87643814086914, "learning_rate": 3.0279573415949244e-06, "loss": 0.1181, "step": 407375 }, { "epoch": 4.01, "grad_norm": 4.372517108917236, "learning_rate": 3.0278332191406756e-06, "loss": 0.0717, "step": 407400 }, { "epoch": 4.01, "grad_norm": 12.885506629943848, "learning_rate": 3.027709096686427e-06, "loss": 0.0862, "step": 407425 }, { "epoch": 4.01, "grad_norm": 5.127554893493652, "learning_rate": 3.027584974232179e-06, "loss": 0.112, "step": 407450 }, { "epoch": 4.01, "grad_norm": 5.724043846130371, "learning_rate": 3.02746085177793e-06, "loss": 0.1035, "step": 407475 }, { "epoch": 4.01, "grad_norm": 0.8411464095115662, "learning_rate": 3.0273367293236817e-06, "loss": 0.0674, "step": 407500 }, { "epoch": 4.01, "grad_norm": 14.577996253967285, "learning_rate": 3.0272126068694334e-06, "loss": 0.116, "step": 407525 }, { "epoch": 4.01, "grad_norm": 6.108013153076172, "learning_rate": 3.027088484415185e-06, "loss": 0.0928, "step": 407550 }, { "epoch": 4.01, "grad_norm": 11.131797790527344, "learning_rate": 3.026964361960936e-06, "loss": 0.0809, "step": 407575 }, { "epoch": 4.01, "grad_norm": 4.819129467010498, "learning_rate": 3.0268402395066883e-06, "loss": 0.0804, "step": 407600 }, { "epoch": 4.01, "grad_norm": 16.690940856933594, "learning_rate": 3.0267161170524395e-06, "loss": 0.119, "step": 407625 }, { "epoch": 4.01, "grad_norm": 4.570580005645752, "learning_rate": 3.0265919945981907e-06, "loss": 0.0643, "step": 407650 }, { "epoch": 4.01, "grad_norm": 9.763992309570312, "learning_rate": 3.0264678721439427e-06, "loss": 0.0723, "step": 407675 }, { "epoch": 4.01, "grad_norm": 4.503561973571777, "learning_rate": 3.026343749689694e-06, "loss": 0.0827, "step": 407700 }, { "epoch": 4.01, "grad_norm": 11.524027824401855, "learning_rate": 3.0262196272354456e-06, "loss": 0.1025, "step": 407725 }, { "epoch": 4.01, "grad_norm": 0.45039796829223633, "learning_rate": 3.0260955047811972e-06, "loss": 0.0797, "step": 407750 }, { "epoch": 4.01, "grad_norm": 8.670317649841309, "learning_rate": 3.025971382326949e-06, "loss": 0.1308, "step": 407775 }, { "epoch": 4.01, "grad_norm": 5.468086242675781, "learning_rate": 3.0258472598727e-06, "loss": 0.0772, "step": 407800 }, { "epoch": 4.01, "grad_norm": 14.427833557128906, "learning_rate": 3.025723137418452e-06, "loss": 0.1229, "step": 407825 }, { "epoch": 4.01, "grad_norm": 7.682521343231201, "learning_rate": 3.0255990149642033e-06, "loss": 0.0953, "step": 407850 }, { "epoch": 4.01, "grad_norm": 9.817404747009277, "learning_rate": 3.0254748925099546e-06, "loss": 0.1187, "step": 407875 }, { "epoch": 4.01, "grad_norm": 2.0216000080108643, "learning_rate": 3.0253507700557066e-06, "loss": 0.078, "step": 407900 }, { "epoch": 4.01, "grad_norm": 11.487820625305176, "learning_rate": 3.025226647601458e-06, "loss": 0.1023, "step": 407925 }, { "epoch": 4.01, "grad_norm": 6.083747386932373, "learning_rate": 3.0251025251472095e-06, "loss": 0.0721, "step": 407950 }, { "epoch": 4.01, "grad_norm": 11.980063438415527, "learning_rate": 3.024978402692961e-06, "loss": 0.0907, "step": 407975 }, { "epoch": 4.01, "grad_norm": 3.8122622966766357, "learning_rate": 3.0248542802387127e-06, "loss": 0.0879, "step": 408000 }, { "epoch": 4.01, "grad_norm": 10.187125205993652, "learning_rate": 3.024730157784464e-06, "loss": 0.1411, "step": 408025 }, { "epoch": 4.01, "grad_norm": 6.104165554046631, "learning_rate": 3.024606035330215e-06, "loss": 0.0594, "step": 408050 }, { "epoch": 4.01, "grad_norm": 20.319101333618164, "learning_rate": 3.024481912875967e-06, "loss": 0.0987, "step": 408075 }, { "epoch": 4.01, "grad_norm": 3.986729621887207, "learning_rate": 3.0243577904217184e-06, "loss": 0.0584, "step": 408100 }, { "epoch": 4.01, "grad_norm": 11.538402557373047, "learning_rate": 3.02423366796747e-06, "loss": 0.1265, "step": 408125 }, { "epoch": 4.01, "grad_norm": 1.4025746583938599, "learning_rate": 3.0241095455132217e-06, "loss": 0.0499, "step": 408150 }, { "epoch": 4.01, "grad_norm": 15.397475242614746, "learning_rate": 3.0239854230589733e-06, "loss": 0.0939, "step": 408175 }, { "epoch": 4.01, "grad_norm": 4.676251411437988, "learning_rate": 3.0238613006047245e-06, "loss": 0.0552, "step": 408200 }, { "epoch": 4.01, "grad_norm": 14.308847427368164, "learning_rate": 3.0237371781504766e-06, "loss": 0.0912, "step": 408225 }, { "epoch": 4.01, "grad_norm": 3.102778911590576, "learning_rate": 3.023613055696228e-06, "loss": 0.0811, "step": 408250 }, { "epoch": 4.01, "grad_norm": 14.164799690246582, "learning_rate": 3.023488933241979e-06, "loss": 0.1227, "step": 408275 }, { "epoch": 4.01, "grad_norm": 1.6445561647415161, "learning_rate": 3.023364810787731e-06, "loss": 0.0611, "step": 408300 }, { "epoch": 4.01, "grad_norm": 15.12342643737793, "learning_rate": 3.0232406883334823e-06, "loss": 0.1239, "step": 408325 }, { "epoch": 4.01, "grad_norm": 11.152361869812012, "learning_rate": 3.023116565879234e-06, "loss": 0.0783, "step": 408350 }, { "epoch": 4.02, "grad_norm": 18.291290283203125, "learning_rate": 3.0229924434249856e-06, "loss": 0.1045, "step": 408375 }, { "epoch": 4.02, "grad_norm": 5.103695392608643, "learning_rate": 3.022868320970737e-06, "loss": 0.0806, "step": 408400 }, { "epoch": 4.02, "grad_norm": 12.27524471282959, "learning_rate": 3.0227441985164884e-06, "loss": 0.1001, "step": 408425 }, { "epoch": 4.02, "grad_norm": 3.596397876739502, "learning_rate": 3.0226250409604103e-06, "loss": 0.0736, "step": 408450 }, { "epoch": 4.02, "grad_norm": 13.741704940795898, "learning_rate": 3.0225009185061615e-06, "loss": 0.114, "step": 408475 }, { "epoch": 4.02, "grad_norm": 10.230627059936523, "learning_rate": 3.0223767960519136e-06, "loss": 0.0807, "step": 408500 }, { "epoch": 4.02, "grad_norm": 7.490191459655762, "learning_rate": 3.0222526735976648e-06, "loss": 0.096, "step": 408525 }, { "epoch": 4.02, "grad_norm": 3.107468605041504, "learning_rate": 3.0221285511434164e-06, "loss": 0.0654, "step": 408550 }, { "epoch": 4.02, "grad_norm": 3.2496016025543213, "learning_rate": 3.022004428689168e-06, "loss": 0.074, "step": 408575 }, { "epoch": 4.02, "grad_norm": 7.151740074157715, "learning_rate": 3.0218803062349197e-06, "loss": 0.0788, "step": 408600 }, { "epoch": 4.02, "grad_norm": 7.506615161895752, "learning_rate": 3.021756183780671e-06, "loss": 0.1301, "step": 408625 }, { "epoch": 4.02, "grad_norm": 2.953861951828003, "learning_rate": 3.021632061326422e-06, "loss": 0.0801, "step": 408650 }, { "epoch": 4.02, "grad_norm": 12.607157707214355, "learning_rate": 3.021507938872174e-06, "loss": 0.1091, "step": 408675 }, { "epoch": 4.02, "grad_norm": 0.7647747993469238, "learning_rate": 3.0213838164179254e-06, "loss": 0.0738, "step": 408700 }, { "epoch": 4.02, "grad_norm": 29.911418914794922, "learning_rate": 3.021259693963677e-06, "loss": 0.1556, "step": 408725 }, { "epoch": 4.02, "grad_norm": 5.3250579833984375, "learning_rate": 3.0211355715094286e-06, "loss": 0.0673, "step": 408750 }, { "epoch": 4.02, "grad_norm": 17.88912010192871, "learning_rate": 3.0210114490551803e-06, "loss": 0.1076, "step": 408775 }, { "epoch": 4.02, "grad_norm": 6.218302249908447, "learning_rate": 3.0208873266009315e-06, "loss": 0.0762, "step": 408800 }, { "epoch": 4.02, "grad_norm": 13.121441841125488, "learning_rate": 3.0207632041466835e-06, "loss": 0.1087, "step": 408825 }, { "epoch": 4.02, "grad_norm": 3.554239273071289, "learning_rate": 3.0206390816924347e-06, "loss": 0.0746, "step": 408850 }, { "epoch": 4.02, "grad_norm": 13.867691040039062, "learning_rate": 3.020514959238186e-06, "loss": 0.1062, "step": 408875 }, { "epoch": 4.02, "grad_norm": 1.7560309171676636, "learning_rate": 3.020390836783938e-06, "loss": 0.0962, "step": 408900 }, { "epoch": 4.02, "grad_norm": 17.35953140258789, "learning_rate": 3.0202667143296892e-06, "loss": 0.1083, "step": 408925 }, { "epoch": 4.02, "grad_norm": 2.6060540676116943, "learning_rate": 3.020142591875441e-06, "loss": 0.0621, "step": 408950 }, { "epoch": 4.02, "grad_norm": 3.605506658554077, "learning_rate": 3.0200184694211925e-06, "loss": 0.0877, "step": 408975 }, { "epoch": 4.02, "grad_norm": 1.7756693363189697, "learning_rate": 3.019894346966944e-06, "loss": 0.0872, "step": 409000 }, { "epoch": 4.02, "grad_norm": 5.25555419921875, "learning_rate": 3.0197702245126953e-06, "loss": 0.09, "step": 409025 }, { "epoch": 4.02, "grad_norm": 6.737198829650879, "learning_rate": 3.0196461020584474e-06, "loss": 0.0913, "step": 409050 }, { "epoch": 4.02, "grad_norm": 17.82830810546875, "learning_rate": 3.0195219796041986e-06, "loss": 0.1236, "step": 409075 }, { "epoch": 4.02, "grad_norm": 2.2400832176208496, "learning_rate": 3.01939785714995e-06, "loss": 0.054, "step": 409100 }, { "epoch": 4.02, "grad_norm": 12.341726303100586, "learning_rate": 3.019273734695702e-06, "loss": 0.1181, "step": 409125 }, { "epoch": 4.02, "grad_norm": 5.552335262298584, "learning_rate": 3.019149612241453e-06, "loss": 0.0814, "step": 409150 }, { "epoch": 4.02, "grad_norm": 4.03236722946167, "learning_rate": 3.0190254897872047e-06, "loss": 0.0995, "step": 409175 }, { "epoch": 4.02, "grad_norm": 5.170211315155029, "learning_rate": 3.0189013673329564e-06, "loss": 0.0627, "step": 409200 }, { "epoch": 4.02, "grad_norm": 7.597507476806641, "learning_rate": 3.018777244878708e-06, "loss": 0.1029, "step": 409225 }, { "epoch": 4.02, "grad_norm": 5.92630672454834, "learning_rate": 3.0186531224244592e-06, "loss": 0.0792, "step": 409250 }, { "epoch": 4.02, "grad_norm": 18.647714614868164, "learning_rate": 3.0185289999702113e-06, "loss": 0.1292, "step": 409275 }, { "epoch": 4.02, "grad_norm": 2.607849359512329, "learning_rate": 3.0184048775159625e-06, "loss": 0.077, "step": 409300 }, { "epoch": 4.02, "grad_norm": 12.315065383911133, "learning_rate": 3.0182807550617137e-06, "loss": 0.128, "step": 409325 }, { "epoch": 4.02, "grad_norm": 4.673731803894043, "learning_rate": 3.0181566326074658e-06, "loss": 0.0628, "step": 409350 }, { "epoch": 4.03, "grad_norm": 13.249407768249512, "learning_rate": 3.018032510153217e-06, "loss": 0.0903, "step": 409375 }, { "epoch": 4.03, "grad_norm": 3.4747517108917236, "learning_rate": 3.0179083876989686e-06, "loss": 0.0688, "step": 409400 }, { "epoch": 4.03, "grad_norm": 15.896002769470215, "learning_rate": 3.0177842652447202e-06, "loss": 0.1046, "step": 409425 }, { "epoch": 4.03, "grad_norm": 8.837007522583008, "learning_rate": 3.017660142790472e-06, "loss": 0.0612, "step": 409450 }, { "epoch": 4.03, "grad_norm": 14.383678436279297, "learning_rate": 3.017536020336223e-06, "loss": 0.1194, "step": 409475 }, { "epoch": 4.03, "grad_norm": 0.12378770112991333, "learning_rate": 3.0174118978819743e-06, "loss": 0.0711, "step": 409500 }, { "epoch": 4.03, "grad_norm": 4.569643974304199, "learning_rate": 3.0172877754277263e-06, "loss": 0.1151, "step": 409525 }, { "epoch": 4.03, "grad_norm": 3.7105724811553955, "learning_rate": 3.0171636529734776e-06, "loss": 0.0945, "step": 409550 }, { "epoch": 4.03, "grad_norm": 15.779807090759277, "learning_rate": 3.017039530519229e-06, "loss": 0.0892, "step": 409575 }, { "epoch": 4.03, "grad_norm": 1.8724199533462524, "learning_rate": 3.016915408064981e-06, "loss": 0.0732, "step": 409600 }, { "epoch": 4.03, "grad_norm": 12.77463436126709, "learning_rate": 3.0167912856107325e-06, "loss": 0.1226, "step": 409625 }, { "epoch": 4.03, "grad_norm": 4.790457248687744, "learning_rate": 3.0166671631564837e-06, "loss": 0.0811, "step": 409650 }, { "epoch": 4.03, "grad_norm": 11.679817199707031, "learning_rate": 3.0165430407022357e-06, "loss": 0.1084, "step": 409675 }, { "epoch": 4.03, "grad_norm": 1.897698163986206, "learning_rate": 3.016418918247987e-06, "loss": 0.0883, "step": 409700 }, { "epoch": 4.03, "grad_norm": 20.969953536987305, "learning_rate": 3.016294795793738e-06, "loss": 0.1053, "step": 409725 }, { "epoch": 4.03, "grad_norm": 2.787290334701538, "learning_rate": 3.0161706733394902e-06, "loss": 0.0677, "step": 409750 }, { "epoch": 4.03, "grad_norm": 19.504064559936523, "learning_rate": 3.0160465508852414e-06, "loss": 0.0807, "step": 409775 }, { "epoch": 4.03, "grad_norm": 10.032756805419922, "learning_rate": 3.015922428430993e-06, "loss": 0.0633, "step": 409800 }, { "epoch": 4.03, "grad_norm": 15.89531421661377, "learning_rate": 3.0157983059767447e-06, "loss": 0.1063, "step": 409825 }, { "epoch": 4.03, "grad_norm": 3.935628652572632, "learning_rate": 3.0156741835224963e-06, "loss": 0.0625, "step": 409850 }, { "epoch": 4.03, "grad_norm": 13.06491470336914, "learning_rate": 3.0155500610682475e-06, "loss": 0.1044, "step": 409875 }, { "epoch": 4.03, "grad_norm": 1.4663023948669434, "learning_rate": 3.0154259386139996e-06, "loss": 0.0927, "step": 409900 }, { "epoch": 4.03, "grad_norm": 8.789175987243652, "learning_rate": 3.015301816159751e-06, "loss": 0.1089, "step": 409925 }, { "epoch": 4.03, "grad_norm": 7.431745529174805, "learning_rate": 3.015177693705502e-06, "loss": 0.0654, "step": 409950 }, { "epoch": 4.03, "grad_norm": 13.610360145568848, "learning_rate": 3.015053571251254e-06, "loss": 0.0652, "step": 409975 }, { "epoch": 4.03, "grad_norm": 8.21674633026123, "learning_rate": 3.0149294487970053e-06, "loss": 0.0642, "step": 410000 }, { "epoch": 4.03, "grad_norm": 16.410371780395508, "learning_rate": 3.014805326342757e-06, "loss": 0.1153, "step": 410025 }, { "epoch": 4.03, "grad_norm": 4.546678066253662, "learning_rate": 3.0146812038885086e-06, "loss": 0.0873, "step": 410050 }, { "epoch": 4.03, "grad_norm": 9.38772201538086, "learning_rate": 3.01455708143426e-06, "loss": 0.1169, "step": 410075 }, { "epoch": 4.03, "grad_norm": 3.320002317428589, "learning_rate": 3.0144329589800114e-06, "loss": 0.0629, "step": 410100 }, { "epoch": 4.03, "grad_norm": 13.651435852050781, "learning_rate": 3.0143088365257635e-06, "loss": 0.0973, "step": 410125 }, { "epoch": 4.03, "grad_norm": 0.9776839017868042, "learning_rate": 3.0141847140715147e-06, "loss": 0.0872, "step": 410150 }, { "epoch": 4.03, "grad_norm": 15.081635475158691, "learning_rate": 3.014060591617266e-06, "loss": 0.1078, "step": 410175 }, { "epoch": 4.03, "grad_norm": 9.938494682312012, "learning_rate": 3.013936469163018e-06, "loss": 0.0888, "step": 410200 }, { "epoch": 4.03, "grad_norm": 15.535743713378906, "learning_rate": 3.013812346708769e-06, "loss": 0.1034, "step": 410225 }, { "epoch": 4.03, "grad_norm": 13.396110534667969, "learning_rate": 3.013688224254521e-06, "loss": 0.0608, "step": 410250 }, { "epoch": 4.03, "grad_norm": 13.10559368133545, "learning_rate": 3.0135641018002724e-06, "loss": 0.0877, "step": 410275 }, { "epoch": 4.03, "grad_norm": 0.11880096793174744, "learning_rate": 3.013439979346024e-06, "loss": 0.0633, "step": 410300 }, { "epoch": 4.03, "grad_norm": 16.039466857910156, "learning_rate": 3.0133158568917753e-06, "loss": 0.0752, "step": 410325 }, { "epoch": 4.03, "grad_norm": 10.284260749816895, "learning_rate": 3.0131917344375265e-06, "loss": 0.0988, "step": 410350 }, { "epoch": 4.03, "grad_norm": 13.470593452453613, "learning_rate": 3.0130676119832786e-06, "loss": 0.1205, "step": 410375 }, { "epoch": 4.04, "grad_norm": 9.626215934753418, "learning_rate": 3.0129434895290298e-06, "loss": 0.0621, "step": 410400 }, { "epoch": 4.04, "grad_norm": 4.741380214691162, "learning_rate": 3.0128193670747814e-06, "loss": 0.0905, "step": 410425 }, { "epoch": 4.04, "grad_norm": 0.7989411354064941, "learning_rate": 3.012695244620533e-06, "loss": 0.078, "step": 410450 }, { "epoch": 4.04, "grad_norm": 4.381231784820557, "learning_rate": 3.0125711221662847e-06, "loss": 0.0903, "step": 410475 }, { "epoch": 4.04, "grad_norm": 8.342941284179688, "learning_rate": 3.012451964610206e-06, "loss": 0.0829, "step": 410500 }, { "epoch": 4.04, "grad_norm": 12.021917343139648, "learning_rate": 3.0123278421559578e-06, "loss": 0.1002, "step": 410525 }, { "epoch": 4.04, "grad_norm": 3.4121203422546387, "learning_rate": 3.012203719701709e-06, "loss": 0.0931, "step": 410550 }, { "epoch": 4.04, "grad_norm": 15.556474685668945, "learning_rate": 3.012079597247461e-06, "loss": 0.1206, "step": 410575 }, { "epoch": 4.04, "grad_norm": 3.975027322769165, "learning_rate": 3.0119554747932122e-06, "loss": 0.0681, "step": 410600 }, { "epoch": 4.04, "grad_norm": 18.650522232055664, "learning_rate": 3.0118313523389634e-06, "loss": 0.1132, "step": 410625 }, { "epoch": 4.04, "grad_norm": 5.032966136932373, "learning_rate": 3.0117072298847155e-06, "loss": 0.0788, "step": 410650 }, { "epoch": 4.04, "grad_norm": 9.54948616027832, "learning_rate": 3.0115831074304667e-06, "loss": 0.1007, "step": 410675 }, { "epoch": 4.04, "grad_norm": 2.242988348007202, "learning_rate": 3.0114589849762184e-06, "loss": 0.0903, "step": 410700 }, { "epoch": 4.04, "grad_norm": 12.854873657226562, "learning_rate": 3.01133486252197e-06, "loss": 0.1148, "step": 410725 }, { "epoch": 4.04, "grad_norm": 3.514238119125366, "learning_rate": 3.0112107400677216e-06, "loss": 0.0801, "step": 410750 }, { "epoch": 4.04, "grad_norm": 12.525572776794434, "learning_rate": 3.011086617613473e-06, "loss": 0.0863, "step": 410775 }, { "epoch": 4.04, "grad_norm": 1.7417348623275757, "learning_rate": 3.010962495159225e-06, "loss": 0.0686, "step": 410800 }, { "epoch": 4.04, "grad_norm": 10.41590404510498, "learning_rate": 3.010838372704976e-06, "loss": 0.0868, "step": 410825 }, { "epoch": 4.04, "grad_norm": 1.2953227758407593, "learning_rate": 3.0107142502507273e-06, "loss": 0.0712, "step": 410850 }, { "epoch": 4.04, "grad_norm": 14.957756042480469, "learning_rate": 3.0105901277964794e-06, "loss": 0.1223, "step": 410875 }, { "epoch": 4.04, "grad_norm": 10.710906982421875, "learning_rate": 3.0104660053422306e-06, "loss": 0.0809, "step": 410900 }, { "epoch": 4.04, "grad_norm": 20.9721736907959, "learning_rate": 3.0103418828879822e-06, "loss": 0.1076, "step": 410925 }, { "epoch": 4.04, "grad_norm": 7.8712358474731445, "learning_rate": 3.0102177604337334e-06, "loss": 0.0752, "step": 410950 }, { "epoch": 4.04, "grad_norm": 6.095584392547607, "learning_rate": 3.0100936379794855e-06, "loss": 0.0881, "step": 410975 }, { "epoch": 4.04, "grad_norm": 4.584076881408691, "learning_rate": 3.0099695155252367e-06, "loss": 0.0966, "step": 411000 }, { "epoch": 4.04, "grad_norm": 8.168167114257812, "learning_rate": 3.009845393070988e-06, "loss": 0.0908, "step": 411025 }, { "epoch": 4.04, "grad_norm": 9.311420440673828, "learning_rate": 3.00972127061674e-06, "loss": 0.0527, "step": 411050 }, { "epoch": 4.04, "grad_norm": 19.68669319152832, "learning_rate": 3.009597148162491e-06, "loss": 0.1014, "step": 411075 }, { "epoch": 4.04, "grad_norm": 1.339852213859558, "learning_rate": 3.009473025708243e-06, "loss": 0.0827, "step": 411100 }, { "epoch": 4.04, "grad_norm": 18.392309188842773, "learning_rate": 3.0093489032539945e-06, "loss": 0.1091, "step": 411125 }, { "epoch": 4.04, "grad_norm": 3.8777644634246826, "learning_rate": 3.009224780799746e-06, "loss": 0.0596, "step": 411150 }, { "epoch": 4.04, "grad_norm": 9.394477844238281, "learning_rate": 3.0091006583454973e-06, "loss": 0.0862, "step": 411175 }, { "epoch": 4.04, "grad_norm": 4.642704963684082, "learning_rate": 3.0089765358912494e-06, "loss": 0.075, "step": 411200 }, { "epoch": 4.04, "grad_norm": 22.787967681884766, "learning_rate": 3.0088524134370006e-06, "loss": 0.1686, "step": 411225 }, { "epoch": 4.04, "grad_norm": 5.635565280914307, "learning_rate": 3.0087282909827518e-06, "loss": 0.0918, "step": 411250 }, { "epoch": 4.04, "grad_norm": 15.47916316986084, "learning_rate": 3.008604168528504e-06, "loss": 0.11, "step": 411275 }, { "epoch": 4.04, "grad_norm": 2.3747544288635254, "learning_rate": 3.008480046074255e-06, "loss": 0.0907, "step": 411300 }, { "epoch": 4.04, "grad_norm": 6.683485507965088, "learning_rate": 3.0083559236200067e-06, "loss": 0.0831, "step": 411325 }, { "epoch": 4.04, "grad_norm": 2.838063955307007, "learning_rate": 3.0082318011657583e-06, "loss": 0.0716, "step": 411350 }, { "epoch": 4.04, "grad_norm": 18.20425796508789, "learning_rate": 3.00810767871151e-06, "loss": 0.1304, "step": 411375 }, { "epoch": 4.04, "grad_norm": 1.2858575582504272, "learning_rate": 3.007983556257261e-06, "loss": 0.0931, "step": 411400 }, { "epoch": 4.05, "grad_norm": 3.503213405609131, "learning_rate": 3.0078594338030132e-06, "loss": 0.0943, "step": 411425 }, { "epoch": 4.05, "grad_norm": 0.6851447820663452, "learning_rate": 3.0077353113487644e-06, "loss": 0.0748, "step": 411450 }, { "epoch": 4.05, "grad_norm": 15.916367530822754, "learning_rate": 3.007611188894516e-06, "loss": 0.1186, "step": 411475 }, { "epoch": 4.05, "grad_norm": 3.0622074604034424, "learning_rate": 3.0074870664402677e-06, "loss": 0.0935, "step": 411500 }, { "epoch": 4.05, "grad_norm": 11.720926284790039, "learning_rate": 3.0073629439860193e-06, "loss": 0.1058, "step": 411525 }, { "epoch": 4.05, "grad_norm": 0.8224113583564758, "learning_rate": 3.0072388215317706e-06, "loss": 0.079, "step": 411550 }, { "epoch": 4.05, "grad_norm": 5.017154216766357, "learning_rate": 3.0071146990775226e-06, "loss": 0.111, "step": 411575 }, { "epoch": 4.05, "grad_norm": 5.702760696411133, "learning_rate": 3.006990576623274e-06, "loss": 0.0608, "step": 411600 }, { "epoch": 4.05, "grad_norm": 16.95513916015625, "learning_rate": 3.006866454169025e-06, "loss": 0.0904, "step": 411625 }, { "epoch": 4.05, "grad_norm": 6.464034557342529, "learning_rate": 3.006742331714777e-06, "loss": 0.0546, "step": 411650 }, { "epoch": 4.05, "grad_norm": 10.777979850769043, "learning_rate": 3.0066182092605283e-06, "loss": 0.0718, "step": 411675 }, { "epoch": 4.05, "grad_norm": 3.427171230316162, "learning_rate": 3.00649408680628e-06, "loss": 0.0725, "step": 411700 }, { "epoch": 4.05, "grad_norm": 9.444745063781738, "learning_rate": 3.0063699643520316e-06, "loss": 0.099, "step": 411725 }, { "epoch": 4.05, "grad_norm": 5.832585334777832, "learning_rate": 3.006245841897783e-06, "loss": 0.0833, "step": 411750 }, { "epoch": 4.05, "grad_norm": 5.160419940948486, "learning_rate": 3.0061217194435344e-06, "loss": 0.1071, "step": 411775 }, { "epoch": 4.05, "grad_norm": 0.9541805386543274, "learning_rate": 3.0059975969892856e-06, "loss": 0.0694, "step": 411800 }, { "epoch": 4.05, "grad_norm": 11.817852973937988, "learning_rate": 3.0058734745350377e-06, "loss": 0.077, "step": 411825 }, { "epoch": 4.05, "grad_norm": 11.546409606933594, "learning_rate": 3.005749352080789e-06, "loss": 0.0719, "step": 411850 }, { "epoch": 4.05, "grad_norm": 12.777585983276367, "learning_rate": 3.0056252296265405e-06, "loss": 0.1241, "step": 411875 }, { "epoch": 4.05, "grad_norm": 1.0216456651687622, "learning_rate": 3.005501107172292e-06, "loss": 0.0565, "step": 411900 }, { "epoch": 4.05, "grad_norm": 10.536811828613281, "learning_rate": 3.005376984718044e-06, "loss": 0.0935, "step": 411925 }, { "epoch": 4.05, "grad_norm": 3.098358631134033, "learning_rate": 3.005252862263795e-06, "loss": 0.0604, "step": 411950 }, { "epoch": 4.05, "grad_norm": 13.19112777709961, "learning_rate": 3.005128739809547e-06, "loss": 0.1121, "step": 411975 }, { "epoch": 4.05, "grad_norm": 1.604498267173767, "learning_rate": 3.0050046173552983e-06, "loss": 0.0715, "step": 412000 }, { "epoch": 4.05, "grad_norm": 16.968488693237305, "learning_rate": 3.0048804949010495e-06, "loss": 0.1277, "step": 412025 }, { "epoch": 4.05, "grad_norm": 4.546682834625244, "learning_rate": 3.0047563724468016e-06, "loss": 0.0846, "step": 412050 }, { "epoch": 4.05, "grad_norm": 6.479991436004639, "learning_rate": 3.0046322499925528e-06, "loss": 0.0911, "step": 412075 }, { "epoch": 4.05, "grad_norm": 2.1370413303375244, "learning_rate": 3.0045081275383044e-06, "loss": 0.0968, "step": 412100 }, { "epoch": 4.05, "grad_norm": 11.143513679504395, "learning_rate": 3.004384005084056e-06, "loss": 0.0921, "step": 412125 }, { "epoch": 4.05, "grad_norm": 9.835893630981445, "learning_rate": 3.0042598826298077e-06, "loss": 0.0658, "step": 412150 }, { "epoch": 4.05, "grad_norm": 12.300700187683105, "learning_rate": 3.004135760175559e-06, "loss": 0.0999, "step": 412175 }, { "epoch": 4.05, "grad_norm": 0.4069351851940155, "learning_rate": 3.004011637721311e-06, "loss": 0.0698, "step": 412200 }, { "epoch": 4.05, "grad_norm": 9.86031436920166, "learning_rate": 3.003887515267062e-06, "loss": 0.0885, "step": 412225 }, { "epoch": 4.05, "grad_norm": 8.190394401550293, "learning_rate": 3.0037633928128134e-06, "loss": 0.097, "step": 412250 }, { "epoch": 4.05, "grad_norm": 13.29401969909668, "learning_rate": 3.0036392703585654e-06, "loss": 0.0974, "step": 412275 }, { "epoch": 4.05, "grad_norm": 0.740767776966095, "learning_rate": 3.0035151479043166e-06, "loss": 0.0555, "step": 412300 }, { "epoch": 4.05, "grad_norm": 20.56608772277832, "learning_rate": 3.0033910254500683e-06, "loss": 0.1121, "step": 412325 }, { "epoch": 4.05, "grad_norm": 8.532316207885742, "learning_rate": 3.00326690299582e-06, "loss": 0.0624, "step": 412350 }, { "epoch": 4.05, "grad_norm": 10.542828559875488, "learning_rate": 3.0031427805415715e-06, "loss": 0.0905, "step": 412375 }, { "epoch": 4.05, "grad_norm": 1.6555261611938477, "learning_rate": 3.0030186580873228e-06, "loss": 0.0699, "step": 412400 }, { "epoch": 4.06, "grad_norm": 11.012858390808105, "learning_rate": 3.002894535633075e-06, "loss": 0.1019, "step": 412425 }, { "epoch": 4.06, "grad_norm": 7.882094383239746, "learning_rate": 3.002770413178826e-06, "loss": 0.0956, "step": 412450 }, { "epoch": 4.06, "grad_norm": 13.425252914428711, "learning_rate": 3.0026462907245772e-06, "loss": 0.1116, "step": 412475 }, { "epoch": 4.06, "grad_norm": 1.9072524309158325, "learning_rate": 3.0025221682703293e-06, "loss": 0.0883, "step": 412500 }, { "epoch": 4.06, "grad_norm": Infinity, "learning_rate": 3.0024030107142503e-06, "loss": 0.1244, "step": 412525 }, { "epoch": 4.06, "grad_norm": 13.20173454284668, "learning_rate": 3.002278888260002e-06, "loss": 0.0576, "step": 412550 }, { "epoch": 4.06, "grad_norm": 12.361560821533203, "learning_rate": 3.0021547658057536e-06, "loss": 0.1182, "step": 412575 }, { "epoch": 4.06, "grad_norm": 5.797773838043213, "learning_rate": 3.0020306433515052e-06, "loss": 0.0738, "step": 412600 }, { "epoch": 4.06, "grad_norm": 11.700647354125977, "learning_rate": 3.0019065208972564e-06, "loss": 0.0972, "step": 412625 }, { "epoch": 4.06, "grad_norm": 8.767143249511719, "learning_rate": 3.0017823984430085e-06, "loss": 0.0877, "step": 412650 }, { "epoch": 4.06, "grad_norm": 15.052199363708496, "learning_rate": 3.0016582759887597e-06, "loss": 0.1233, "step": 412675 }, { "epoch": 4.06, "grad_norm": 6.187386989593506, "learning_rate": 3.001534153534511e-06, "loss": 0.0638, "step": 412700 }, { "epoch": 4.06, "grad_norm": 10.939775466918945, "learning_rate": 3.001410031080263e-06, "loss": 0.1105, "step": 412725 }, { "epoch": 4.06, "grad_norm": 5.239241123199463, "learning_rate": 3.001285908626014e-06, "loss": 0.0731, "step": 412750 }, { "epoch": 4.06, "grad_norm": 15.464594841003418, "learning_rate": 3.001161786171766e-06, "loss": 0.0989, "step": 412775 }, { "epoch": 4.06, "grad_norm": 6.093519687652588, "learning_rate": 3.0010376637175175e-06, "loss": 0.0742, "step": 412800 }, { "epoch": 4.06, "grad_norm": 22.542621612548828, "learning_rate": 3.000913541263269e-06, "loss": 0.1048, "step": 412825 }, { "epoch": 4.06, "grad_norm": 2.6839890480041504, "learning_rate": 3.0007894188090203e-06, "loss": 0.0863, "step": 412850 }, { "epoch": 4.06, "grad_norm": 21.309370040893555, "learning_rate": 3.0006652963547724e-06, "loss": 0.11, "step": 412875 }, { "epoch": 4.06, "grad_norm": 5.804527759552002, "learning_rate": 3.0005411739005236e-06, "loss": 0.0766, "step": 412900 }, { "epoch": 4.06, "grad_norm": 13.610247611999512, "learning_rate": 3.0004170514462748e-06, "loss": 0.1169, "step": 412925 }, { "epoch": 4.06, "grad_norm": 11.789127349853516, "learning_rate": 3.000292928992027e-06, "loss": 0.0808, "step": 412950 }, { "epoch": 4.06, "grad_norm": 6.666757106781006, "learning_rate": 3.000168806537778e-06, "loss": 0.0757, "step": 412975 }, { "epoch": 4.06, "grad_norm": 2.887791872024536, "learning_rate": 3.0000446840835297e-06, "loss": 0.0575, "step": 413000 }, { "epoch": 4.06, "grad_norm": 5.969167232513428, "learning_rate": 2.9999205616292813e-06, "loss": 0.1321, "step": 413025 }, { "epoch": 4.06, "grad_norm": 2.332465171813965, "learning_rate": 2.999796439175033e-06, "loss": 0.0528, "step": 413050 }, { "epoch": 4.06, "grad_norm": 10.332504272460938, "learning_rate": 2.999672316720784e-06, "loss": 0.1006, "step": 413075 }, { "epoch": 4.06, "grad_norm": 5.54313325881958, "learning_rate": 2.9995481942665362e-06, "loss": 0.0918, "step": 413100 }, { "epoch": 4.06, "grad_norm": 6.847917556762695, "learning_rate": 2.9994240718122874e-06, "loss": 0.0958, "step": 413125 }, { "epoch": 4.06, "grad_norm": 1.1969172954559326, "learning_rate": 2.9992999493580387e-06, "loss": 0.0869, "step": 413150 }, { "epoch": 4.06, "grad_norm": 7.239313125610352, "learning_rate": 2.9991758269037907e-06, "loss": 0.08, "step": 413175 }, { "epoch": 4.06, "grad_norm": 8.530464172363281, "learning_rate": 2.999051704449542e-06, "loss": 0.0935, "step": 413200 }, { "epoch": 4.06, "grad_norm": 9.961590766906738, "learning_rate": 2.9989275819952936e-06, "loss": 0.1139, "step": 413225 }, { "epoch": 4.06, "grad_norm": 13.967961311340332, "learning_rate": 2.9988034595410448e-06, "loss": 0.0702, "step": 413250 }, { "epoch": 4.06, "grad_norm": 14.922821044921875, "learning_rate": 2.998679337086797e-06, "loss": 0.0944, "step": 413275 }, { "epoch": 4.06, "grad_norm": 4.560370922088623, "learning_rate": 2.998555214632548e-06, "loss": 0.0707, "step": 413300 }, { "epoch": 4.06, "grad_norm": 11.779480934143066, "learning_rate": 2.9984310921782993e-06, "loss": 0.1047, "step": 413325 }, { "epoch": 4.06, "grad_norm": 5.330445766448975, "learning_rate": 2.9983069697240513e-06, "loss": 0.0629, "step": 413350 }, { "epoch": 4.06, "grad_norm": 5.280592918395996, "learning_rate": 2.9981828472698025e-06, "loss": 0.1333, "step": 413375 }, { "epoch": 4.06, "grad_norm": 1.8176106214523315, "learning_rate": 2.998058724815554e-06, "loss": 0.0714, "step": 413400 }, { "epoch": 4.06, "grad_norm": 8.991997718811035, "learning_rate": 2.997934602361306e-06, "loss": 0.0844, "step": 413425 }, { "epoch": 4.07, "grad_norm": 5.207722187042236, "learning_rate": 2.9978104799070574e-06, "loss": 0.0604, "step": 413450 }, { "epoch": 4.07, "grad_norm": 9.130894660949707, "learning_rate": 2.9976863574528086e-06, "loss": 0.0854, "step": 413475 }, { "epoch": 4.07, "grad_norm": 6.248180389404297, "learning_rate": 2.9975622349985607e-06, "loss": 0.0693, "step": 413500 }, { "epoch": 4.07, "grad_norm": 13.161128044128418, "learning_rate": 2.997438112544312e-06, "loss": 0.1033, "step": 413525 }, { "epoch": 4.07, "grad_norm": 6.356400012969971, "learning_rate": 2.997313990090063e-06, "loss": 0.0856, "step": 413550 }, { "epoch": 4.07, "grad_norm": 7.965918064117432, "learning_rate": 2.997189867635815e-06, "loss": 0.096, "step": 413575 }, { "epoch": 4.07, "grad_norm": 4.554141521453857, "learning_rate": 2.9970657451815664e-06, "loss": 0.0723, "step": 413600 }, { "epoch": 4.07, "grad_norm": 7.235602378845215, "learning_rate": 2.996941622727318e-06, "loss": 0.088, "step": 413625 }, { "epoch": 4.07, "grad_norm": 5.985235214233398, "learning_rate": 2.9968175002730697e-06, "loss": 0.0483, "step": 413650 }, { "epoch": 4.07, "grad_norm": 13.505568504333496, "learning_rate": 2.9966933778188213e-06, "loss": 0.1071, "step": 413675 }, { "epoch": 4.07, "grad_norm": 2.611621379852295, "learning_rate": 2.9965692553645725e-06, "loss": 0.0555, "step": 413700 }, { "epoch": 4.07, "grad_norm": 12.433173179626465, "learning_rate": 2.9964451329103246e-06, "loss": 0.0962, "step": 413725 }, { "epoch": 4.07, "grad_norm": 2.9246296882629395, "learning_rate": 2.9963210104560758e-06, "loss": 0.0795, "step": 413750 }, { "epoch": 4.07, "grad_norm": 11.24395751953125, "learning_rate": 2.996196888001827e-06, "loss": 0.0887, "step": 413775 }, { "epoch": 4.07, "grad_norm": 7.706377029418945, "learning_rate": 2.996072765547579e-06, "loss": 0.0547, "step": 413800 }, { "epoch": 4.07, "grad_norm": 5.746007919311523, "learning_rate": 2.9959486430933303e-06, "loss": 0.0892, "step": 413825 }, { "epoch": 4.07, "grad_norm": 2.814957618713379, "learning_rate": 2.995824520639082e-06, "loss": 0.056, "step": 413850 }, { "epoch": 4.07, "grad_norm": 11.787484169006348, "learning_rate": 2.9957003981848335e-06, "loss": 0.0982, "step": 413875 }, { "epoch": 4.07, "grad_norm": 1.621734380722046, "learning_rate": 2.995576275730585e-06, "loss": 0.0658, "step": 413900 }, { "epoch": 4.07, "grad_norm": 10.905622482299805, "learning_rate": 2.9954521532763364e-06, "loss": 0.099, "step": 413925 }, { "epoch": 4.07, "grad_norm": 6.82398796081543, "learning_rate": 2.9953280308220884e-06, "loss": 0.0674, "step": 413950 }, { "epoch": 4.07, "grad_norm": 16.405799865722656, "learning_rate": 2.9952039083678396e-06, "loss": 0.1074, "step": 413975 }, { "epoch": 4.07, "grad_norm": 0.6254077553749084, "learning_rate": 2.995079785913591e-06, "loss": 0.0863, "step": 414000 }, { "epoch": 4.07, "grad_norm": 14.832366943359375, "learning_rate": 2.994955663459343e-06, "loss": 0.1048, "step": 414025 }, { "epoch": 4.07, "grad_norm": 0.32086557149887085, "learning_rate": 2.994831541005094e-06, "loss": 0.0695, "step": 414050 }, { "epoch": 4.07, "grad_norm": 10.271613121032715, "learning_rate": 2.9947074185508458e-06, "loss": 0.0963, "step": 414075 }, { "epoch": 4.07, "grad_norm": 1.4021334648132324, "learning_rate": 2.994583296096597e-06, "loss": 0.0871, "step": 414100 }, { "epoch": 4.07, "grad_norm": 11.967057228088379, "learning_rate": 2.994459173642349e-06, "loss": 0.0889, "step": 414125 }, { "epoch": 4.07, "grad_norm": 0.11324442923069, "learning_rate": 2.9943350511881002e-06, "loss": 0.0691, "step": 414150 }, { "epoch": 4.07, "grad_norm": 16.350419998168945, "learning_rate": 2.9942109287338515e-06, "loss": 0.1171, "step": 414175 }, { "epoch": 4.07, "grad_norm": 8.332027435302734, "learning_rate": 2.9940868062796035e-06, "loss": 0.0713, "step": 414200 }, { "epoch": 4.07, "grad_norm": 12.278008460998535, "learning_rate": 2.9939626838253547e-06, "loss": 0.1035, "step": 414225 }, { "epoch": 4.07, "grad_norm": 2.8634345531463623, "learning_rate": 2.9938385613711064e-06, "loss": 0.0919, "step": 414250 }, { "epoch": 4.07, "grad_norm": 8.732605934143066, "learning_rate": 2.993714438916858e-06, "loss": 0.1072, "step": 414275 }, { "epoch": 4.07, "grad_norm": 8.421409606933594, "learning_rate": 2.9935903164626096e-06, "loss": 0.0545, "step": 414300 }, { "epoch": 4.07, "grad_norm": 16.284549713134766, "learning_rate": 2.993466194008361e-06, "loss": 0.1084, "step": 414325 }, { "epoch": 4.07, "grad_norm": 14.241223335266113, "learning_rate": 2.993342071554113e-06, "loss": 0.0989, "step": 414350 }, { "epoch": 4.07, "grad_norm": 12.503983497619629, "learning_rate": 2.993217949099864e-06, "loss": 0.0746, "step": 414375 }, { "epoch": 4.07, "grad_norm": 1.5337363481521606, "learning_rate": 2.9930938266456157e-06, "loss": 0.0667, "step": 414400 }, { "epoch": 4.07, "grad_norm": 8.592606544494629, "learning_rate": 2.9929697041913674e-06, "loss": 0.1173, "step": 414425 }, { "epoch": 4.07, "grad_norm": 9.817343711853027, "learning_rate": 2.992845581737119e-06, "loss": 0.091, "step": 414450 }, { "epoch": 4.08, "grad_norm": 17.27547836303711, "learning_rate": 2.9927214592828702e-06, "loss": 0.1169, "step": 414475 }, { "epoch": 4.08, "grad_norm": 14.510972023010254, "learning_rate": 2.9925973368286223e-06, "loss": 0.0772, "step": 414500 }, { "epoch": 4.08, "grad_norm": 14.172741889953613, "learning_rate": 2.9924732143743735e-06, "loss": 0.0918, "step": 414525 }, { "epoch": 4.08, "grad_norm": 9.034290313720703, "learning_rate": 2.9923490919201247e-06, "loss": 0.0808, "step": 414550 }, { "epoch": 4.08, "grad_norm": 11.478067398071289, "learning_rate": 2.9922249694658768e-06, "loss": 0.1157, "step": 414575 }, { "epoch": 4.08, "grad_norm": 2.034183979034424, "learning_rate": 2.992100847011628e-06, "loss": 0.0583, "step": 414600 }, { "epoch": 4.08, "grad_norm": 9.430399894714355, "learning_rate": 2.9919767245573796e-06, "loss": 0.0778, "step": 414625 }, { "epoch": 4.08, "grad_norm": 5.7110595703125, "learning_rate": 2.9918526021031312e-06, "loss": 0.0648, "step": 414650 }, { "epoch": 4.08, "grad_norm": 12.964475631713867, "learning_rate": 2.991728479648883e-06, "loss": 0.0775, "step": 414675 }, { "epoch": 4.08, "grad_norm": 3.5971016883850098, "learning_rate": 2.991604357194634e-06, "loss": 0.0581, "step": 414700 }, { "epoch": 4.08, "grad_norm": 13.910923957824707, "learning_rate": 2.991480234740386e-06, "loss": 0.089, "step": 414725 }, { "epoch": 4.08, "grad_norm": 3.346231460571289, "learning_rate": 2.9913561122861374e-06, "loss": 0.0827, "step": 414750 }, { "epoch": 4.08, "grad_norm": 22.051536560058594, "learning_rate": 2.9912319898318886e-06, "loss": 0.1014, "step": 414775 }, { "epoch": 4.08, "grad_norm": 11.780518531799316, "learning_rate": 2.9911078673776406e-06, "loss": 0.0757, "step": 414800 }, { "epoch": 4.08, "grad_norm": 13.501142501831055, "learning_rate": 2.990983744923392e-06, "loss": 0.1053, "step": 414825 }, { "epoch": 4.08, "grad_norm": 1.9103055000305176, "learning_rate": 2.9908596224691435e-06, "loss": 0.0802, "step": 414850 }, { "epoch": 4.08, "grad_norm": 18.092859268188477, "learning_rate": 2.990735500014895e-06, "loss": 0.1054, "step": 414875 }, { "epoch": 4.08, "grad_norm": 1.4919966459274292, "learning_rate": 2.9906113775606467e-06, "loss": 0.0931, "step": 414900 }, { "epoch": 4.08, "grad_norm": 21.155668258666992, "learning_rate": 2.990487255106398e-06, "loss": 0.101, "step": 414925 }, { "epoch": 4.08, "grad_norm": 4.972846031188965, "learning_rate": 2.990363132652149e-06, "loss": 0.0855, "step": 414950 }, { "epoch": 4.08, "grad_norm": 7.771456718444824, "learning_rate": 2.9902390101979012e-06, "loss": 0.102, "step": 414975 }, { "epoch": 4.08, "grad_norm": 6.688016891479492, "learning_rate": 2.9901148877436524e-06, "loss": 0.0695, "step": 415000 }, { "epoch": 4.08, "grad_norm": 11.501906394958496, "learning_rate": 2.989990765289404e-06, "loss": 0.131, "step": 415025 }, { "epoch": 4.08, "grad_norm": 0.9680315256118774, "learning_rate": 2.9898666428351557e-06, "loss": 0.0762, "step": 415050 }, { "epoch": 4.08, "grad_norm": 8.521467208862305, "learning_rate": 2.9897425203809073e-06, "loss": 0.0985, "step": 415075 }, { "epoch": 4.08, "grad_norm": 0.6095636487007141, "learning_rate": 2.9896183979266586e-06, "loss": 0.0873, "step": 415100 }, { "epoch": 4.08, "grad_norm": 3.2469239234924316, "learning_rate": 2.9894942754724106e-06, "loss": 0.1071, "step": 415125 }, { "epoch": 4.08, "grad_norm": 6.4860334396362305, "learning_rate": 2.9893751179163316e-06, "loss": 0.0908, "step": 415150 }, { "epoch": 4.08, "grad_norm": 15.087000846862793, "learning_rate": 2.9892509954620837e-06, "loss": 0.124, "step": 415175 }, { "epoch": 4.08, "grad_norm": 8.195572853088379, "learning_rate": 2.989126873007835e-06, "loss": 0.0795, "step": 415200 }, { "epoch": 4.08, "grad_norm": 10.403007507324219, "learning_rate": 2.989002750553586e-06, "loss": 0.0888, "step": 415225 }, { "epoch": 4.08, "grad_norm": 0.3725709915161133, "learning_rate": 2.988878628099338e-06, "loss": 0.078, "step": 415250 }, { "epoch": 4.08, "grad_norm": 6.519998550415039, "learning_rate": 2.9887545056450894e-06, "loss": 0.0816, "step": 415275 }, { "epoch": 4.08, "grad_norm": 1.5852800607681274, "learning_rate": 2.988630383190841e-06, "loss": 0.0635, "step": 415300 }, { "epoch": 4.08, "grad_norm": 10.341733932495117, "learning_rate": 2.9885062607365927e-06, "loss": 0.098, "step": 415325 }, { "epoch": 4.08, "grad_norm": 2.2236011028289795, "learning_rate": 2.9883821382823443e-06, "loss": 0.0691, "step": 415350 }, { "epoch": 4.08, "grad_norm": 10.083695411682129, "learning_rate": 2.9882580158280955e-06, "loss": 0.1038, "step": 415375 }, { "epoch": 4.08, "grad_norm": 0.5840553045272827, "learning_rate": 2.9881338933738476e-06, "loss": 0.0758, "step": 415400 }, { "epoch": 4.08, "grad_norm": 11.95617961883545, "learning_rate": 2.9880097709195988e-06, "loss": 0.1037, "step": 415425 }, { "epoch": 4.08, "grad_norm": 3.7684078216552734, "learning_rate": 2.98788564846535e-06, "loss": 0.0726, "step": 415450 }, { "epoch": 4.09, "grad_norm": 18.388071060180664, "learning_rate": 2.987761526011102e-06, "loss": 0.084, "step": 415475 }, { "epoch": 4.09, "grad_norm": 1.4248818159103394, "learning_rate": 2.9876374035568533e-06, "loss": 0.076, "step": 415500 }, { "epoch": 4.09, "grad_norm": 20.5856876373291, "learning_rate": 2.987513281102605e-06, "loss": 0.1362, "step": 415525 }, { "epoch": 4.09, "grad_norm": 4.598743915557861, "learning_rate": 2.987389158648356e-06, "loss": 0.1036, "step": 415550 }, { "epoch": 4.09, "grad_norm": 12.892382621765137, "learning_rate": 2.987265036194108e-06, "loss": 0.0962, "step": 415575 }, { "epoch": 4.09, "grad_norm": 1.071838617324829, "learning_rate": 2.9871409137398594e-06, "loss": 0.075, "step": 415600 }, { "epoch": 4.09, "grad_norm": 22.231143951416016, "learning_rate": 2.9870167912856106e-06, "loss": 0.0978, "step": 415625 }, { "epoch": 4.09, "grad_norm": 4.127457141876221, "learning_rate": 2.9868926688313626e-06, "loss": 0.0589, "step": 415650 }, { "epoch": 4.09, "grad_norm": 14.379109382629395, "learning_rate": 2.986768546377114e-06, "loss": 0.1336, "step": 415675 }, { "epoch": 4.09, "grad_norm": 2.2664096355438232, "learning_rate": 2.9866444239228655e-06, "loss": 0.0512, "step": 415700 }, { "epoch": 4.09, "grad_norm": 18.87982940673828, "learning_rate": 2.986520301468617e-06, "loss": 0.1006, "step": 415725 }, { "epoch": 4.09, "grad_norm": 1.8950709104537964, "learning_rate": 2.9863961790143688e-06, "loss": 0.0898, "step": 415750 }, { "epoch": 4.09, "grad_norm": 7.926838397979736, "learning_rate": 2.98627205656012e-06, "loss": 0.0979, "step": 415775 }, { "epoch": 4.09, "grad_norm": 1.5651637315750122, "learning_rate": 2.986147934105872e-06, "loss": 0.0728, "step": 415800 }, { "epoch": 4.09, "grad_norm": 20.140426635742188, "learning_rate": 2.9860238116516232e-06, "loss": 0.1132, "step": 415825 }, { "epoch": 4.09, "grad_norm": 7.30863094329834, "learning_rate": 2.9858996891973745e-06, "loss": 0.0585, "step": 415850 }, { "epoch": 4.09, "grad_norm": 11.667551040649414, "learning_rate": 2.9857755667431265e-06, "loss": 0.0968, "step": 415875 }, { "epoch": 4.09, "grad_norm": 4.11326789855957, "learning_rate": 2.9856514442888777e-06, "loss": 0.0662, "step": 415900 }, { "epoch": 4.09, "grad_norm": 13.748859405517578, "learning_rate": 2.9855273218346294e-06, "loss": 0.1027, "step": 415925 }, { "epoch": 4.09, "grad_norm": 1.6187728643417358, "learning_rate": 2.985403199380381e-06, "loss": 0.0717, "step": 415950 }, { "epoch": 4.09, "grad_norm": 11.571026802062988, "learning_rate": 2.9852790769261326e-06, "loss": 0.1012, "step": 415975 }, { "epoch": 4.09, "grad_norm": 3.4093098640441895, "learning_rate": 2.985154954471884e-06, "loss": 0.0669, "step": 416000 }, { "epoch": 4.09, "grad_norm": 7.420500755310059, "learning_rate": 2.985030832017636e-06, "loss": 0.11, "step": 416025 }, { "epoch": 4.09, "grad_norm": 0.07444903254508972, "learning_rate": 2.984906709563387e-06, "loss": 0.0679, "step": 416050 }, { "epoch": 4.09, "grad_norm": 16.84486961364746, "learning_rate": 2.9847825871091383e-06, "loss": 0.1484, "step": 416075 }, { "epoch": 4.09, "grad_norm": 7.833401679992676, "learning_rate": 2.9846584646548904e-06, "loss": 0.0973, "step": 416100 }, { "epoch": 4.09, "grad_norm": 0.5271584391593933, "learning_rate": 2.9845343422006416e-06, "loss": 0.1268, "step": 416125 }, { "epoch": 4.09, "grad_norm": 5.198213577270508, "learning_rate": 2.9844102197463932e-06, "loss": 0.0645, "step": 416150 }, { "epoch": 4.09, "grad_norm": 7.3841233253479, "learning_rate": 2.984286097292145e-06, "loss": 0.0994, "step": 416175 }, { "epoch": 4.09, "grad_norm": 0.9351028203964233, "learning_rate": 2.9841619748378965e-06, "loss": 0.0652, "step": 416200 }, { "epoch": 4.09, "grad_norm": 6.978800296783447, "learning_rate": 2.9840378523836477e-06, "loss": 0.0755, "step": 416225 }, { "epoch": 4.09, "grad_norm": 1.8643187284469604, "learning_rate": 2.9839137299293998e-06, "loss": 0.0724, "step": 416250 }, { "epoch": 4.09, "grad_norm": 21.492395401000977, "learning_rate": 2.983789607475151e-06, "loss": 0.1069, "step": 416275 }, { "epoch": 4.09, "grad_norm": 1.1023355722427368, "learning_rate": 2.983665485020902e-06, "loss": 0.0608, "step": 416300 }, { "epoch": 4.09, "grad_norm": 9.52021598815918, "learning_rate": 2.9835413625666543e-06, "loss": 0.1081, "step": 416325 }, { "epoch": 4.09, "grad_norm": 4.953060626983643, "learning_rate": 2.9834172401124055e-06, "loss": 0.0807, "step": 416350 }, { "epoch": 4.09, "grad_norm": 12.64793872833252, "learning_rate": 2.983293117658157e-06, "loss": 0.1044, "step": 416375 }, { "epoch": 4.09, "grad_norm": 1.8982152938842773, "learning_rate": 2.9831689952039083e-06, "loss": 0.0734, "step": 416400 }, { "epoch": 4.09, "grad_norm": 7.154331207275391, "learning_rate": 2.9830448727496604e-06, "loss": 0.1178, "step": 416425 }, { "epoch": 4.09, "grad_norm": 10.61752700805664, "learning_rate": 2.9829207502954116e-06, "loss": 0.0943, "step": 416450 }, { "epoch": 4.09, "grad_norm": 16.292478561401367, "learning_rate": 2.982796627841163e-06, "loss": 0.1128, "step": 416475 }, { "epoch": 4.1, "grad_norm": 7.391119003295898, "learning_rate": 2.982672505386915e-06, "loss": 0.0738, "step": 416500 }, { "epoch": 4.1, "grad_norm": 16.94753646850586, "learning_rate": 2.982548382932666e-06, "loss": 0.1403, "step": 416525 }, { "epoch": 4.1, "grad_norm": 2.130924940109253, "learning_rate": 2.9824242604784177e-06, "loss": 0.0681, "step": 416550 }, { "epoch": 4.1, "grad_norm": 16.360706329345703, "learning_rate": 2.9823001380241693e-06, "loss": 0.1144, "step": 416575 }, { "epoch": 4.1, "grad_norm": 5.479894638061523, "learning_rate": 2.982176015569921e-06, "loss": 0.0727, "step": 416600 }, { "epoch": 4.1, "grad_norm": 10.961891174316406, "learning_rate": 2.982051893115672e-06, "loss": 0.0887, "step": 416625 }, { "epoch": 4.1, "grad_norm": 1.1768585443496704, "learning_rate": 2.9819277706614242e-06, "loss": 0.0584, "step": 416650 }, { "epoch": 4.1, "grad_norm": 13.325740814208984, "learning_rate": 2.9818036482071754e-06, "loss": 0.0893, "step": 416675 }, { "epoch": 4.1, "grad_norm": 0.4182198643684387, "learning_rate": 2.9816795257529267e-06, "loss": 0.0565, "step": 416700 }, { "epoch": 4.1, "grad_norm": 16.671283721923828, "learning_rate": 2.9815554032986787e-06, "loss": 0.1218, "step": 416725 }, { "epoch": 4.1, "grad_norm": 3.5231573581695557, "learning_rate": 2.98143128084443e-06, "loss": 0.0853, "step": 416750 }, { "epoch": 4.1, "grad_norm": 11.702616691589355, "learning_rate": 2.9813071583901816e-06, "loss": 0.1092, "step": 416775 }, { "epoch": 4.1, "grad_norm": 2.5111961364746094, "learning_rate": 2.981183035935933e-06, "loss": 0.0809, "step": 416800 }, { "epoch": 4.1, "grad_norm": 14.197504043579102, "learning_rate": 2.981058913481685e-06, "loss": 0.0871, "step": 416825 }, { "epoch": 4.1, "grad_norm": 0.2103942185640335, "learning_rate": 2.980934791027436e-06, "loss": 0.0811, "step": 416850 }, { "epoch": 4.1, "grad_norm": 4.858949661254883, "learning_rate": 2.980810668573188e-06, "loss": 0.1003, "step": 416875 }, { "epoch": 4.1, "grad_norm": 3.1071877479553223, "learning_rate": 2.9806865461189393e-06, "loss": 0.055, "step": 416900 }, { "epoch": 4.1, "grad_norm": 11.216129302978516, "learning_rate": 2.9805624236646905e-06, "loss": 0.125, "step": 416925 }, { "epoch": 4.1, "grad_norm": 7.066404819488525, "learning_rate": 2.9804383012104426e-06, "loss": 0.0652, "step": 416950 }, { "epoch": 4.1, "grad_norm": 14.383929252624512, "learning_rate": 2.980314178756194e-06, "loss": 0.0907, "step": 416975 }, { "epoch": 4.1, "grad_norm": 5.470071792602539, "learning_rate": 2.9801900563019454e-06, "loss": 0.0672, "step": 417000 }, { "epoch": 4.1, "grad_norm": 22.213825225830078, "learning_rate": 2.980065933847697e-06, "loss": 0.086, "step": 417025 }, { "epoch": 4.1, "grad_norm": 2.526029586791992, "learning_rate": 2.9799418113934487e-06, "loss": 0.0759, "step": 417050 }, { "epoch": 4.1, "grad_norm": 22.164796829223633, "learning_rate": 2.9798176889392e-06, "loss": 0.1284, "step": 417075 }, { "epoch": 4.1, "grad_norm": 5.511498928070068, "learning_rate": 2.979693566484952e-06, "loss": 0.0647, "step": 417100 }, { "epoch": 4.1, "grad_norm": 12.130193710327148, "learning_rate": 2.979569444030703e-06, "loss": 0.1363, "step": 417125 }, { "epoch": 4.1, "grad_norm": 15.812653541564941, "learning_rate": 2.979445321576455e-06, "loss": 0.0867, "step": 417150 }, { "epoch": 4.1, "grad_norm": 4.632350444793701, "learning_rate": 2.9793211991222065e-06, "loss": 0.0997, "step": 417175 }, { "epoch": 4.1, "grad_norm": 3.3424158096313477, "learning_rate": 2.9791970766679577e-06, "loss": 0.0604, "step": 417200 }, { "epoch": 4.1, "grad_norm": 14.200858116149902, "learning_rate": 2.9790729542137093e-06, "loss": 0.0773, "step": 417225 }, { "epoch": 4.1, "grad_norm": 4.724984169006348, "learning_rate": 2.9789537966576308e-06, "loss": 0.0684, "step": 417250 }, { "epoch": 4.1, "grad_norm": 13.855085372924805, "learning_rate": 2.9788296742033824e-06, "loss": 0.1059, "step": 417275 }, { "epoch": 4.1, "grad_norm": 5.676109313964844, "learning_rate": 2.9787055517491336e-06, "loss": 0.0781, "step": 417300 }, { "epoch": 4.1, "grad_norm": 16.282665252685547, "learning_rate": 2.9785814292948857e-06, "loss": 0.1165, "step": 417325 }, { "epoch": 4.1, "grad_norm": 2.120438575744629, "learning_rate": 2.978457306840637e-06, "loss": 0.0598, "step": 417350 }, { "epoch": 4.1, "grad_norm": 6.909616470336914, "learning_rate": 2.9783331843863885e-06, "loss": 0.1047, "step": 417375 }, { "epoch": 4.1, "grad_norm": 1.835578441619873, "learning_rate": 2.97820906193214e-06, "loss": 0.0693, "step": 417400 }, { "epoch": 4.1, "grad_norm": 14.13172435760498, "learning_rate": 2.9780849394778918e-06, "loss": 0.0977, "step": 417425 }, { "epoch": 4.1, "grad_norm": 3.8619720935821533, "learning_rate": 2.977960817023643e-06, "loss": 0.0873, "step": 417450 }, { "epoch": 4.1, "grad_norm": 19.47747039794922, "learning_rate": 2.977836694569395e-06, "loss": 0.0863, "step": 417475 }, { "epoch": 4.1, "grad_norm": 0.09167081117630005, "learning_rate": 2.9777125721151463e-06, "loss": 0.0434, "step": 417500 }, { "epoch": 4.11, "grad_norm": 12.121578216552734, "learning_rate": 2.9775884496608975e-06, "loss": 0.1171, "step": 417525 }, { "epoch": 4.11, "grad_norm": 8.843315124511719, "learning_rate": 2.9774643272066495e-06, "loss": 0.0683, "step": 417550 }, { "epoch": 4.11, "grad_norm": 9.596783638000488, "learning_rate": 2.9773402047524007e-06, "loss": 0.1031, "step": 417575 }, { "epoch": 4.11, "grad_norm": 5.345171928405762, "learning_rate": 2.9772160822981524e-06, "loss": 0.0584, "step": 417600 }, { "epoch": 4.11, "grad_norm": 16.341638565063477, "learning_rate": 2.977091959843904e-06, "loss": 0.1454, "step": 417625 }, { "epoch": 4.11, "grad_norm": 1.7561287879943848, "learning_rate": 2.9769678373896556e-06, "loss": 0.0915, "step": 417650 }, { "epoch": 4.11, "grad_norm": 14.957435607910156, "learning_rate": 2.976843714935407e-06, "loss": 0.0882, "step": 417675 }, { "epoch": 4.11, "grad_norm": 0.5096042156219482, "learning_rate": 2.976719592481159e-06, "loss": 0.0888, "step": 417700 }, { "epoch": 4.11, "grad_norm": 12.506336212158203, "learning_rate": 2.97659547002691e-06, "loss": 0.0942, "step": 417725 }, { "epoch": 4.11, "grad_norm": 7.7204179763793945, "learning_rate": 2.9764713475726613e-06, "loss": 0.0552, "step": 417750 }, { "epoch": 4.11, "grad_norm": 14.919122695922852, "learning_rate": 2.9763472251184134e-06, "loss": 0.0936, "step": 417775 }, { "epoch": 4.11, "grad_norm": 5.121089458465576, "learning_rate": 2.9762231026641646e-06, "loss": 0.0703, "step": 417800 }, { "epoch": 4.11, "grad_norm": 18.047239303588867, "learning_rate": 2.9760989802099162e-06, "loss": 0.125, "step": 417825 }, { "epoch": 4.11, "grad_norm": 3.1689014434814453, "learning_rate": 2.9759748577556674e-06, "loss": 0.0681, "step": 417850 }, { "epoch": 4.11, "grad_norm": 11.503960609436035, "learning_rate": 2.9758507353014195e-06, "loss": 0.0981, "step": 417875 }, { "epoch": 4.11, "grad_norm": 0.5463144183158875, "learning_rate": 2.9757266128471707e-06, "loss": 0.0685, "step": 417900 }, { "epoch": 4.11, "grad_norm": 14.706599235534668, "learning_rate": 2.975602490392922e-06, "loss": 0.1238, "step": 417925 }, { "epoch": 4.11, "grad_norm": 0.6208307147026062, "learning_rate": 2.975478367938674e-06, "loss": 0.0639, "step": 417950 }, { "epoch": 4.11, "grad_norm": 13.576848030090332, "learning_rate": 2.975354245484425e-06, "loss": 0.0956, "step": 417975 }, { "epoch": 4.11, "grad_norm": 4.5375494956970215, "learning_rate": 2.975230123030177e-06, "loss": 0.0716, "step": 418000 }, { "epoch": 4.11, "grad_norm": 6.904353141784668, "learning_rate": 2.9751060005759285e-06, "loss": 0.1081, "step": 418025 }, { "epoch": 4.11, "grad_norm": 17.913442611694336, "learning_rate": 2.97498187812168e-06, "loss": 0.066, "step": 418050 }, { "epoch": 4.11, "grad_norm": 13.841324806213379, "learning_rate": 2.9748577556674313e-06, "loss": 0.1034, "step": 418075 }, { "epoch": 4.11, "grad_norm": 0.03042149543762207, "learning_rate": 2.9747336332131834e-06, "loss": 0.0824, "step": 418100 }, { "epoch": 4.11, "grad_norm": 10.134845733642578, "learning_rate": 2.9746095107589346e-06, "loss": 0.1341, "step": 418125 }, { "epoch": 4.11, "grad_norm": 2.3920540809631348, "learning_rate": 2.974485388304686e-06, "loss": 0.0604, "step": 418150 }, { "epoch": 4.11, "grad_norm": 20.836294174194336, "learning_rate": 2.974361265850438e-06, "loss": 0.0779, "step": 418175 }, { "epoch": 4.11, "grad_norm": 7.831419467926025, "learning_rate": 2.974237143396189e-06, "loss": 0.0578, "step": 418200 }, { "epoch": 4.11, "grad_norm": 18.343914031982422, "learning_rate": 2.9741130209419407e-06, "loss": 0.1322, "step": 418225 }, { "epoch": 4.11, "grad_norm": 1.6575043201446533, "learning_rate": 2.9739888984876923e-06, "loss": 0.0883, "step": 418250 }, { "epoch": 4.11, "grad_norm": 12.980866432189941, "learning_rate": 2.973864776033444e-06, "loss": 0.0795, "step": 418275 }, { "epoch": 4.11, "grad_norm": 2.960991859436035, "learning_rate": 2.973740653579195e-06, "loss": 0.0859, "step": 418300 }, { "epoch": 4.11, "grad_norm": 14.340424537658691, "learning_rate": 2.9736165311249472e-06, "loss": 0.1429, "step": 418325 }, { "epoch": 4.11, "grad_norm": 2.7849292755126953, "learning_rate": 2.9734924086706985e-06, "loss": 0.0719, "step": 418350 }, { "epoch": 4.11, "grad_norm": 10.822123527526855, "learning_rate": 2.9733682862164497e-06, "loss": 0.1203, "step": 418375 }, { "epoch": 4.11, "grad_norm": 3.8893253803253174, "learning_rate": 2.9732441637622017e-06, "loss": 0.0573, "step": 418400 }, { "epoch": 4.11, "grad_norm": 8.41240119934082, "learning_rate": 2.973120041307953e-06, "loss": 0.113, "step": 418425 }, { "epoch": 4.11, "grad_norm": 8.40343952178955, "learning_rate": 2.9729959188537046e-06, "loss": 0.0731, "step": 418450 }, { "epoch": 4.11, "grad_norm": 7.102429389953613, "learning_rate": 2.972871796399456e-06, "loss": 0.0814, "step": 418475 }, { "epoch": 4.11, "grad_norm": 1.6420648097991943, "learning_rate": 2.972747673945208e-06, "loss": 0.072, "step": 418500 }, { "epoch": 4.12, "grad_norm": 11.753239631652832, "learning_rate": 2.972623551490959e-06, "loss": 0.0919, "step": 418525 }, { "epoch": 4.12, "grad_norm": 2.592268466949463, "learning_rate": 2.972499429036711e-06, "loss": 0.0661, "step": 418550 }, { "epoch": 4.12, "grad_norm": 10.061057090759277, "learning_rate": 2.9723753065824623e-06, "loss": 0.1122, "step": 418575 }, { "epoch": 4.12, "grad_norm": 4.5624589920043945, "learning_rate": 2.9722511841282135e-06, "loss": 0.0629, "step": 418600 }, { "epoch": 4.12, "grad_norm": 21.61624526977539, "learning_rate": 2.9721270616739656e-06, "loss": 0.1037, "step": 418625 }, { "epoch": 4.12, "grad_norm": 3.1741905212402344, "learning_rate": 2.972002939219717e-06, "loss": 0.0818, "step": 418650 }, { "epoch": 4.12, "grad_norm": 10.174073219299316, "learning_rate": 2.9718788167654684e-06, "loss": 0.1314, "step": 418675 }, { "epoch": 4.12, "grad_norm": 6.85518741607666, "learning_rate": 2.9717546943112197e-06, "loss": 0.103, "step": 418700 }, { "epoch": 4.12, "grad_norm": 8.691516876220703, "learning_rate": 2.9716305718569717e-06, "loss": 0.1313, "step": 418725 }, { "epoch": 4.12, "grad_norm": 9.217406272888184, "learning_rate": 2.971506449402723e-06, "loss": 0.0746, "step": 418750 }, { "epoch": 4.12, "grad_norm": 16.417695999145508, "learning_rate": 2.971382326948474e-06, "loss": 0.0884, "step": 418775 }, { "epoch": 4.12, "grad_norm": 1.3080977201461792, "learning_rate": 2.971258204494226e-06, "loss": 0.0629, "step": 418800 }, { "epoch": 4.12, "grad_norm": 16.468339920043945, "learning_rate": 2.9711340820399774e-06, "loss": 0.0785, "step": 418825 }, { "epoch": 4.12, "grad_norm": 3.027993679046631, "learning_rate": 2.971009959585729e-06, "loss": 0.0784, "step": 418850 }, { "epoch": 4.12, "grad_norm": 17.39750099182129, "learning_rate": 2.9708858371314807e-06, "loss": 0.1005, "step": 418875 }, { "epoch": 4.12, "grad_norm": 3.330557107925415, "learning_rate": 2.9707617146772323e-06, "loss": 0.0737, "step": 418900 }, { "epoch": 4.12, "grad_norm": 21.156200408935547, "learning_rate": 2.9706375922229835e-06, "loss": 0.1171, "step": 418925 }, { "epoch": 4.12, "grad_norm": 54.473899841308594, "learning_rate": 2.9705134697687356e-06, "loss": 0.0979, "step": 418950 }, { "epoch": 4.12, "grad_norm": 12.587979316711426, "learning_rate": 2.9703893473144868e-06, "loss": 0.1098, "step": 418975 }, { "epoch": 4.12, "grad_norm": 3.2853047847747803, "learning_rate": 2.970265224860238e-06, "loss": 0.0725, "step": 419000 }, { "epoch": 4.12, "grad_norm": 15.204360008239746, "learning_rate": 2.97014110240599e-06, "loss": 0.1286, "step": 419025 }, { "epoch": 4.12, "grad_norm": 8.722397804260254, "learning_rate": 2.9700169799517413e-06, "loss": 0.0795, "step": 419050 }, { "epoch": 4.12, "grad_norm": 15.737765312194824, "learning_rate": 2.969892857497493e-06, "loss": 0.0957, "step": 419075 }, { "epoch": 4.12, "grad_norm": 5.905160903930664, "learning_rate": 2.9697687350432445e-06, "loss": 0.0677, "step": 419100 }, { "epoch": 4.12, "grad_norm": 9.680331230163574, "learning_rate": 2.969644612588996e-06, "loss": 0.0714, "step": 419125 }, { "epoch": 4.12, "grad_norm": 4.508127689361572, "learning_rate": 2.9695204901347474e-06, "loss": 0.0846, "step": 419150 }, { "epoch": 4.12, "grad_norm": 19.60980987548828, "learning_rate": 2.9693963676804994e-06, "loss": 0.1301, "step": 419175 }, { "epoch": 4.12, "grad_norm": 2.7123730182647705, "learning_rate": 2.9692722452262507e-06, "loss": 0.0696, "step": 419200 }, { "epoch": 4.12, "grad_norm": 19.092472076416016, "learning_rate": 2.969148122772002e-06, "loss": 0.0988, "step": 419225 }, { "epoch": 4.12, "grad_norm": 2.020843029022217, "learning_rate": 2.969024000317754e-06, "loss": 0.0737, "step": 419250 }, { "epoch": 4.12, "grad_norm": 7.567830562591553, "learning_rate": 2.968899877863505e-06, "loss": 0.0955, "step": 419275 }, { "epoch": 4.12, "grad_norm": 2.106279134750366, "learning_rate": 2.9687757554092568e-06, "loss": 0.0667, "step": 419300 }, { "epoch": 4.12, "grad_norm": 8.61164665222168, "learning_rate": 2.9686516329550084e-06, "loss": 0.0957, "step": 419325 }, { "epoch": 4.12, "grad_norm": 6.963396072387695, "learning_rate": 2.96852751050076e-06, "loss": 0.0652, "step": 419350 }, { "epoch": 4.12, "grad_norm": 12.404500007629395, "learning_rate": 2.9684033880465113e-06, "loss": 0.0925, "step": 419375 }, { "epoch": 4.12, "grad_norm": 3.6062073707580566, "learning_rate": 2.9682792655922633e-06, "loss": 0.0742, "step": 419400 }, { "epoch": 4.12, "grad_norm": 29.926267623901367, "learning_rate": 2.9681551431380145e-06, "loss": 0.1324, "step": 419425 }, { "epoch": 4.12, "grad_norm": 9.544278144836426, "learning_rate": 2.9680310206837657e-06, "loss": 0.0663, "step": 419450 }, { "epoch": 4.12, "grad_norm": 14.227862358093262, "learning_rate": 2.967906898229518e-06, "loss": 0.1039, "step": 419475 }, { "epoch": 4.12, "grad_norm": 8.771843910217285, "learning_rate": 2.967782775775269e-06, "loss": 0.0674, "step": 419500 }, { "epoch": 4.12, "grad_norm": 14.878937721252441, "learning_rate": 2.9676586533210206e-06, "loss": 0.13, "step": 419525 }, { "epoch": 4.13, "grad_norm": 0.1855698674917221, "learning_rate": 2.967539495764942e-06, "loss": 0.0925, "step": 419550 }, { "epoch": 4.13, "grad_norm": 10.169781684875488, "learning_rate": 2.9674153733106937e-06, "loss": 0.104, "step": 419575 }, { "epoch": 4.13, "grad_norm": 7.8799662590026855, "learning_rate": 2.967291250856445e-06, "loss": 0.0672, "step": 419600 }, { "epoch": 4.13, "grad_norm": 3.7241125106811523, "learning_rate": 2.967167128402197e-06, "loss": 0.125, "step": 419625 }, { "epoch": 4.13, "grad_norm": 12.561944961547852, "learning_rate": 2.967043005947948e-06, "loss": 0.0832, "step": 419650 }, { "epoch": 4.13, "grad_norm": 8.632590293884277, "learning_rate": 2.9669188834936994e-06, "loss": 0.1087, "step": 419675 }, { "epoch": 4.13, "grad_norm": 3.5522916316986084, "learning_rate": 2.9667947610394515e-06, "loss": 0.0456, "step": 419700 }, { "epoch": 4.13, "grad_norm": 10.075492858886719, "learning_rate": 2.9666706385852027e-06, "loss": 0.1122, "step": 419725 }, { "epoch": 4.13, "grad_norm": 3.5502259731292725, "learning_rate": 2.9665465161309543e-06, "loss": 0.0727, "step": 419750 }, { "epoch": 4.13, "grad_norm": 10.410553932189941, "learning_rate": 2.966422393676706e-06, "loss": 0.0996, "step": 419775 }, { "epoch": 4.13, "grad_norm": 4.47355318069458, "learning_rate": 2.9662982712224576e-06, "loss": 0.0481, "step": 419800 }, { "epoch": 4.13, "grad_norm": 12.736584663391113, "learning_rate": 2.966174148768209e-06, "loss": 0.1312, "step": 419825 }, { "epoch": 4.13, "grad_norm": 16.120271682739258, "learning_rate": 2.966050026313961e-06, "loss": 0.0695, "step": 419850 }, { "epoch": 4.13, "grad_norm": 11.321313858032227, "learning_rate": 2.965925903859712e-06, "loss": 0.0818, "step": 419875 }, { "epoch": 4.13, "grad_norm": 12.456345558166504, "learning_rate": 2.9658017814054633e-06, "loss": 0.0689, "step": 419900 }, { "epoch": 4.13, "grad_norm": 6.610472679138184, "learning_rate": 2.9656776589512153e-06, "loss": 0.1117, "step": 419925 }, { "epoch": 4.13, "grad_norm": 1.5696423053741455, "learning_rate": 2.9655535364969666e-06, "loss": 0.0741, "step": 419950 }, { "epoch": 4.13, "grad_norm": 10.015864372253418, "learning_rate": 2.965429414042718e-06, "loss": 0.0931, "step": 419975 }, { "epoch": 4.13, "grad_norm": 1.7055524587631226, "learning_rate": 2.96530529158847e-06, "loss": 0.0665, "step": 420000 }, { "epoch": 4.13, "eval_loss": 0.7467451095581055, "eval_runtime": 6118.1725, "eval_samples_per_second": 1.547, "eval_steps_per_second": 0.194, "eval_wer": 0.11707332775381446, "step": 420000 }, { "epoch": 4.13, "grad_norm": 11.027060508728027, "learning_rate": 2.9651811691342215e-06, "loss": 0.1156, "step": 420025 }, { "epoch": 4.13, "grad_norm": 5.342920303344727, "learning_rate": 2.9650570466799727e-06, "loss": 0.0578, "step": 420050 }, { "epoch": 4.13, "grad_norm": 13.507452964782715, "learning_rate": 2.9649329242257243e-06, "loss": 0.108, "step": 420075 }, { "epoch": 4.13, "grad_norm": 3.4801223278045654, "learning_rate": 2.964808801771476e-06, "loss": 0.0734, "step": 420100 }, { "epoch": 4.13, "grad_norm": 13.394919395446777, "learning_rate": 2.964684679317227e-06, "loss": 0.0953, "step": 420125 }, { "epoch": 4.13, "grad_norm": 1.287894368171692, "learning_rate": 2.964560556862979e-06, "loss": 0.0582, "step": 420150 }, { "epoch": 4.13, "grad_norm": 19.10788917541504, "learning_rate": 2.9644364344087304e-06, "loss": 0.1035, "step": 420175 }, { "epoch": 4.13, "grad_norm": 4.297049045562744, "learning_rate": 2.964312311954482e-06, "loss": 0.0556, "step": 420200 }, { "epoch": 4.13, "grad_norm": 13.35940170288086, "learning_rate": 2.9641881895002333e-06, "loss": 0.1387, "step": 420225 }, { "epoch": 4.13, "grad_norm": 0.7047127485275269, "learning_rate": 2.9640640670459853e-06, "loss": 0.0742, "step": 420250 }, { "epoch": 4.13, "grad_norm": 11.803131103515625, "learning_rate": 2.9639399445917365e-06, "loss": 0.0846, "step": 420275 }, { "epoch": 4.13, "grad_norm": 4.9653449058532715, "learning_rate": 2.963815822137488e-06, "loss": 0.0805, "step": 420300 }, { "epoch": 4.13, "grad_norm": 18.220060348510742, "learning_rate": 2.96369169968324e-06, "loss": 0.125, "step": 420325 }, { "epoch": 4.13, "grad_norm": 4.202369689941406, "learning_rate": 2.9635675772289914e-06, "loss": 0.0682, "step": 420350 }, { "epoch": 4.13, "grad_norm": 6.209647178649902, "learning_rate": 2.9634434547747427e-06, "loss": 0.0931, "step": 420375 }, { "epoch": 4.13, "grad_norm": 3.2625365257263184, "learning_rate": 2.9633193323204947e-06, "loss": 0.0679, "step": 420400 }, { "epoch": 4.13, "grad_norm": 13.683070182800293, "learning_rate": 2.963195209866246e-06, "loss": 0.0828, "step": 420425 }, { "epoch": 4.13, "grad_norm": 1.2643746137619019, "learning_rate": 2.963071087411997e-06, "loss": 0.0813, "step": 420450 }, { "epoch": 4.13, "grad_norm": 14.048476219177246, "learning_rate": 2.962946964957749e-06, "loss": 0.1072, "step": 420475 }, { "epoch": 4.13, "grad_norm": 5.165711879730225, "learning_rate": 2.9628228425035004e-06, "loss": 0.0738, "step": 420500 }, { "epoch": 4.13, "grad_norm": 9.0150728225708, "learning_rate": 2.962698720049252e-06, "loss": 0.0969, "step": 420525 }, { "epoch": 4.13, "grad_norm": 9.042396545410156, "learning_rate": 2.9625745975950037e-06, "loss": 0.0849, "step": 420550 }, { "epoch": 4.14, "grad_norm": 21.07254981994629, "learning_rate": 2.9624504751407553e-06, "loss": 0.1399, "step": 420575 }, { "epoch": 4.14, "grad_norm": 1.4056016206741333, "learning_rate": 2.9623263526865065e-06, "loss": 0.0683, "step": 420600 }, { "epoch": 4.14, "grad_norm": 6.46732759475708, "learning_rate": 2.9622022302322586e-06, "loss": 0.0986, "step": 420625 }, { "epoch": 4.14, "grad_norm": 8.303923606872559, "learning_rate": 2.96207810777801e-06, "loss": 0.0636, "step": 420650 }, { "epoch": 4.14, "grad_norm": 9.912211418151855, "learning_rate": 2.961953985323761e-06, "loss": 0.1272, "step": 420675 }, { "epoch": 4.14, "grad_norm": 4.165701866149902, "learning_rate": 2.961829862869513e-06, "loss": 0.0879, "step": 420700 }, { "epoch": 4.14, "grad_norm": 11.60274600982666, "learning_rate": 2.9617057404152643e-06, "loss": 0.1043, "step": 420725 }, { "epoch": 4.14, "grad_norm": 8.24426555633545, "learning_rate": 2.961581617961016e-06, "loss": 0.0555, "step": 420750 }, { "epoch": 4.14, "grad_norm": 16.730058670043945, "learning_rate": 2.9614574955067675e-06, "loss": 0.1303, "step": 420775 }, { "epoch": 4.14, "grad_norm": 4.2832183837890625, "learning_rate": 2.961333373052519e-06, "loss": 0.0534, "step": 420800 }, { "epoch": 4.14, "grad_norm": 16.35481071472168, "learning_rate": 2.9612092505982704e-06, "loss": 0.1075, "step": 420825 }, { "epoch": 4.14, "grad_norm": 3.2497751712799072, "learning_rate": 2.9610851281440225e-06, "loss": 0.0681, "step": 420850 }, { "epoch": 4.14, "grad_norm": 10.492010116577148, "learning_rate": 2.9609610056897737e-06, "loss": 0.1039, "step": 420875 }, { "epoch": 4.14, "grad_norm": 1.9474881887435913, "learning_rate": 2.960836883235525e-06, "loss": 0.071, "step": 420900 }, { "epoch": 4.14, "grad_norm": 7.847496509552002, "learning_rate": 2.960712760781277e-06, "loss": 0.082, "step": 420925 }, { "epoch": 4.14, "grad_norm": 5.6533074378967285, "learning_rate": 2.960588638327028e-06, "loss": 0.0749, "step": 420950 }, { "epoch": 4.14, "grad_norm": 11.42383861541748, "learning_rate": 2.9604645158727798e-06, "loss": 0.1059, "step": 420975 }, { "epoch": 4.14, "grad_norm": 7.414341926574707, "learning_rate": 2.960340393418531e-06, "loss": 0.047, "step": 421000 }, { "epoch": 4.14, "grad_norm": 5.338398456573486, "learning_rate": 2.960216270964283e-06, "loss": 0.1094, "step": 421025 }, { "epoch": 4.14, "grad_norm": 1.0310249328613281, "learning_rate": 2.9600921485100343e-06, "loss": 0.0713, "step": 421050 }, { "epoch": 4.14, "grad_norm": 10.277405738830566, "learning_rate": 2.9599680260557855e-06, "loss": 0.0911, "step": 421075 }, { "epoch": 4.14, "grad_norm": 1.864607334136963, "learning_rate": 2.9598439036015375e-06, "loss": 0.079, "step": 421100 }, { "epoch": 4.14, "grad_norm": 12.043352127075195, "learning_rate": 2.9597197811472887e-06, "loss": 0.0913, "step": 421125 }, { "epoch": 4.14, "grad_norm": 5.567807197570801, "learning_rate": 2.9595956586930404e-06, "loss": 0.0661, "step": 421150 }, { "epoch": 4.14, "grad_norm": 24.301536560058594, "learning_rate": 2.959471536238792e-06, "loss": 0.1125, "step": 421175 }, { "epoch": 4.14, "grad_norm": 0.1732306331396103, "learning_rate": 2.9593474137845436e-06, "loss": 0.0602, "step": 421200 }, { "epoch": 4.14, "grad_norm": 17.129398345947266, "learning_rate": 2.959223291330295e-06, "loss": 0.1042, "step": 421225 }, { "epoch": 4.14, "grad_norm": 0.8835667371749878, "learning_rate": 2.959099168876047e-06, "loss": 0.073, "step": 421250 }, { "epoch": 4.14, "grad_norm": 10.9263277053833, "learning_rate": 2.958975046421798e-06, "loss": 0.0897, "step": 421275 }, { "epoch": 4.14, "grad_norm": 8.40587043762207, "learning_rate": 2.9588509239675493e-06, "loss": 0.0923, "step": 421300 }, { "epoch": 4.14, "grad_norm": 11.517106056213379, "learning_rate": 2.9587268015133014e-06, "loss": 0.1262, "step": 421325 }, { "epoch": 4.14, "grad_norm": 6.652535915374756, "learning_rate": 2.9586026790590526e-06, "loss": 0.0751, "step": 421350 }, { "epoch": 4.14, "grad_norm": 10.57046890258789, "learning_rate": 2.9584785566048042e-06, "loss": 0.1186, "step": 421375 }, { "epoch": 4.14, "grad_norm": 5.103789806365967, "learning_rate": 2.958354434150556e-06, "loss": 0.0861, "step": 421400 }, { "epoch": 4.14, "grad_norm": 12.285650253295898, "learning_rate": 2.9582303116963075e-06, "loss": 0.0967, "step": 421425 }, { "epoch": 4.14, "grad_norm": 0.05123680830001831, "learning_rate": 2.9581061892420587e-06, "loss": 0.0865, "step": 421450 }, { "epoch": 4.14, "grad_norm": 19.476848602294922, "learning_rate": 2.9579820667878108e-06, "loss": 0.0923, "step": 421475 }, { "epoch": 4.14, "grad_norm": 4.014805793762207, "learning_rate": 2.957857944333562e-06, "loss": 0.0559, "step": 421500 }, { "epoch": 4.14, "grad_norm": 8.434268951416016, "learning_rate": 2.957733821879313e-06, "loss": 0.1265, "step": 421525 }, { "epoch": 4.14, "grad_norm": 6.691648960113525, "learning_rate": 2.9576096994250653e-06, "loss": 0.0787, "step": 421550 }, { "epoch": 4.14, "grad_norm": 14.034332275390625, "learning_rate": 2.9574855769708165e-06, "loss": 0.0983, "step": 421575 }, { "epoch": 4.15, "grad_norm": 9.691153526306152, "learning_rate": 2.957361454516568e-06, "loss": 0.0716, "step": 421600 }, { "epoch": 4.15, "grad_norm": 16.100303649902344, "learning_rate": 2.9572373320623197e-06, "loss": 0.1377, "step": 421625 }, { "epoch": 4.15, "grad_norm": 1.6615277528762817, "learning_rate": 2.9571132096080714e-06, "loss": 0.0825, "step": 421650 }, { "epoch": 4.15, "grad_norm": 5.713280200958252, "learning_rate": 2.9569890871538226e-06, "loss": 0.0944, "step": 421675 }, { "epoch": 4.15, "grad_norm": 1.7676315307617188, "learning_rate": 2.9568649646995747e-06, "loss": 0.0605, "step": 421700 }, { "epoch": 4.15, "grad_norm": 19.22380828857422, "learning_rate": 2.956740842245326e-06, "loss": 0.1046, "step": 421725 }, { "epoch": 4.15, "grad_norm": 5.560476303100586, "learning_rate": 2.956616719791077e-06, "loss": 0.0623, "step": 421750 }, { "epoch": 4.15, "grad_norm": 13.328080177307129, "learning_rate": 2.956492597336829e-06, "loss": 0.098, "step": 421775 }, { "epoch": 4.15, "grad_norm": 1.375445008277893, "learning_rate": 2.9563684748825803e-06, "loss": 0.081, "step": 421800 }, { "epoch": 4.15, "grad_norm": 16.826936721801758, "learning_rate": 2.956244352428332e-06, "loss": 0.0892, "step": 421825 }, { "epoch": 4.15, "grad_norm": 0.3708588778972626, "learning_rate": 2.956120229974083e-06, "loss": 0.0799, "step": 421850 }, { "epoch": 4.15, "grad_norm": 20.49984359741211, "learning_rate": 2.9559961075198352e-06, "loss": 0.1007, "step": 421875 }, { "epoch": 4.15, "grad_norm": 10.420886993408203, "learning_rate": 2.9558719850655865e-06, "loss": 0.0626, "step": 421900 }, { "epoch": 4.15, "grad_norm": 17.284101486206055, "learning_rate": 2.9557478626113377e-06, "loss": 0.1115, "step": 421925 }, { "epoch": 4.15, "grad_norm": 3.600306510925293, "learning_rate": 2.9556287050552595e-06, "loss": 0.0898, "step": 421950 }, { "epoch": 4.15, "grad_norm": 10.048351287841797, "learning_rate": 2.9555045826010108e-06, "loss": 0.103, "step": 421975 }, { "epoch": 4.15, "grad_norm": 1.8147215843200684, "learning_rate": 2.955380460146763e-06, "loss": 0.0778, "step": 422000 }, { "epoch": 4.15, "grad_norm": 9.483732223510742, "learning_rate": 2.955256337692514e-06, "loss": 0.1066, "step": 422025 }, { "epoch": 4.15, "grad_norm": 1.1488674879074097, "learning_rate": 2.9551322152382657e-06, "loss": 0.0827, "step": 422050 }, { "epoch": 4.15, "grad_norm": 11.511825561523438, "learning_rate": 2.9550080927840173e-06, "loss": 0.1157, "step": 422075 }, { "epoch": 4.15, "grad_norm": 0.22977717220783234, "learning_rate": 2.954883970329769e-06, "loss": 0.0838, "step": 422100 }, { "epoch": 4.15, "grad_norm": 15.323686599731445, "learning_rate": 2.95475984787552e-06, "loss": 0.1114, "step": 422125 }, { "epoch": 4.15, "grad_norm": 0.6639484763145447, "learning_rate": 2.954635725421272e-06, "loss": 0.0783, "step": 422150 }, { "epoch": 4.15, "grad_norm": 14.470847129821777, "learning_rate": 2.9545116029670234e-06, "loss": 0.089, "step": 422175 }, { "epoch": 4.15, "grad_norm": 5.755372047424316, "learning_rate": 2.9543874805127746e-06, "loss": 0.0704, "step": 422200 }, { "epoch": 4.15, "grad_norm": 10.677804946899414, "learning_rate": 2.9542633580585267e-06, "loss": 0.0884, "step": 422225 }, { "epoch": 4.15, "grad_norm": 4.231594562530518, "learning_rate": 2.954139235604278e-06, "loss": 0.0744, "step": 422250 }, { "epoch": 4.15, "grad_norm": 13.784984588623047, "learning_rate": 2.9540151131500295e-06, "loss": 0.1025, "step": 422275 }, { "epoch": 4.15, "grad_norm": 5.228938579559326, "learning_rate": 2.953890990695781e-06, "loss": 0.0475, "step": 422300 }, { "epoch": 4.15, "grad_norm": 15.1517972946167, "learning_rate": 2.953766868241533e-06, "loss": 0.0811, "step": 422325 }, { "epoch": 4.15, "grad_norm": 3.6244163513183594, "learning_rate": 2.953642745787284e-06, "loss": 0.0757, "step": 422350 }, { "epoch": 4.15, "grad_norm": 20.31937599182129, "learning_rate": 2.9535186233330352e-06, "loss": 0.1271, "step": 422375 }, { "epoch": 4.15, "grad_norm": 0.178507998585701, "learning_rate": 2.9533945008787873e-06, "loss": 0.0736, "step": 422400 }, { "epoch": 4.15, "grad_norm": 18.601240158081055, "learning_rate": 2.9532703784245385e-06, "loss": 0.1254, "step": 422425 }, { "epoch": 4.15, "grad_norm": 11.846363067626953, "learning_rate": 2.95314625597029e-06, "loss": 0.0708, "step": 422450 }, { "epoch": 4.15, "grad_norm": 11.215069770812988, "learning_rate": 2.9530221335160418e-06, "loss": 0.1014, "step": 422475 }, { "epoch": 4.15, "grad_norm": 0.2838130593299866, "learning_rate": 2.9528980110617934e-06, "loss": 0.0568, "step": 422500 }, { "epoch": 4.15, "grad_norm": 16.475942611694336, "learning_rate": 2.9527738886075446e-06, "loss": 0.1111, "step": 422525 }, { "epoch": 4.15, "grad_norm": 2.237992763519287, "learning_rate": 2.9526497661532967e-06, "loss": 0.0726, "step": 422550 }, { "epoch": 4.15, "grad_norm": 14.228070259094238, "learning_rate": 2.952525643699048e-06, "loss": 0.1525, "step": 422575 }, { "epoch": 4.16, "grad_norm": 3.528474807739258, "learning_rate": 2.952401521244799e-06, "loss": 0.0821, "step": 422600 }, { "epoch": 4.16, "grad_norm": 11.196378707885742, "learning_rate": 2.952277398790551e-06, "loss": 0.0858, "step": 422625 }, { "epoch": 4.16, "grad_norm": 2.531693696975708, "learning_rate": 2.9521532763363024e-06, "loss": 0.0752, "step": 422650 }, { "epoch": 4.16, "grad_norm": 16.697282791137695, "learning_rate": 2.952029153882054e-06, "loss": 0.112, "step": 422675 }, { "epoch": 4.16, "grad_norm": 0.11480604857206345, "learning_rate": 2.9519050314278056e-06, "loss": 0.0787, "step": 422700 }, { "epoch": 4.16, "grad_norm": 12.314180374145508, "learning_rate": 2.9517809089735573e-06, "loss": 0.0887, "step": 422725 }, { "epoch": 4.16, "grad_norm": 6.043646812438965, "learning_rate": 2.9516567865193085e-06, "loss": 0.0686, "step": 422750 }, { "epoch": 4.16, "grad_norm": 4.897916793823242, "learning_rate": 2.9515326640650605e-06, "loss": 0.0883, "step": 422775 }, { "epoch": 4.16, "grad_norm": 1.6062346696853638, "learning_rate": 2.9514085416108117e-06, "loss": 0.0642, "step": 422800 }, { "epoch": 4.16, "grad_norm": 21.011987686157227, "learning_rate": 2.951284419156563e-06, "loss": 0.1028, "step": 422825 }, { "epoch": 4.16, "grad_norm": 0.8519703149795532, "learning_rate": 2.951160296702315e-06, "loss": 0.0606, "step": 422850 }, { "epoch": 4.16, "grad_norm": 14.507518768310547, "learning_rate": 2.9510361742480662e-06, "loss": 0.1116, "step": 422875 }, { "epoch": 4.16, "grad_norm": 0.03715413063764572, "learning_rate": 2.950912051793818e-06, "loss": 0.0451, "step": 422900 }, { "epoch": 4.16, "grad_norm": 3.3380608558654785, "learning_rate": 2.9507879293395695e-06, "loss": 0.0764, "step": 422925 }, { "epoch": 4.16, "grad_norm": 1.284033179283142, "learning_rate": 2.950663806885321e-06, "loss": 0.0568, "step": 422950 }, { "epoch": 4.16, "grad_norm": 20.324180603027344, "learning_rate": 2.9505396844310723e-06, "loss": 0.1031, "step": 422975 }, { "epoch": 4.16, "grad_norm": 0.4627877473831177, "learning_rate": 2.9504155619768244e-06, "loss": 0.0684, "step": 423000 }, { "epoch": 4.16, "grad_norm": 18.73078155517578, "learning_rate": 2.9502914395225756e-06, "loss": 0.0983, "step": 423025 }, { "epoch": 4.16, "grad_norm": 6.827659606933594, "learning_rate": 2.9501673170683273e-06, "loss": 0.0995, "step": 423050 }, { "epoch": 4.16, "grad_norm": 13.416305541992188, "learning_rate": 2.950043194614079e-06, "loss": 0.12, "step": 423075 }, { "epoch": 4.16, "grad_norm": 1.6239748001098633, "learning_rate": 2.9499190721598305e-06, "loss": 0.0566, "step": 423100 }, { "epoch": 4.16, "grad_norm": 11.933133125305176, "learning_rate": 2.9497949497055817e-06, "loss": 0.1129, "step": 423125 }, { "epoch": 4.16, "grad_norm": 7.106864929199219, "learning_rate": 2.949670827251334e-06, "loss": 0.0785, "step": 423150 }, { "epoch": 4.16, "grad_norm": 16.726560592651367, "learning_rate": 2.949546704797085e-06, "loss": 0.1009, "step": 423175 }, { "epoch": 4.16, "grad_norm": 0.19230955839157104, "learning_rate": 2.9494225823428362e-06, "loss": 0.0832, "step": 423200 }, { "epoch": 4.16, "grad_norm": 19.663976669311523, "learning_rate": 2.949298459888588e-06, "loss": 0.0918, "step": 423225 }, { "epoch": 4.16, "grad_norm": 0.4801582396030426, "learning_rate": 2.9491743374343395e-06, "loss": 0.063, "step": 423250 }, { "epoch": 4.16, "grad_norm": 16.60610580444336, "learning_rate": 2.949050214980091e-06, "loss": 0.1046, "step": 423275 }, { "epoch": 4.16, "grad_norm": 6.957411766052246, "learning_rate": 2.9489260925258423e-06, "loss": 0.0763, "step": 423300 }, { "epoch": 4.16, "grad_norm": 23.05560302734375, "learning_rate": 2.9488019700715944e-06, "loss": 0.1339, "step": 423325 }, { "epoch": 4.16, "grad_norm": 0.29183658957481384, "learning_rate": 2.9486778476173456e-06, "loss": 0.0804, "step": 423350 }, { "epoch": 4.16, "grad_norm": 12.577486038208008, "learning_rate": 2.948553725163097e-06, "loss": 0.0975, "step": 423375 }, { "epoch": 4.16, "grad_norm": 0.6680112481117249, "learning_rate": 2.948429602708849e-06, "loss": 0.0678, "step": 423400 }, { "epoch": 4.16, "grad_norm": 12.246191024780273, "learning_rate": 2.9483054802546e-06, "loss": 0.1252, "step": 423425 }, { "epoch": 4.16, "grad_norm": 2.818913459777832, "learning_rate": 2.9481813578003517e-06, "loss": 0.0689, "step": 423450 }, { "epoch": 4.16, "grad_norm": 9.339715957641602, "learning_rate": 2.9480572353461034e-06, "loss": 0.1089, "step": 423475 }, { "epoch": 4.16, "grad_norm": 2.718017101287842, "learning_rate": 2.947933112891855e-06, "loss": 0.0604, "step": 423500 }, { "epoch": 4.16, "grad_norm": 20.903959274291992, "learning_rate": 2.947808990437606e-06, "loss": 0.0962, "step": 423525 }, { "epoch": 4.16, "grad_norm": 4.820235729217529, "learning_rate": 2.9476848679833583e-06, "loss": 0.0721, "step": 423550 }, { "epoch": 4.16, "grad_norm": 11.954010009765625, "learning_rate": 2.9475607455291095e-06, "loss": 0.1436, "step": 423575 }, { "epoch": 4.16, "grad_norm": 2.3005905151367188, "learning_rate": 2.9474366230748607e-06, "loss": 0.1103, "step": 423600 }, { "epoch": 4.17, "grad_norm": 12.166162490844727, "learning_rate": 2.9473125006206127e-06, "loss": 0.0927, "step": 423625 }, { "epoch": 4.17, "grad_norm": 7.005280494689941, "learning_rate": 2.947188378166364e-06, "loss": 0.0744, "step": 423650 }, { "epoch": 4.17, "grad_norm": 14.958941459655762, "learning_rate": 2.9470642557121156e-06, "loss": 0.1139, "step": 423675 }, { "epoch": 4.17, "grad_norm": 10.070562362670898, "learning_rate": 2.9469401332578672e-06, "loss": 0.0517, "step": 423700 }, { "epoch": 4.17, "grad_norm": 16.943134307861328, "learning_rate": 2.946816010803619e-06, "loss": 0.1522, "step": 423725 }, { "epoch": 4.17, "grad_norm": 9.571356773376465, "learning_rate": 2.94669188834937e-06, "loss": 0.0572, "step": 423750 }, { "epoch": 4.17, "grad_norm": 16.477575302124023, "learning_rate": 2.946567765895122e-06, "loss": 0.106, "step": 423775 }, { "epoch": 4.17, "grad_norm": 3.4461493492126465, "learning_rate": 2.9464436434408733e-06, "loss": 0.0737, "step": 423800 }, { "epoch": 4.17, "grad_norm": 10.220771789550781, "learning_rate": 2.9463195209866245e-06, "loss": 0.0958, "step": 423825 }, { "epoch": 4.17, "grad_norm": 0.138664111495018, "learning_rate": 2.9461953985323766e-06, "loss": 0.076, "step": 423850 }, { "epoch": 4.17, "grad_norm": 12.570222854614258, "learning_rate": 2.946071276078128e-06, "loss": 0.128, "step": 423875 }, { "epoch": 4.17, "grad_norm": 7.356542110443115, "learning_rate": 2.9459471536238795e-06, "loss": 0.0717, "step": 423900 }, { "epoch": 4.17, "grad_norm": 15.259587287902832, "learning_rate": 2.945823031169631e-06, "loss": 0.0958, "step": 423925 }, { "epoch": 4.17, "grad_norm": 0.36420905590057373, "learning_rate": 2.9456989087153827e-06, "loss": 0.062, "step": 423950 }, { "epoch": 4.17, "grad_norm": 10.44682788848877, "learning_rate": 2.945574786261134e-06, "loss": 0.0736, "step": 423975 }, { "epoch": 4.17, "grad_norm": 1.7694758176803589, "learning_rate": 2.945450663806886e-06, "loss": 0.0746, "step": 424000 }, { "epoch": 4.17, "grad_norm": 12.374313354492188, "learning_rate": 2.945326541352637e-06, "loss": 0.0853, "step": 424025 }, { "epoch": 4.17, "grad_norm": 0.5426231026649475, "learning_rate": 2.9452024188983884e-06, "loss": 0.0691, "step": 424050 }, { "epoch": 4.17, "grad_norm": 12.5223970413208, "learning_rate": 2.94507829644414e-06, "loss": 0.1047, "step": 424075 }, { "epoch": 4.17, "grad_norm": 0.587165117263794, "learning_rate": 2.9449541739898917e-06, "loss": 0.0602, "step": 424100 }, { "epoch": 4.17, "grad_norm": 11.64783000946045, "learning_rate": 2.9448300515356433e-06, "loss": 0.0907, "step": 424125 }, { "epoch": 4.17, "grad_norm": 1.5138269662857056, "learning_rate": 2.9447059290813945e-06, "loss": 0.0732, "step": 424150 }, { "epoch": 4.17, "grad_norm": 18.892494201660156, "learning_rate": 2.9445818066271466e-06, "loss": 0.1079, "step": 424175 }, { "epoch": 4.17, "grad_norm": 0.29181966185569763, "learning_rate": 2.944457684172898e-06, "loss": 0.0579, "step": 424200 }, { "epoch": 4.17, "grad_norm": 9.956073760986328, "learning_rate": 2.944333561718649e-06, "loss": 0.0826, "step": 424225 }, { "epoch": 4.17, "grad_norm": 0.3117310702800751, "learning_rate": 2.944209439264401e-06, "loss": 0.0835, "step": 424250 }, { "epoch": 4.17, "grad_norm": 15.207386016845703, "learning_rate": 2.9440853168101523e-06, "loss": 0.0814, "step": 424275 }, { "epoch": 4.17, "grad_norm": 8.102839469909668, "learning_rate": 2.943961194355904e-06, "loss": 0.0679, "step": 424300 }, { "epoch": 4.17, "grad_norm": 13.567895889282227, "learning_rate": 2.9438370719016556e-06, "loss": 0.1219, "step": 424325 }, { "epoch": 4.17, "grad_norm": 10.043789863586426, "learning_rate": 2.943712949447407e-06, "loss": 0.0787, "step": 424350 }, { "epoch": 4.17, "grad_norm": 14.495534896850586, "learning_rate": 2.9435888269931584e-06, "loss": 0.0965, "step": 424375 }, { "epoch": 4.17, "grad_norm": 0.12414237856864929, "learning_rate": 2.9434647045389105e-06, "loss": 0.1014, "step": 424400 }, { "epoch": 4.17, "grad_norm": 28.699203491210938, "learning_rate": 2.9433405820846617e-06, "loss": 0.1027, "step": 424425 }, { "epoch": 4.17, "grad_norm": 0.8630394339561462, "learning_rate": 2.943216459630413e-06, "loss": 0.0668, "step": 424450 }, { "epoch": 4.17, "grad_norm": 8.615184783935547, "learning_rate": 2.943092337176165e-06, "loss": 0.0948, "step": 424475 }, { "epoch": 4.17, "grad_norm": 7.463403701782227, "learning_rate": 2.942968214721916e-06, "loss": 0.0637, "step": 424500 }, { "epoch": 4.17, "grad_norm": 6.023656845092773, "learning_rate": 2.9428440922676678e-06, "loss": 0.1015, "step": 424525 }, { "epoch": 4.17, "grad_norm": 1.7940728664398193, "learning_rate": 2.9427199698134194e-06, "loss": 0.0987, "step": 424550 }, { "epoch": 4.17, "grad_norm": 11.799577713012695, "learning_rate": 2.942595847359171e-06, "loss": 0.0952, "step": 424575 }, { "epoch": 4.17, "grad_norm": 10.987913131713867, "learning_rate": 2.9424717249049223e-06, "loss": 0.0761, "step": 424600 }, { "epoch": 4.17, "grad_norm": 15.638179779052734, "learning_rate": 2.942352567348844e-06, "loss": 0.1042, "step": 424625 }, { "epoch": 4.18, "grad_norm": 3.3585398197174072, "learning_rate": 2.9422284448945954e-06, "loss": 0.0663, "step": 424650 }, { "epoch": 4.18, "grad_norm": 17.537723541259766, "learning_rate": 2.9421043224403466e-06, "loss": 0.0755, "step": 424675 }, { "epoch": 4.18, "grad_norm": 10.520892143249512, "learning_rate": 2.9419801999860986e-06, "loss": 0.0619, "step": 424700 }, { "epoch": 4.18, "grad_norm": 18.80828094482422, "learning_rate": 2.94185607753185e-06, "loss": 0.1169, "step": 424725 }, { "epoch": 4.18, "grad_norm": 2.2527904510498047, "learning_rate": 2.9417319550776015e-06, "loss": 0.0789, "step": 424750 }, { "epoch": 4.18, "grad_norm": 20.026714324951172, "learning_rate": 2.941607832623353e-06, "loss": 0.09, "step": 424775 }, { "epoch": 4.18, "grad_norm": 9.738251686096191, "learning_rate": 2.9414837101691047e-06, "loss": 0.0801, "step": 424800 }, { "epoch": 4.18, "grad_norm": 11.43443775177002, "learning_rate": 2.941359587714856e-06, "loss": 0.0693, "step": 424825 }, { "epoch": 4.18, "grad_norm": 4.162208080291748, "learning_rate": 2.941235465260608e-06, "loss": 0.0797, "step": 424850 }, { "epoch": 4.18, "grad_norm": 14.844724655151367, "learning_rate": 2.9411113428063592e-06, "loss": 0.1181, "step": 424875 }, { "epoch": 4.18, "grad_norm": 6.068497180938721, "learning_rate": 2.9409872203521104e-06, "loss": 0.0712, "step": 424900 }, { "epoch": 4.18, "grad_norm": 8.148938179016113, "learning_rate": 2.9408630978978625e-06, "loss": 0.0989, "step": 424925 }, { "epoch": 4.18, "grad_norm": 7.622990608215332, "learning_rate": 2.9407389754436137e-06, "loss": 0.0693, "step": 424950 }, { "epoch": 4.18, "grad_norm": 10.445043563842773, "learning_rate": 2.9406148529893653e-06, "loss": 0.1126, "step": 424975 }, { "epoch": 4.18, "grad_norm": 9.572477340698242, "learning_rate": 2.940490730535117e-06, "loss": 0.0884, "step": 425000 }, { "epoch": 4.18, "grad_norm": 16.6090087890625, "learning_rate": 2.9403666080808686e-06, "loss": 0.1081, "step": 425025 }, { "epoch": 4.18, "grad_norm": 8.468883514404297, "learning_rate": 2.94024248562662e-06, "loss": 0.0708, "step": 425050 }, { "epoch": 4.18, "grad_norm": 6.600162982940674, "learning_rate": 2.940118363172372e-06, "loss": 0.0758, "step": 425075 }, { "epoch": 4.18, "grad_norm": 1.2922636270523071, "learning_rate": 2.939994240718123e-06, "loss": 0.0705, "step": 425100 }, { "epoch": 4.18, "grad_norm": 15.800594329833984, "learning_rate": 2.9398701182638743e-06, "loss": 0.114, "step": 425125 }, { "epoch": 4.18, "grad_norm": 5.650814056396484, "learning_rate": 2.9397459958096264e-06, "loss": 0.0622, "step": 425150 }, { "epoch": 4.18, "grad_norm": 10.350167274475098, "learning_rate": 2.9396218733553776e-06, "loss": 0.1086, "step": 425175 }, { "epoch": 4.18, "grad_norm": 0.8898671269416809, "learning_rate": 2.939497750901129e-06, "loss": 0.066, "step": 425200 }, { "epoch": 4.18, "grad_norm": 16.67082405090332, "learning_rate": 2.939373628446881e-06, "loss": 0.1239, "step": 425225 }, { "epoch": 4.18, "grad_norm": 8.546092987060547, "learning_rate": 2.9392495059926325e-06, "loss": 0.0658, "step": 425250 }, { "epoch": 4.18, "grad_norm": 14.17590618133545, "learning_rate": 2.9391253835383837e-06, "loss": 0.116, "step": 425275 }, { "epoch": 4.18, "grad_norm": 3.1100502014160156, "learning_rate": 2.9390012610841357e-06, "loss": 0.0643, "step": 425300 }, { "epoch": 4.18, "grad_norm": 11.64976978302002, "learning_rate": 2.938877138629887e-06, "loss": 0.1104, "step": 425325 }, { "epoch": 4.18, "grad_norm": 0.6798185706138611, "learning_rate": 2.938753016175638e-06, "loss": 0.0755, "step": 425350 }, { "epoch": 4.18, "grad_norm": 16.023950576782227, "learning_rate": 2.9386288937213902e-06, "loss": 0.1042, "step": 425375 }, { "epoch": 4.18, "grad_norm": 1.7262945175170898, "learning_rate": 2.9385047712671414e-06, "loss": 0.0823, "step": 425400 }, { "epoch": 4.18, "grad_norm": 11.822237014770508, "learning_rate": 2.938380648812893e-06, "loss": 0.1031, "step": 425425 }, { "epoch": 4.18, "grad_norm": 6.853858947753906, "learning_rate": 2.9382565263586447e-06, "loss": 0.0732, "step": 425450 }, { "epoch": 4.18, "grad_norm": 14.384082794189453, "learning_rate": 2.9381324039043963e-06, "loss": 0.0915, "step": 425475 }, { "epoch": 4.18, "grad_norm": 5.785796642303467, "learning_rate": 2.9380082814501476e-06, "loss": 0.0669, "step": 425500 }, { "epoch": 4.18, "grad_norm": 11.984525680541992, "learning_rate": 2.9378841589958988e-06, "loss": 0.0938, "step": 425525 }, { "epoch": 4.18, "grad_norm": 2.196681022644043, "learning_rate": 2.937760036541651e-06, "loss": 0.0702, "step": 425550 }, { "epoch": 4.18, "grad_norm": 10.443732261657715, "learning_rate": 2.937635914087402e-06, "loss": 0.0975, "step": 425575 }, { "epoch": 4.18, "grad_norm": 0.034106187522411346, "learning_rate": 2.9375117916331537e-06, "loss": 0.1079, "step": 425600 }, { "epoch": 4.18, "grad_norm": 39.27644729614258, "learning_rate": 2.9373876691789053e-06, "loss": 0.1107, "step": 425625 }, { "epoch": 4.19, "grad_norm": 7.061286926269531, "learning_rate": 2.937263546724657e-06, "loss": 0.0631, "step": 425650 }, { "epoch": 4.19, "grad_norm": 15.818875312805176, "learning_rate": 2.937139424270408e-06, "loss": 0.1078, "step": 425675 }, { "epoch": 4.19, "grad_norm": 10.440559387207031, "learning_rate": 2.93701530181616e-06, "loss": 0.0684, "step": 425700 }, { "epoch": 4.19, "grad_norm": 14.450676918029785, "learning_rate": 2.9368911793619114e-06, "loss": 0.1028, "step": 425725 }, { "epoch": 4.19, "grad_norm": 4.433058261871338, "learning_rate": 2.9367670569076626e-06, "loss": 0.0879, "step": 425750 }, { "epoch": 4.19, "grad_norm": 9.569786071777344, "learning_rate": 2.9366429344534147e-06, "loss": 0.0995, "step": 425775 }, { "epoch": 4.19, "grad_norm": 1.917090892791748, "learning_rate": 2.936518811999166e-06, "loss": 0.0817, "step": 425800 }, { "epoch": 4.19, "grad_norm": 7.5783305168151855, "learning_rate": 2.9363946895449175e-06, "loss": 0.111, "step": 425825 }, { "epoch": 4.19, "grad_norm": 5.464631080627441, "learning_rate": 2.936270567090669e-06, "loss": 0.0706, "step": 425850 }, { "epoch": 4.19, "grad_norm": 8.523197174072266, "learning_rate": 2.936146444636421e-06, "loss": 0.0928, "step": 425875 }, { "epoch": 4.19, "grad_norm": 4.324892997741699, "learning_rate": 2.936022322182172e-06, "loss": 0.0764, "step": 425900 }, { "epoch": 4.19, "grad_norm": 19.37006950378418, "learning_rate": 2.935898199727924e-06, "loss": 0.0954, "step": 425925 }, { "epoch": 4.19, "grad_norm": 3.9300637245178223, "learning_rate": 2.9357740772736753e-06, "loss": 0.0548, "step": 425950 }, { "epoch": 4.19, "grad_norm": 16.679086685180664, "learning_rate": 2.935649954819427e-06, "loss": 0.1039, "step": 425975 }, { "epoch": 4.19, "grad_norm": 2.1471593379974365, "learning_rate": 2.9355258323651786e-06, "loss": 0.0786, "step": 426000 }, { "epoch": 4.19, "grad_norm": 13.885859489440918, "learning_rate": 2.93540170991093e-06, "loss": 0.1267, "step": 426025 }, { "epoch": 4.19, "grad_norm": 1.1152597665786743, "learning_rate": 2.9352775874566814e-06, "loss": 0.0772, "step": 426050 }, { "epoch": 4.19, "grad_norm": 13.643026351928711, "learning_rate": 2.9351534650024335e-06, "loss": 0.0953, "step": 426075 }, { "epoch": 4.19, "grad_norm": 3.584773302078247, "learning_rate": 2.9350293425481847e-06, "loss": 0.0721, "step": 426100 }, { "epoch": 4.19, "grad_norm": 16.721559524536133, "learning_rate": 2.934905220093936e-06, "loss": 0.1215, "step": 426125 }, { "epoch": 4.19, "grad_norm": 4.910810947418213, "learning_rate": 2.934781097639688e-06, "loss": 0.0778, "step": 426150 }, { "epoch": 4.19, "grad_norm": 15.310543060302734, "learning_rate": 2.934656975185439e-06, "loss": 0.097, "step": 426175 }, { "epoch": 4.19, "grad_norm": 4.036563873291016, "learning_rate": 2.934532852731191e-06, "loss": 0.0842, "step": 426200 }, { "epoch": 4.19, "grad_norm": 16.081632614135742, "learning_rate": 2.9344087302769424e-06, "loss": 0.1322, "step": 426225 }, { "epoch": 4.19, "grad_norm": 4.789533615112305, "learning_rate": 2.934284607822694e-06, "loss": 0.0688, "step": 426250 }, { "epoch": 4.19, "grad_norm": 13.897490501403809, "learning_rate": 2.9341604853684453e-06, "loss": 0.1153, "step": 426275 }, { "epoch": 4.19, "grad_norm": 6.0620436668396, "learning_rate": 2.9340363629141973e-06, "loss": 0.0814, "step": 426300 }, { "epoch": 4.19, "grad_norm": 18.80757713317871, "learning_rate": 2.9339122404599485e-06, "loss": 0.1024, "step": 426325 }, { "epoch": 4.19, "grad_norm": 5.645561695098877, "learning_rate": 2.9337881180056998e-06, "loss": 0.0697, "step": 426350 }, { "epoch": 4.19, "grad_norm": 20.22007942199707, "learning_rate": 2.9336639955514514e-06, "loss": 0.1235, "step": 426375 }, { "epoch": 4.19, "grad_norm": 5.008639335632324, "learning_rate": 2.933539873097203e-06, "loss": 0.0793, "step": 426400 }, { "epoch": 4.19, "grad_norm": 13.56088638305664, "learning_rate": 2.9334157506429547e-06, "loss": 0.0988, "step": 426425 }, { "epoch": 4.19, "grad_norm": 10.265284538269043, "learning_rate": 2.933291628188706e-06, "loss": 0.077, "step": 426450 }, { "epoch": 4.19, "grad_norm": 15.577874183654785, "learning_rate": 2.933167505734458e-06, "loss": 0.1179, "step": 426475 }, { "epoch": 4.19, "grad_norm": 8.724863052368164, "learning_rate": 2.933043383280209e-06, "loss": 0.0595, "step": 426500 }, { "epoch": 4.19, "grad_norm": 8.069236755371094, "learning_rate": 2.9329192608259604e-06, "loss": 0.0993, "step": 426525 }, { "epoch": 4.19, "grad_norm": 1.4041292667388916, "learning_rate": 2.9327951383717124e-06, "loss": 0.0984, "step": 426550 }, { "epoch": 4.19, "grad_norm": 18.603548049926758, "learning_rate": 2.9326710159174636e-06, "loss": 0.1036, "step": 426575 }, { "epoch": 4.19, "grad_norm": 10.73436164855957, "learning_rate": 2.9325468934632153e-06, "loss": 0.0766, "step": 426600 }, { "epoch": 4.19, "grad_norm": 8.186681747436523, "learning_rate": 2.932422771008967e-06, "loss": 0.1, "step": 426625 }, { "epoch": 4.19, "grad_norm": 1.553100824356079, "learning_rate": 2.9322986485547185e-06, "loss": 0.0763, "step": 426650 }, { "epoch": 4.2, "grad_norm": 14.39309024810791, "learning_rate": 2.9321745261004697e-06, "loss": 0.1181, "step": 426675 }, { "epoch": 4.2, "grad_norm": 6.420187950134277, "learning_rate": 2.9320553685443916e-06, "loss": 0.0754, "step": 426700 }, { "epoch": 4.2, "grad_norm": 7.731912136077881, "learning_rate": 2.931931246090143e-06, "loss": 0.1041, "step": 426725 }, { "epoch": 4.2, "grad_norm": 8.377939224243164, "learning_rate": 2.931807123635895e-06, "loss": 0.0481, "step": 426750 }, { "epoch": 4.2, "grad_norm": 16.328386306762695, "learning_rate": 2.931683001181646e-06, "loss": 0.0845, "step": 426775 }, { "epoch": 4.2, "grad_norm": 2.8839704990386963, "learning_rate": 2.9315588787273973e-06, "loss": 0.0814, "step": 426800 }, { "epoch": 4.2, "grad_norm": 11.945262908935547, "learning_rate": 2.9314347562731494e-06, "loss": 0.1239, "step": 426825 }, { "epoch": 4.2, "grad_norm": 7.231797695159912, "learning_rate": 2.9313106338189006e-06, "loss": 0.0794, "step": 426850 }, { "epoch": 4.2, "grad_norm": 9.524313926696777, "learning_rate": 2.931186511364652e-06, "loss": 0.0847, "step": 426875 }, { "epoch": 4.2, "grad_norm": 5.526181697845459, "learning_rate": 2.931062388910404e-06, "loss": 0.0834, "step": 426900 }, { "epoch": 4.2, "grad_norm": 16.995174407958984, "learning_rate": 2.9309382664561555e-06, "loss": 0.0947, "step": 426925 }, { "epoch": 4.2, "grad_norm": 6.924059867858887, "learning_rate": 2.9308141440019067e-06, "loss": 0.0742, "step": 426950 }, { "epoch": 4.2, "grad_norm": 13.383554458618164, "learning_rate": 2.930690021547658e-06, "loss": 0.0828, "step": 426975 }, { "epoch": 4.2, "grad_norm": 0.623185932636261, "learning_rate": 2.93056589909341e-06, "loss": 0.0695, "step": 427000 }, { "epoch": 4.2, "grad_norm": 13.72253131866455, "learning_rate": 2.930441776639161e-06, "loss": 0.098, "step": 427025 }, { "epoch": 4.2, "grad_norm": 3.600151300430298, "learning_rate": 2.930317654184913e-06, "loss": 0.0862, "step": 427050 }, { "epoch": 4.2, "grad_norm": 6.461460590362549, "learning_rate": 2.9301935317306644e-06, "loss": 0.0864, "step": 427075 }, { "epoch": 4.2, "grad_norm": 3.2189626693725586, "learning_rate": 2.930069409276416e-06, "loss": 0.0635, "step": 427100 }, { "epoch": 4.2, "grad_norm": 10.594935417175293, "learning_rate": 2.9299452868221673e-06, "loss": 0.1253, "step": 427125 }, { "epoch": 4.2, "grad_norm": 3.2900962829589844, "learning_rate": 2.9298211643679193e-06, "loss": 0.0788, "step": 427150 }, { "epoch": 4.2, "grad_norm": 16.08658790588379, "learning_rate": 2.9296970419136706e-06, "loss": 0.1237, "step": 427175 }, { "epoch": 4.2, "grad_norm": 1.012174129486084, "learning_rate": 2.9295729194594218e-06, "loss": 0.0616, "step": 427200 }, { "epoch": 4.2, "grad_norm": 12.308116912841797, "learning_rate": 2.929448797005174e-06, "loss": 0.1247, "step": 427225 }, { "epoch": 4.2, "grad_norm": 1.9044326543807983, "learning_rate": 2.929324674550925e-06, "loss": 0.0782, "step": 427250 }, { "epoch": 4.2, "grad_norm": 6.0275444984436035, "learning_rate": 2.9292005520966767e-06, "loss": 0.0895, "step": 427275 }, { "epoch": 4.2, "grad_norm": 0.9260954260826111, "learning_rate": 2.9290764296424283e-06, "loss": 0.0713, "step": 427300 }, { "epoch": 4.2, "grad_norm": 12.02019214630127, "learning_rate": 2.92895230718818e-06, "loss": 0.0862, "step": 427325 }, { "epoch": 4.2, "grad_norm": 7.426602840423584, "learning_rate": 2.928828184733931e-06, "loss": 0.0772, "step": 427350 }, { "epoch": 4.2, "grad_norm": 6.62621545791626, "learning_rate": 2.9287040622796832e-06, "loss": 0.0825, "step": 427375 }, { "epoch": 4.2, "grad_norm": 0.23020286858081818, "learning_rate": 2.9285799398254344e-06, "loss": 0.0753, "step": 427400 }, { "epoch": 4.2, "grad_norm": 11.439352035522461, "learning_rate": 2.9284558173711856e-06, "loss": 0.1288, "step": 427425 }, { "epoch": 4.2, "grad_norm": 4.742926120758057, "learning_rate": 2.9283316949169377e-06, "loss": 0.0589, "step": 427450 }, { "epoch": 4.2, "grad_norm": 22.69522476196289, "learning_rate": 2.928207572462689e-06, "loss": 0.0973, "step": 427475 }, { "epoch": 4.2, "grad_norm": 7.46436071395874, "learning_rate": 2.9280834500084405e-06, "loss": 0.0923, "step": 427500 }, { "epoch": 4.2, "grad_norm": 7.98314094543457, "learning_rate": 2.927959327554192e-06, "loss": 0.0717, "step": 427525 }, { "epoch": 4.2, "grad_norm": 5.108931541442871, "learning_rate": 2.927835205099944e-06, "loss": 0.0743, "step": 427550 }, { "epoch": 4.2, "grad_norm": 10.016538619995117, "learning_rate": 2.927711082645695e-06, "loss": 0.101, "step": 427575 }, { "epoch": 4.2, "grad_norm": 5.741268634796143, "learning_rate": 2.927586960191447e-06, "loss": 0.0904, "step": 427600 }, { "epoch": 4.2, "grad_norm": 12.261515617370605, "learning_rate": 2.9274628377371983e-06, "loss": 0.1121, "step": 427625 }, { "epoch": 4.2, "grad_norm": 5.191940784454346, "learning_rate": 2.9273387152829495e-06, "loss": 0.0852, "step": 427650 }, { "epoch": 4.2, "grad_norm": 10.572738647460938, "learning_rate": 2.9272145928287016e-06, "loss": 0.1001, "step": 427675 }, { "epoch": 4.21, "grad_norm": 2.6791303157806396, "learning_rate": 2.9270904703744528e-06, "loss": 0.0829, "step": 427700 }, { "epoch": 4.21, "grad_norm": 11.802413940429688, "learning_rate": 2.9269663479202044e-06, "loss": 0.0848, "step": 427725 }, { "epoch": 4.21, "grad_norm": 7.209588050842285, "learning_rate": 2.926842225465956e-06, "loss": 0.0754, "step": 427750 }, { "epoch": 4.21, "grad_norm": 13.487215042114258, "learning_rate": 2.9267181030117077e-06, "loss": 0.108, "step": 427775 }, { "epoch": 4.21, "grad_norm": 4.928836345672607, "learning_rate": 2.926593980557459e-06, "loss": 0.0775, "step": 427800 }, { "epoch": 4.21, "grad_norm": 19.318410873413086, "learning_rate": 2.92646985810321e-06, "loss": 0.118, "step": 427825 }, { "epoch": 4.21, "grad_norm": 1.5390093326568604, "learning_rate": 2.926345735648962e-06, "loss": 0.0682, "step": 427850 }, { "epoch": 4.21, "grad_norm": 22.69781494140625, "learning_rate": 2.9262216131947134e-06, "loss": 0.087, "step": 427875 }, { "epoch": 4.21, "grad_norm": 0.5665329694747925, "learning_rate": 2.926097490740465e-06, "loss": 0.0615, "step": 427900 }, { "epoch": 4.21, "grad_norm": 16.363903045654297, "learning_rate": 2.9259733682862166e-06, "loss": 0.1296, "step": 427925 }, { "epoch": 4.21, "grad_norm": 3.2550723552703857, "learning_rate": 2.9258492458319683e-06, "loss": 0.085, "step": 427950 }, { "epoch": 4.21, "grad_norm": 13.02324390411377, "learning_rate": 2.9257251233777195e-06, "loss": 0.1081, "step": 427975 }, { "epoch": 4.21, "grad_norm": 1.69768488407135, "learning_rate": 2.9256010009234715e-06, "loss": 0.0665, "step": 428000 }, { "epoch": 4.21, "grad_norm": 12.895171165466309, "learning_rate": 2.9254768784692228e-06, "loss": 0.107, "step": 428025 }, { "epoch": 4.21, "grad_norm": 8.234312057495117, "learning_rate": 2.925352756014974e-06, "loss": 0.0949, "step": 428050 }, { "epoch": 4.21, "grad_norm": 18.423368453979492, "learning_rate": 2.925228633560726e-06, "loss": 0.1125, "step": 428075 }, { "epoch": 4.21, "grad_norm": 0.5844886898994446, "learning_rate": 2.9251045111064772e-06, "loss": 0.0691, "step": 428100 }, { "epoch": 4.21, "grad_norm": 8.649460792541504, "learning_rate": 2.924980388652229e-06, "loss": 0.077, "step": 428125 }, { "epoch": 4.21, "grad_norm": 0.955332338809967, "learning_rate": 2.9248562661979805e-06, "loss": 0.0606, "step": 428150 }, { "epoch": 4.21, "grad_norm": 13.045720100402832, "learning_rate": 2.924732143743732e-06, "loss": 0.112, "step": 428175 }, { "epoch": 4.21, "grad_norm": 2.143312931060791, "learning_rate": 2.9246080212894834e-06, "loss": 0.0459, "step": 428200 }, { "epoch": 4.21, "grad_norm": 11.403162956237793, "learning_rate": 2.9244838988352354e-06, "loss": 0.0796, "step": 428225 }, { "epoch": 4.21, "grad_norm": 6.297715187072754, "learning_rate": 2.9243597763809866e-06, "loss": 0.0915, "step": 428250 }, { "epoch": 4.21, "grad_norm": 16.63736343383789, "learning_rate": 2.924235653926738e-06, "loss": 0.124, "step": 428275 }, { "epoch": 4.21, "grad_norm": 1.8656227588653564, "learning_rate": 2.92411153147249e-06, "loss": 0.0488, "step": 428300 }, { "epoch": 4.21, "grad_norm": 17.914318084716797, "learning_rate": 2.923987409018241e-06, "loss": 0.1171, "step": 428325 }, { "epoch": 4.21, "grad_norm": 2.2049849033355713, "learning_rate": 2.9238632865639927e-06, "loss": 0.0736, "step": 428350 }, { "epoch": 4.21, "grad_norm": 14.195656776428223, "learning_rate": 2.9237391641097444e-06, "loss": 0.1067, "step": 428375 }, { "epoch": 4.21, "grad_norm": 4.6924519538879395, "learning_rate": 2.923615041655496e-06, "loss": 0.059, "step": 428400 }, { "epoch": 4.21, "grad_norm": 12.389552116394043, "learning_rate": 2.9234909192012472e-06, "loss": 0.0998, "step": 428425 }, { "epoch": 4.21, "grad_norm": 2.659654140472412, "learning_rate": 2.9233667967469993e-06, "loss": 0.072, "step": 428450 }, { "epoch": 4.21, "grad_norm": 17.196800231933594, "learning_rate": 2.9232426742927505e-06, "loss": 0.101, "step": 428475 }, { "epoch": 4.21, "grad_norm": 6.361003398895264, "learning_rate": 2.9231185518385017e-06, "loss": 0.0926, "step": 428500 }, { "epoch": 4.21, "grad_norm": 16.150484085083008, "learning_rate": 2.9229944293842538e-06, "loss": 0.1044, "step": 428525 }, { "epoch": 4.21, "grad_norm": 6.124405860900879, "learning_rate": 2.922870306930005e-06, "loss": 0.093, "step": 428550 }, { "epoch": 4.21, "grad_norm": 12.60037899017334, "learning_rate": 2.9227461844757566e-06, "loss": 0.116, "step": 428575 }, { "epoch": 4.21, "grad_norm": 1.8846371173858643, "learning_rate": 2.9226220620215082e-06, "loss": 0.0868, "step": 428600 }, { "epoch": 4.21, "grad_norm": 8.977875709533691, "learning_rate": 2.92249793956726e-06, "loss": 0.1007, "step": 428625 }, { "epoch": 4.21, "grad_norm": 4.183228492736816, "learning_rate": 2.922373817113011e-06, "loss": 0.0706, "step": 428650 }, { "epoch": 4.21, "grad_norm": 10.761336326599121, "learning_rate": 2.9222496946587623e-06, "loss": 0.1139, "step": 428675 }, { "epoch": 4.22, "grad_norm": 0.4442782998085022, "learning_rate": 2.9221255722045144e-06, "loss": 0.0436, "step": 428700 }, { "epoch": 4.22, "grad_norm": 11.711480140686035, "learning_rate": 2.9220014497502656e-06, "loss": 0.0971, "step": 428725 }, { "epoch": 4.22, "grad_norm": 0.8596987128257751, "learning_rate": 2.921877327296017e-06, "loss": 0.0678, "step": 428750 }, { "epoch": 4.22, "grad_norm": Infinity, "learning_rate": 2.9217581697399387e-06, "loss": 0.1105, "step": 428775 }, { "epoch": 4.22, "grad_norm": 0.12625285983085632, "learning_rate": 2.9216340472856903e-06, "loss": 0.0729, "step": 428800 }, { "epoch": 4.22, "grad_norm": 12.289608001708984, "learning_rate": 2.921509924831442e-06, "loss": 0.0918, "step": 428825 }, { "epoch": 4.22, "grad_norm": 0.5117670297622681, "learning_rate": 2.9213858023771936e-06, "loss": 0.0778, "step": 428850 }, { "epoch": 4.22, "grad_norm": 9.136584281921387, "learning_rate": 2.9212616799229448e-06, "loss": 0.1095, "step": 428875 }, { "epoch": 4.22, "grad_norm": 6.601130962371826, "learning_rate": 2.921137557468697e-06, "loss": 0.0662, "step": 428900 }, { "epoch": 4.22, "grad_norm": 7.513246059417725, "learning_rate": 2.921013435014448e-06, "loss": 0.1158, "step": 428925 }, { "epoch": 4.22, "grad_norm": 0.0452633872628212, "learning_rate": 2.9208893125601997e-06, "loss": 0.0843, "step": 428950 }, { "epoch": 4.22, "grad_norm": 14.071629524230957, "learning_rate": 2.9207651901059513e-06, "loss": 0.1365, "step": 428975 }, { "epoch": 4.22, "grad_norm": 2.8318898677825928, "learning_rate": 2.920641067651703e-06, "loss": 0.0896, "step": 429000 }, { "epoch": 4.22, "grad_norm": 8.12364673614502, "learning_rate": 2.920516945197454e-06, "loss": 0.0927, "step": 429025 }, { "epoch": 4.22, "grad_norm": 6.551936149597168, "learning_rate": 2.9203928227432062e-06, "loss": 0.0762, "step": 429050 }, { "epoch": 4.22, "grad_norm": 14.860250473022461, "learning_rate": 2.9202687002889574e-06, "loss": 0.1091, "step": 429075 }, { "epoch": 4.22, "grad_norm": 0.4797900319099426, "learning_rate": 2.9201445778347086e-06, "loss": 0.0618, "step": 429100 }, { "epoch": 4.22, "grad_norm": 5.655113220214844, "learning_rate": 2.9200204553804607e-06, "loss": 0.0932, "step": 429125 }, { "epoch": 4.22, "grad_norm": 4.742082118988037, "learning_rate": 2.919896332926212e-06, "loss": 0.056, "step": 429150 }, { "epoch": 4.22, "grad_norm": 11.138750076293945, "learning_rate": 2.9197722104719636e-06, "loss": 0.1236, "step": 429175 }, { "epoch": 4.22, "grad_norm": 2.1342458724975586, "learning_rate": 2.919648088017715e-06, "loss": 0.0845, "step": 429200 }, { "epoch": 4.22, "grad_norm": 3.309309720993042, "learning_rate": 2.919523965563467e-06, "loss": 0.1092, "step": 429225 }, { "epoch": 4.22, "grad_norm": 3.671541452407837, "learning_rate": 2.919399843109218e-06, "loss": 0.0651, "step": 429250 }, { "epoch": 4.22, "grad_norm": 11.009541511535645, "learning_rate": 2.9192757206549692e-06, "loss": 0.1093, "step": 429275 }, { "epoch": 4.22, "grad_norm": 5.857067108154297, "learning_rate": 2.9191515982007213e-06, "loss": 0.0896, "step": 429300 }, { "epoch": 4.22, "grad_norm": 17.338871002197266, "learning_rate": 2.9190274757464725e-06, "loss": 0.1106, "step": 429325 }, { "epoch": 4.22, "grad_norm": 6.283471584320068, "learning_rate": 2.918903353292224e-06, "loss": 0.0729, "step": 429350 }, { "epoch": 4.22, "grad_norm": 9.352680206298828, "learning_rate": 2.9187792308379758e-06, "loss": 0.0914, "step": 429375 }, { "epoch": 4.22, "grad_norm": 7.016165256500244, "learning_rate": 2.9186551083837274e-06, "loss": 0.0714, "step": 429400 }, { "epoch": 4.22, "grad_norm": 6.70788049697876, "learning_rate": 2.9185309859294786e-06, "loss": 0.1289, "step": 429425 }, { "epoch": 4.22, "grad_norm": 11.111021995544434, "learning_rate": 2.9184068634752307e-06, "loss": 0.0758, "step": 429450 }, { "epoch": 4.22, "grad_norm": 6.391129493713379, "learning_rate": 2.918282741020982e-06, "loss": 0.1261, "step": 429475 }, { "epoch": 4.22, "grad_norm": 3.0336897373199463, "learning_rate": 2.918158618566733e-06, "loss": 0.0739, "step": 429500 }, { "epoch": 4.22, "grad_norm": 14.112594604492188, "learning_rate": 2.918034496112485e-06, "loss": 0.0825, "step": 429525 }, { "epoch": 4.22, "grad_norm": 5.796606540679932, "learning_rate": 2.9179103736582364e-06, "loss": 0.0901, "step": 429550 }, { "epoch": 4.22, "grad_norm": 3.355226516723633, "learning_rate": 2.917786251203988e-06, "loss": 0.109, "step": 429575 }, { "epoch": 4.22, "grad_norm": 2.935089349746704, "learning_rate": 2.9176621287497397e-06, "loss": 0.08, "step": 429600 }, { "epoch": 4.22, "grad_norm": 6.787085056304932, "learning_rate": 2.9175380062954913e-06, "loss": 0.117, "step": 429625 }, { "epoch": 4.22, "grad_norm": 4.071547508239746, "learning_rate": 2.9174138838412425e-06, "loss": 0.0781, "step": 429650 }, { "epoch": 4.22, "grad_norm": 11.464107513427734, "learning_rate": 2.9172897613869946e-06, "loss": 0.1228, "step": 429675 }, { "epoch": 4.22, "grad_norm": 4.9885334968566895, "learning_rate": 2.9171656389327458e-06, "loss": 0.0685, "step": 429700 }, { "epoch": 4.23, "grad_norm": 19.884599685668945, "learning_rate": 2.917041516478497e-06, "loss": 0.0957, "step": 429725 }, { "epoch": 4.23, "grad_norm": 0.3485706150531769, "learning_rate": 2.916917394024249e-06, "loss": 0.0553, "step": 429750 }, { "epoch": 4.23, "grad_norm": 11.429601669311523, "learning_rate": 2.9167932715700002e-06, "loss": 0.1061, "step": 429775 }, { "epoch": 4.23, "grad_norm": 5.045769214630127, "learning_rate": 2.916669149115752e-06, "loss": 0.0591, "step": 429800 }, { "epoch": 4.23, "grad_norm": 14.498739242553711, "learning_rate": 2.9165450266615035e-06, "loss": 0.1139, "step": 429825 }, { "epoch": 4.23, "grad_norm": 7.658445358276367, "learning_rate": 2.916420904207255e-06, "loss": 0.0815, "step": 429850 }, { "epoch": 4.23, "grad_norm": 10.014333724975586, "learning_rate": 2.9162967817530064e-06, "loss": 0.1417, "step": 429875 }, { "epoch": 4.23, "grad_norm": 17.959552764892578, "learning_rate": 2.9161726592987584e-06, "loss": 0.1203, "step": 429900 }, { "epoch": 4.23, "grad_norm": 14.950823783874512, "learning_rate": 2.9160485368445096e-06, "loss": 0.1275, "step": 429925 }, { "epoch": 4.23, "grad_norm": 2.2280006408691406, "learning_rate": 2.915924414390261e-06, "loss": 0.0601, "step": 429950 }, { "epoch": 4.23, "grad_norm": 8.866506576538086, "learning_rate": 2.915800291936013e-06, "loss": 0.0947, "step": 429975 }, { "epoch": 4.23, "grad_norm": 5.559341907501221, "learning_rate": 2.915676169481764e-06, "loss": 0.0894, "step": 430000 }, { "epoch": 4.23, "grad_norm": 14.324712753295898, "learning_rate": 2.9155520470275158e-06, "loss": 0.0783, "step": 430025 }, { "epoch": 4.23, "grad_norm": 5.445622444152832, "learning_rate": 2.9154279245732674e-06, "loss": 0.0726, "step": 430050 }, { "epoch": 4.23, "grad_norm": 10.434420585632324, "learning_rate": 2.915303802119019e-06, "loss": 0.104, "step": 430075 }, { "epoch": 4.23, "grad_norm": 0.11271318793296814, "learning_rate": 2.9151796796647702e-06, "loss": 0.0878, "step": 430100 }, { "epoch": 4.23, "grad_norm": 12.036215782165527, "learning_rate": 2.9150555572105214e-06, "loss": 0.1379, "step": 430125 }, { "epoch": 4.23, "grad_norm": 8.04609203338623, "learning_rate": 2.9149314347562735e-06, "loss": 0.0661, "step": 430150 }, { "epoch": 4.23, "grad_norm": 9.08139419555664, "learning_rate": 2.9148073123020247e-06, "loss": 0.1033, "step": 430175 }, { "epoch": 4.23, "grad_norm": 15.138564109802246, "learning_rate": 2.9146831898477763e-06, "loss": 0.1144, "step": 430200 }, { "epoch": 4.23, "grad_norm": 3.0024876594543457, "learning_rate": 2.914559067393528e-06, "loss": 0.1038, "step": 430225 }, { "epoch": 4.23, "grad_norm": 4.355837345123291, "learning_rate": 2.9144349449392796e-06, "loss": 0.0604, "step": 430250 }, { "epoch": 4.23, "grad_norm": 9.437053680419922, "learning_rate": 2.914310822485031e-06, "loss": 0.1256, "step": 430275 }, { "epoch": 4.23, "grad_norm": 4.0805439949035645, "learning_rate": 2.914186700030783e-06, "loss": 0.0755, "step": 430300 }, { "epoch": 4.23, "grad_norm": 17.644865036010742, "learning_rate": 2.914062577576534e-06, "loss": 0.1063, "step": 430325 }, { "epoch": 4.23, "grad_norm": 1.916599154472351, "learning_rate": 2.9139384551222853e-06, "loss": 0.078, "step": 430350 }, { "epoch": 4.23, "grad_norm": 10.04348087310791, "learning_rate": 2.9138143326680374e-06, "loss": 0.0988, "step": 430375 }, { "epoch": 4.23, "grad_norm": 3.5882232189178467, "learning_rate": 2.9136902102137886e-06, "loss": 0.0723, "step": 430400 }, { "epoch": 4.23, "grad_norm": 15.227010726928711, "learning_rate": 2.9135660877595402e-06, "loss": 0.1329, "step": 430425 }, { "epoch": 4.23, "grad_norm": 6.7396745681762695, "learning_rate": 2.913441965305292e-06, "loss": 0.07, "step": 430450 }, { "epoch": 4.23, "grad_norm": 10.421340942382812, "learning_rate": 2.9133178428510435e-06, "loss": 0.0896, "step": 430475 }, { "epoch": 4.23, "grad_norm": 5.665260314941406, "learning_rate": 2.9131937203967947e-06, "loss": 0.0788, "step": 430500 }, { "epoch": 4.23, "grad_norm": 10.742321014404297, "learning_rate": 2.9130695979425468e-06, "loss": 0.0975, "step": 430525 }, { "epoch": 4.23, "grad_norm": 4.485311985015869, "learning_rate": 2.912945475488298e-06, "loss": 0.0578, "step": 430550 }, { "epoch": 4.23, "grad_norm": 16.135026931762695, "learning_rate": 2.912821353034049e-06, "loss": 0.0967, "step": 430575 }, { "epoch": 4.23, "grad_norm": 1.971451997756958, "learning_rate": 2.9126972305798012e-06, "loss": 0.0751, "step": 430600 }, { "epoch": 4.23, "grad_norm": 22.840736389160156, "learning_rate": 2.9125731081255524e-06, "loss": 0.0907, "step": 430625 }, { "epoch": 4.23, "grad_norm": 3.8837597370147705, "learning_rate": 2.912448985671304e-06, "loss": 0.0562, "step": 430650 }, { "epoch": 4.23, "grad_norm": 9.17656421661377, "learning_rate": 2.9123248632170557e-06, "loss": 0.1031, "step": 430675 }, { "epoch": 4.23, "grad_norm": 0.5273758769035339, "learning_rate": 2.9122007407628074e-06, "loss": 0.067, "step": 430700 }, { "epoch": 4.23, "grad_norm": 20.61627197265625, "learning_rate": 2.9120766183085586e-06, "loss": 0.1023, "step": 430725 }, { "epoch": 4.24, "grad_norm": 10.635071754455566, "learning_rate": 2.9119524958543106e-06, "loss": 0.0789, "step": 430750 }, { "epoch": 4.24, "grad_norm": 12.562423706054688, "learning_rate": 2.911828373400062e-06, "loss": 0.1277, "step": 430775 }, { "epoch": 4.24, "grad_norm": 10.153692245483398, "learning_rate": 2.911704250945813e-06, "loss": 0.112, "step": 430800 }, { "epoch": 4.24, "grad_norm": 10.388152122497559, "learning_rate": 2.911580128491565e-06, "loss": 0.0798, "step": 430825 }, { "epoch": 4.24, "grad_norm": 3.963287830352783, "learning_rate": 2.9114560060373163e-06, "loss": 0.087, "step": 430850 }, { "epoch": 4.24, "grad_norm": 15.946653366088867, "learning_rate": 2.911331883583068e-06, "loss": 0.1298, "step": 430875 }, { "epoch": 4.24, "grad_norm": 0.43155503273010254, "learning_rate": 2.9112077611288196e-06, "loss": 0.1107, "step": 430900 }, { "epoch": 4.24, "grad_norm": 8.036865234375, "learning_rate": 2.9110836386745712e-06, "loss": 0.0905, "step": 430925 }, { "epoch": 4.24, "grad_norm": 0.12091438472270966, "learning_rate": 2.9109595162203224e-06, "loss": 0.0748, "step": 430950 }, { "epoch": 4.24, "grad_norm": 10.661437034606934, "learning_rate": 2.9108353937660736e-06, "loss": 0.1172, "step": 430975 }, { "epoch": 4.24, "grad_norm": 6.598640441894531, "learning_rate": 2.9107112713118257e-06, "loss": 0.062, "step": 431000 }, { "epoch": 4.24, "grad_norm": 11.292960166931152, "learning_rate": 2.910587148857577e-06, "loss": 0.0703, "step": 431025 }, { "epoch": 4.24, "grad_norm": 8.38676643371582, "learning_rate": 2.9104630264033286e-06, "loss": 0.0696, "step": 431050 }, { "epoch": 4.24, "grad_norm": 13.66172981262207, "learning_rate": 2.91033890394908e-06, "loss": 0.0945, "step": 431075 }, { "epoch": 4.24, "grad_norm": 6.226072788238525, "learning_rate": 2.910214781494832e-06, "loss": 0.0551, "step": 431100 }, { "epoch": 4.24, "grad_norm": 16.52914047241211, "learning_rate": 2.910090659040583e-06, "loss": 0.103, "step": 431125 }, { "epoch": 4.24, "grad_norm": 8.594948768615723, "learning_rate": 2.909966536586335e-06, "loss": 0.0578, "step": 431150 }, { "epoch": 4.24, "grad_norm": 11.735548973083496, "learning_rate": 2.9098424141320863e-06, "loss": 0.1005, "step": 431175 }, { "epoch": 4.24, "grad_norm": 1.6430083513259888, "learning_rate": 2.9097182916778375e-06, "loss": 0.0582, "step": 431200 }, { "epoch": 4.24, "grad_norm": 11.842473983764648, "learning_rate": 2.9095941692235896e-06, "loss": 0.1101, "step": 431225 }, { "epoch": 4.24, "grad_norm": 5.027395725250244, "learning_rate": 2.9094700467693408e-06, "loss": 0.0975, "step": 431250 }, { "epoch": 4.24, "grad_norm": 0.9224505424499512, "learning_rate": 2.9093459243150924e-06, "loss": 0.0832, "step": 431275 }, { "epoch": 4.24, "grad_norm": 0.5163919925689697, "learning_rate": 2.909221801860844e-06, "loss": 0.0709, "step": 431300 }, { "epoch": 4.24, "grad_norm": 16.50326156616211, "learning_rate": 2.9090976794065957e-06, "loss": 0.1386, "step": 431325 }, { "epoch": 4.24, "grad_norm": 2.2514073848724365, "learning_rate": 2.908973556952347e-06, "loss": 0.0699, "step": 431350 }, { "epoch": 4.24, "grad_norm": 9.08554744720459, "learning_rate": 2.908849434498099e-06, "loss": 0.1129, "step": 431375 }, { "epoch": 4.24, "grad_norm": 0.015160143375396729, "learning_rate": 2.90872531204385e-06, "loss": 0.0603, "step": 431400 }, { "epoch": 4.24, "grad_norm": 9.381400108337402, "learning_rate": 2.9086011895896014e-06, "loss": 0.0807, "step": 431425 }, { "epoch": 4.24, "grad_norm": 5.367090225219727, "learning_rate": 2.9084770671353534e-06, "loss": 0.0822, "step": 431450 }, { "epoch": 4.24, "grad_norm": 16.41261100769043, "learning_rate": 2.9083529446811047e-06, "loss": 0.0933, "step": 431475 }, { "epoch": 4.24, "grad_norm": 0.7982491254806519, "learning_rate": 2.9082288222268563e-06, "loss": 0.0635, "step": 431500 }, { "epoch": 4.24, "grad_norm": 15.486519813537598, "learning_rate": 2.908104699772608e-06, "loss": 0.1019, "step": 431525 }, { "epoch": 4.24, "grad_norm": 8.208595275878906, "learning_rate": 2.9079805773183596e-06, "loss": 0.0738, "step": 431550 }, { "epoch": 4.24, "grad_norm": 16.542495727539062, "learning_rate": 2.9078564548641108e-06, "loss": 0.1337, "step": 431575 }, { "epoch": 4.24, "grad_norm": 7.331182479858398, "learning_rate": 2.907732332409863e-06, "loss": 0.0599, "step": 431600 }, { "epoch": 4.24, "grad_norm": 15.493778228759766, "learning_rate": 2.907608209955614e-06, "loss": 0.1459, "step": 431625 }, { "epoch": 4.24, "grad_norm": 1.6079442501068115, "learning_rate": 2.9074840875013652e-06, "loss": 0.0797, "step": 431650 }, { "epoch": 4.24, "grad_norm": 16.332448959350586, "learning_rate": 2.9073599650471173e-06, "loss": 0.0824, "step": 431675 }, { "epoch": 4.24, "grad_norm": 3.315216302871704, "learning_rate": 2.9072358425928685e-06, "loss": 0.0541, "step": 431700 }, { "epoch": 4.24, "grad_norm": 15.66373062133789, "learning_rate": 2.90711172013862e-06, "loss": 0.0937, "step": 431725 }, { "epoch": 4.25, "grad_norm": 8.255159378051758, "learning_rate": 2.906987597684372e-06, "loss": 0.0883, "step": 431750 }, { "epoch": 4.25, "grad_norm": 16.82923698425293, "learning_rate": 2.9068634752301234e-06, "loss": 0.1208, "step": 431775 }, { "epoch": 4.25, "grad_norm": 3.5787999629974365, "learning_rate": 2.9067393527758746e-06, "loss": 0.0763, "step": 431800 }, { "epoch": 4.25, "grad_norm": 20.8298282623291, "learning_rate": 2.9066152303216263e-06, "loss": 0.1504, "step": 431825 }, { "epoch": 4.25, "grad_norm": 9.562972068786621, "learning_rate": 2.906491107867378e-06, "loss": 0.0519, "step": 431850 }, { "epoch": 4.25, "grad_norm": 13.50484848022461, "learning_rate": 2.9063669854131295e-06, "loss": 0.1298, "step": 431875 }, { "epoch": 4.25, "grad_norm": 0.9940403699874878, "learning_rate": 2.9062428629588808e-06, "loss": 0.064, "step": 431900 }, { "epoch": 4.25, "grad_norm": 7.594587802886963, "learning_rate": 2.906118740504633e-06, "loss": 0.0841, "step": 431925 }, { "epoch": 4.25, "grad_norm": 6.371484279632568, "learning_rate": 2.905994618050384e-06, "loss": 0.077, "step": 431950 }, { "epoch": 4.25, "grad_norm": 8.303385734558105, "learning_rate": 2.9058704955961352e-06, "loss": 0.1088, "step": 431975 }, { "epoch": 4.25, "grad_norm": 4.554176330566406, "learning_rate": 2.9057463731418873e-06, "loss": 0.0732, "step": 432000 }, { "epoch": 4.25, "grad_norm": 10.10586166381836, "learning_rate": 2.9056222506876385e-06, "loss": 0.0849, "step": 432025 }, { "epoch": 4.25, "grad_norm": 15.246715545654297, "learning_rate": 2.90549812823339e-06, "loss": 0.0963, "step": 432050 }, { "epoch": 4.25, "grad_norm": 12.847970008850098, "learning_rate": 2.9053740057791418e-06, "loss": 0.1035, "step": 432075 }, { "epoch": 4.25, "grad_norm": 8.890005111694336, "learning_rate": 2.9052498833248934e-06, "loss": 0.0734, "step": 432100 }, { "epoch": 4.25, "grad_norm": 13.125511169433594, "learning_rate": 2.9051257608706446e-06, "loss": 0.1114, "step": 432125 }, { "epoch": 4.25, "grad_norm": 0.97768235206604, "learning_rate": 2.9050016384163967e-06, "loss": 0.0896, "step": 432150 }, { "epoch": 4.25, "grad_norm": 15.558954238891602, "learning_rate": 2.904877515962148e-06, "loss": 0.107, "step": 432175 }, { "epoch": 4.25, "grad_norm": 4.7670512199401855, "learning_rate": 2.904753393507899e-06, "loss": 0.0561, "step": 432200 }, { "epoch": 4.25, "grad_norm": 12.810868263244629, "learning_rate": 2.904629271053651e-06, "loss": 0.0933, "step": 432225 }, { "epoch": 4.25, "grad_norm": 3.750570058822632, "learning_rate": 2.9045051485994024e-06, "loss": 0.0693, "step": 432250 }, { "epoch": 4.25, "grad_norm": 8.952750205993652, "learning_rate": 2.904381026145154e-06, "loss": 0.0866, "step": 432275 }, { "epoch": 4.25, "grad_norm": 3.361903667449951, "learning_rate": 2.9042569036909056e-06, "loss": 0.0692, "step": 432300 }, { "epoch": 4.25, "grad_norm": 11.374626159667969, "learning_rate": 2.9041327812366573e-06, "loss": 0.1025, "step": 432325 }, { "epoch": 4.25, "grad_norm": 3.0099740028381348, "learning_rate": 2.9040086587824085e-06, "loss": 0.0587, "step": 432350 }, { "epoch": 4.25, "grad_norm": 10.490819931030273, "learning_rate": 2.9038845363281605e-06, "loss": 0.1025, "step": 432375 }, { "epoch": 4.25, "grad_norm": 1.093788743019104, "learning_rate": 2.9037604138739118e-06, "loss": 0.0869, "step": 432400 }, { "epoch": 4.25, "grad_norm": 12.007123947143555, "learning_rate": 2.903636291419663e-06, "loss": 0.0988, "step": 432425 }, { "epoch": 4.25, "grad_norm": 0.7610961198806763, "learning_rate": 2.903512168965415e-06, "loss": 0.0621, "step": 432450 }, { "epoch": 4.25, "grad_norm": 22.675188064575195, "learning_rate": 2.9033880465111662e-06, "loss": 0.1235, "step": 432475 }, { "epoch": 4.25, "grad_norm": 4.3485188484191895, "learning_rate": 2.903263924056918e-06, "loss": 0.0904, "step": 432500 }, { "epoch": 4.25, "grad_norm": 12.262681007385254, "learning_rate": 2.9031398016026695e-06, "loss": 0.1184, "step": 432525 }, { "epoch": 4.25, "grad_norm": 6.362362384796143, "learning_rate": 2.903020644046591e-06, "loss": 0.0776, "step": 432550 }, { "epoch": 4.25, "grad_norm": 14.358469009399414, "learning_rate": 2.902896521592342e-06, "loss": 0.0899, "step": 432575 }, { "epoch": 4.25, "grad_norm": 2.2890689373016357, "learning_rate": 2.9027723991380942e-06, "loss": 0.0958, "step": 432600 }, { "epoch": 4.25, "grad_norm": 11.314291954040527, "learning_rate": 2.9026482766838454e-06, "loss": 0.1013, "step": 432625 }, { "epoch": 4.25, "grad_norm": 2.3203682899475098, "learning_rate": 2.9025241542295967e-06, "loss": 0.061, "step": 432650 }, { "epoch": 4.25, "grad_norm": 12.176506042480469, "learning_rate": 2.9024000317753487e-06, "loss": 0.1094, "step": 432675 }, { "epoch": 4.25, "grad_norm": 3.504943609237671, "learning_rate": 2.9022759093211e-06, "loss": 0.0933, "step": 432700 }, { "epoch": 4.25, "grad_norm": 10.445371627807617, "learning_rate": 2.9021517868668516e-06, "loss": 0.0808, "step": 432725 }, { "epoch": 4.25, "grad_norm": 2.9994444847106934, "learning_rate": 2.902027664412603e-06, "loss": 0.0444, "step": 432750 }, { "epoch": 4.26, "grad_norm": 12.8780517578125, "learning_rate": 2.901903541958355e-06, "loss": 0.1049, "step": 432775 }, { "epoch": 4.26, "grad_norm": 5.768780708312988, "learning_rate": 2.901779419504106e-06, "loss": 0.0787, "step": 432800 }, { "epoch": 4.26, "grad_norm": 14.643949508666992, "learning_rate": 2.901655297049858e-06, "loss": 0.1085, "step": 432825 }, { "epoch": 4.26, "grad_norm": 7.04815149307251, "learning_rate": 2.9015311745956093e-06, "loss": 0.0868, "step": 432850 }, { "epoch": 4.26, "grad_norm": 20.21918487548828, "learning_rate": 2.9014070521413605e-06, "loss": 0.1323, "step": 432875 }, { "epoch": 4.26, "grad_norm": 4.091075420379639, "learning_rate": 2.9012829296871126e-06, "loss": 0.0692, "step": 432900 }, { "epoch": 4.26, "grad_norm": 14.23721694946289, "learning_rate": 2.901158807232864e-06, "loss": 0.0929, "step": 432925 }, { "epoch": 4.26, "grad_norm": 0.054285820573568344, "learning_rate": 2.9010346847786154e-06, "loss": 0.0849, "step": 432950 }, { "epoch": 4.26, "grad_norm": 10.539813995361328, "learning_rate": 2.900910562324367e-06, "loss": 0.1187, "step": 432975 }, { "epoch": 4.26, "grad_norm": 0.009340992197394371, "learning_rate": 2.9007864398701187e-06, "loss": 0.0636, "step": 433000 }, { "epoch": 4.26, "grad_norm": 11.046135902404785, "learning_rate": 2.90066231741587e-06, "loss": 0.0928, "step": 433025 }, { "epoch": 4.26, "grad_norm": 2.837493658065796, "learning_rate": 2.900538194961622e-06, "loss": 0.0574, "step": 433050 }, { "epoch": 4.26, "grad_norm": 16.534648895263672, "learning_rate": 2.900414072507373e-06, "loss": 0.0938, "step": 433075 }, { "epoch": 4.26, "grad_norm": 4.746761322021484, "learning_rate": 2.9002899500531244e-06, "loss": 0.0693, "step": 433100 }, { "epoch": 4.26, "grad_norm": 14.838937759399414, "learning_rate": 2.9001658275988764e-06, "loss": 0.1397, "step": 433125 }, { "epoch": 4.26, "grad_norm": 1.0229275226593018, "learning_rate": 2.9000417051446277e-06, "loss": 0.0701, "step": 433150 }, { "epoch": 4.26, "grad_norm": 20.36020851135254, "learning_rate": 2.8999175826903793e-06, "loss": 0.0948, "step": 433175 }, { "epoch": 4.26, "grad_norm": 3.627614736557007, "learning_rate": 2.899793460236131e-06, "loss": 0.0741, "step": 433200 }, { "epoch": 4.26, "grad_norm": 3.99910569190979, "learning_rate": 2.8996693377818826e-06, "loss": 0.1258, "step": 433225 }, { "epoch": 4.26, "grad_norm": 4.154250621795654, "learning_rate": 2.8995452153276338e-06, "loss": 0.0623, "step": 433250 }, { "epoch": 4.26, "grad_norm": 11.316873550415039, "learning_rate": 2.899421092873385e-06, "loss": 0.067, "step": 433275 }, { "epoch": 4.26, "grad_norm": 0.5555662512779236, "learning_rate": 2.899296970419137e-06, "loss": 0.0988, "step": 433300 }, { "epoch": 4.26, "grad_norm": 8.410173416137695, "learning_rate": 2.8991728479648883e-06, "loss": 0.1101, "step": 433325 }, { "epoch": 4.26, "grad_norm": 2.1679329872131348, "learning_rate": 2.89904872551064e-06, "loss": 0.0576, "step": 433350 }, { "epoch": 4.26, "grad_norm": 4.952165126800537, "learning_rate": 2.8989246030563915e-06, "loss": 0.0961, "step": 433375 }, { "epoch": 4.26, "grad_norm": 0.5391991138458252, "learning_rate": 2.898800480602143e-06, "loss": 0.0872, "step": 433400 }, { "epoch": 4.26, "grad_norm": 15.305784225463867, "learning_rate": 2.8986763581478944e-06, "loss": 0.1157, "step": 433425 }, { "epoch": 4.26, "grad_norm": 9.765765190124512, "learning_rate": 2.8985522356936464e-06, "loss": 0.067, "step": 433450 }, { "epoch": 4.26, "grad_norm": 12.415589332580566, "learning_rate": 2.8984281132393976e-06, "loss": 0.1034, "step": 433475 }, { "epoch": 4.26, "grad_norm": 5.214875221252441, "learning_rate": 2.898303990785149e-06, "loss": 0.0776, "step": 433500 }, { "epoch": 4.26, "grad_norm": 19.09174346923828, "learning_rate": 2.898179868330901e-06, "loss": 0.1265, "step": 433525 }, { "epoch": 4.26, "grad_norm": 5.307643413543701, "learning_rate": 2.898055745876652e-06, "loss": 0.0759, "step": 433550 }, { "epoch": 4.26, "grad_norm": 13.264754295349121, "learning_rate": 2.8979316234224038e-06, "loss": 0.0963, "step": 433575 }, { "epoch": 4.26, "grad_norm": 3.7222506999969482, "learning_rate": 2.8978075009681554e-06, "loss": 0.0851, "step": 433600 }, { "epoch": 4.26, "grad_norm": 19.841388702392578, "learning_rate": 2.897683378513907e-06, "loss": 0.0983, "step": 433625 }, { "epoch": 4.26, "grad_norm": 1.9208734035491943, "learning_rate": 2.8975592560596582e-06, "loss": 0.0883, "step": 433650 }, { "epoch": 4.26, "grad_norm": 17.260738372802734, "learning_rate": 2.8974351336054103e-06, "loss": 0.1424, "step": 433675 }, { "epoch": 4.26, "grad_norm": 6.034989356994629, "learning_rate": 2.8973110111511615e-06, "loss": 0.0677, "step": 433700 }, { "epoch": 4.26, "grad_norm": 13.922229766845703, "learning_rate": 2.8971868886969127e-06, "loss": 0.1264, "step": 433725 }, { "epoch": 4.26, "grad_norm": 9.822916030883789, "learning_rate": 2.8970627662426648e-06, "loss": 0.0826, "step": 433750 }, { "epoch": 4.26, "grad_norm": 10.77241039276123, "learning_rate": 2.896938643788416e-06, "loss": 0.1104, "step": 433775 }, { "epoch": 4.27, "grad_norm": 0.153328076004982, "learning_rate": 2.8968145213341676e-06, "loss": 0.0625, "step": 433800 }, { "epoch": 4.27, "grad_norm": 6.843050956726074, "learning_rate": 2.8966903988799193e-06, "loss": 0.0766, "step": 433825 }, { "epoch": 4.27, "grad_norm": 11.683610916137695, "learning_rate": 2.896566276425671e-06, "loss": 0.0628, "step": 433850 }, { "epoch": 4.27, "grad_norm": 11.203181266784668, "learning_rate": 2.896442153971422e-06, "loss": 0.1014, "step": 433875 }, { "epoch": 4.27, "grad_norm": 0.5437051057815552, "learning_rate": 2.896318031517174e-06, "loss": 0.0636, "step": 433900 }, { "epoch": 4.27, "grad_norm": 18.653078079223633, "learning_rate": 2.8961939090629254e-06, "loss": 0.1076, "step": 433925 }, { "epoch": 4.27, "grad_norm": 1.782171368598938, "learning_rate": 2.8960697866086766e-06, "loss": 0.0725, "step": 433950 }, { "epoch": 4.27, "grad_norm": 11.157135963439941, "learning_rate": 2.8959456641544286e-06, "loss": 0.101, "step": 433975 }, { "epoch": 4.27, "grad_norm": 0.22052253782749176, "learning_rate": 2.89582154170018e-06, "loss": 0.0997, "step": 434000 }, { "epoch": 4.27, "grad_norm": 15.261667251586914, "learning_rate": 2.8956974192459315e-06, "loss": 0.1048, "step": 434025 }, { "epoch": 4.27, "grad_norm": 6.316566467285156, "learning_rate": 2.895573296791683e-06, "loss": 0.0659, "step": 434050 }, { "epoch": 4.27, "grad_norm": 13.36806869506836, "learning_rate": 2.8954491743374348e-06, "loss": 0.0962, "step": 434075 }, { "epoch": 4.27, "grad_norm": 4.1093926429748535, "learning_rate": 2.895325051883186e-06, "loss": 0.0863, "step": 434100 }, { "epoch": 4.27, "grad_norm": 10.154783248901367, "learning_rate": 2.895200929428937e-06, "loss": 0.1183, "step": 434125 }, { "epoch": 4.27, "grad_norm": 1.0026992559432983, "learning_rate": 2.8950768069746892e-06, "loss": 0.0929, "step": 434150 }, { "epoch": 4.27, "grad_norm": 14.007833480834961, "learning_rate": 2.8949526845204405e-06, "loss": 0.0929, "step": 434175 }, { "epoch": 4.27, "grad_norm": 7.099582672119141, "learning_rate": 2.894828562066192e-06, "loss": 0.0556, "step": 434200 }, { "epoch": 4.27, "grad_norm": 7.646145820617676, "learning_rate": 2.8947044396119437e-06, "loss": 0.0937, "step": 434225 }, { "epoch": 4.27, "grad_norm": 6.015811920166016, "learning_rate": 2.8945803171576954e-06, "loss": 0.0704, "step": 434250 }, { "epoch": 4.27, "grad_norm": 2.2214691638946533, "learning_rate": 2.8944561947034466e-06, "loss": 0.1255, "step": 434275 }, { "epoch": 4.27, "grad_norm": 0.8518967628479004, "learning_rate": 2.8943320722491986e-06, "loss": 0.0754, "step": 434300 }, { "epoch": 4.27, "grad_norm": 30.313871383666992, "learning_rate": 2.89420794979495e-06, "loss": 0.1116, "step": 434325 }, { "epoch": 4.27, "grad_norm": 0.3273144066333771, "learning_rate": 2.894083827340701e-06, "loss": 0.0646, "step": 434350 }, { "epoch": 4.27, "grad_norm": 8.35400676727295, "learning_rate": 2.893959704886453e-06, "loss": 0.1198, "step": 434375 }, { "epoch": 4.27, "grad_norm": 5.748983383178711, "learning_rate": 2.8938355824322043e-06, "loss": 0.0751, "step": 434400 }, { "epoch": 4.27, "grad_norm": 15.80382251739502, "learning_rate": 2.893711459977956e-06, "loss": 0.1195, "step": 434425 }, { "epoch": 4.27, "grad_norm": 7.113263130187988, "learning_rate": 2.8935873375237076e-06, "loss": 0.0583, "step": 434450 }, { "epoch": 4.27, "grad_norm": 16.20836067199707, "learning_rate": 2.8934632150694592e-06, "loss": 0.0882, "step": 434475 }, { "epoch": 4.27, "grad_norm": 0.37429118156433105, "learning_rate": 2.8933390926152104e-06, "loss": 0.0706, "step": 434500 }, { "epoch": 4.27, "grad_norm": 17.57181739807129, "learning_rate": 2.8932149701609625e-06, "loss": 0.1087, "step": 434525 }, { "epoch": 4.27, "grad_norm": 6.587950706481934, "learning_rate": 2.8930908477067137e-06, "loss": 0.0574, "step": 434550 }, { "epoch": 4.27, "grad_norm": 0.9985365867614746, "learning_rate": 2.8929716901506356e-06, "loss": 0.1076, "step": 434575 }, { "epoch": 4.27, "grad_norm": 0.08616352826356888, "learning_rate": 2.892847567696387e-06, "loss": 0.0683, "step": 434600 }, { "epoch": 4.27, "grad_norm": 10.588348388671875, "learning_rate": 2.892723445242138e-06, "loss": 0.1044, "step": 434625 }, { "epoch": 4.27, "grad_norm": 8.486242294311523, "learning_rate": 2.89259932278789e-06, "loss": 0.0874, "step": 434650 }, { "epoch": 4.27, "grad_norm": 2.7298643589019775, "learning_rate": 2.8924752003336413e-06, "loss": 0.114, "step": 434675 }, { "epoch": 4.27, "grad_norm": 2.5321831703186035, "learning_rate": 2.892351077879393e-06, "loss": 0.0956, "step": 434700 }, { "epoch": 4.27, "grad_norm": 10.855010032653809, "learning_rate": 2.892226955425144e-06, "loss": 0.1231, "step": 434725 }, { "epoch": 4.27, "grad_norm": 0.045537110418081284, "learning_rate": 2.892102832970896e-06, "loss": 0.0568, "step": 434750 }, { "epoch": 4.27, "grad_norm": 16.538694381713867, "learning_rate": 2.8919787105166474e-06, "loss": 0.144, "step": 434775 }, { "epoch": 4.28, "grad_norm": 15.195513725280762, "learning_rate": 2.891854588062399e-06, "loss": 0.0681, "step": 434800 }, { "epoch": 4.28, "grad_norm": 7.5039167404174805, "learning_rate": 2.8917304656081507e-06, "loss": 0.1025, "step": 434825 }, { "epoch": 4.28, "grad_norm": 4.038573741912842, "learning_rate": 2.8916063431539023e-06, "loss": 0.0721, "step": 434850 }, { "epoch": 4.28, "grad_norm": 16.195486068725586, "learning_rate": 2.8914822206996535e-06, "loss": 0.0951, "step": 434875 }, { "epoch": 4.28, "grad_norm": 8.310830116271973, "learning_rate": 2.8913580982454056e-06, "loss": 0.0714, "step": 434900 }, { "epoch": 4.28, "grad_norm": 14.192693710327148, "learning_rate": 2.8912339757911568e-06, "loss": 0.0879, "step": 434925 }, { "epoch": 4.28, "grad_norm": 8.560589790344238, "learning_rate": 2.891109853336908e-06, "loss": 0.0656, "step": 434950 }, { "epoch": 4.28, "grad_norm": 9.616023063659668, "learning_rate": 2.89098573088266e-06, "loss": 0.1318, "step": 434975 }, { "epoch": 4.28, "grad_norm": 4.582209587097168, "learning_rate": 2.8908616084284113e-06, "loss": 0.0612, "step": 435000 }, { "epoch": 4.28, "grad_norm": 17.923383712768555, "learning_rate": 2.890737485974163e-06, "loss": 0.115, "step": 435025 }, { "epoch": 4.28, "grad_norm": 4.080602645874023, "learning_rate": 2.8906133635199145e-06, "loss": 0.0648, "step": 435050 }, { "epoch": 4.28, "grad_norm": 14.161553382873535, "learning_rate": 2.890489241065666e-06, "loss": 0.1518, "step": 435075 }, { "epoch": 4.28, "grad_norm": 0.3865162432193756, "learning_rate": 2.8903651186114174e-06, "loss": 0.0972, "step": 435100 }, { "epoch": 4.28, "grad_norm": 6.241672992706299, "learning_rate": 2.8902409961571694e-06, "loss": 0.0984, "step": 435125 }, { "epoch": 4.28, "grad_norm": 1.5897746086120605, "learning_rate": 2.8901168737029206e-06, "loss": 0.0454, "step": 435150 }, { "epoch": 4.28, "grad_norm": 15.279716491699219, "learning_rate": 2.889992751248672e-06, "loss": 0.0943, "step": 435175 }, { "epoch": 4.28, "grad_norm": 3.069725275039673, "learning_rate": 2.889868628794424e-06, "loss": 0.0918, "step": 435200 }, { "epoch": 4.28, "grad_norm": 24.868928909301758, "learning_rate": 2.889744506340175e-06, "loss": 0.1066, "step": 435225 }, { "epoch": 4.28, "grad_norm": 5.208556175231934, "learning_rate": 2.8896203838859268e-06, "loss": 0.0972, "step": 435250 }, { "epoch": 4.28, "grad_norm": 9.208744049072266, "learning_rate": 2.8894962614316784e-06, "loss": 0.1153, "step": 435275 }, { "epoch": 4.28, "grad_norm": 0.2355814427137375, "learning_rate": 2.88937213897743e-06, "loss": 0.0638, "step": 435300 }, { "epoch": 4.28, "grad_norm": 12.14683723449707, "learning_rate": 2.8892480165231812e-06, "loss": 0.0899, "step": 435325 }, { "epoch": 4.28, "grad_norm": 5.957228660583496, "learning_rate": 2.8891238940689333e-06, "loss": 0.0702, "step": 435350 }, { "epoch": 4.28, "grad_norm": 21.577293395996094, "learning_rate": 2.8889997716146845e-06, "loss": 0.0892, "step": 435375 }, { "epoch": 4.28, "grad_norm": 11.749526023864746, "learning_rate": 2.8888756491604357e-06, "loss": 0.0864, "step": 435400 }, { "epoch": 4.28, "grad_norm": 7.872488975524902, "learning_rate": 2.8887515267061878e-06, "loss": 0.0936, "step": 435425 }, { "epoch": 4.28, "grad_norm": 4.125258445739746, "learning_rate": 2.888627404251939e-06, "loss": 0.0686, "step": 435450 }, { "epoch": 4.28, "grad_norm": 10.215959548950195, "learning_rate": 2.8885032817976906e-06, "loss": 0.0931, "step": 435475 }, { "epoch": 4.28, "grad_norm": 7.413440704345703, "learning_rate": 2.8883791593434423e-06, "loss": 0.0782, "step": 435500 }, { "epoch": 4.28, "grad_norm": 15.215694427490234, "learning_rate": 2.888255036889194e-06, "loss": 0.0858, "step": 435525 }, { "epoch": 4.28, "grad_norm": 6.628875732421875, "learning_rate": 2.888130914434945e-06, "loss": 0.0748, "step": 435550 }, { "epoch": 4.28, "grad_norm": 13.836155891418457, "learning_rate": 2.8880067919806963e-06, "loss": 0.0923, "step": 435575 }, { "epoch": 4.28, "grad_norm": 1.991867184638977, "learning_rate": 2.8878826695264484e-06, "loss": 0.0608, "step": 435600 }, { "epoch": 4.28, "grad_norm": 15.193029403686523, "learning_rate": 2.8877585470721996e-06, "loss": 0.1157, "step": 435625 }, { "epoch": 4.28, "grad_norm": 2.685720920562744, "learning_rate": 2.8876344246179512e-06, "loss": 0.0613, "step": 435650 }, { "epoch": 4.28, "grad_norm": 14.018903732299805, "learning_rate": 2.887510302163703e-06, "loss": 0.0942, "step": 435675 }, { "epoch": 4.28, "grad_norm": 3.3470230102539062, "learning_rate": 2.8873861797094545e-06, "loss": 0.076, "step": 435700 }, { "epoch": 4.28, "grad_norm": 20.635347366333008, "learning_rate": 2.8872620572552057e-06, "loss": 0.1392, "step": 435725 }, { "epoch": 4.28, "grad_norm": 4.8870463371276855, "learning_rate": 2.8871379348009578e-06, "loss": 0.0686, "step": 435750 }, { "epoch": 4.28, "grad_norm": 16.111106872558594, "learning_rate": 2.887013812346709e-06, "loss": 0.0978, "step": 435775 }, { "epoch": 4.28, "grad_norm": 2.4788308143615723, "learning_rate": 2.88688968989246e-06, "loss": 0.0536, "step": 435800 }, { "epoch": 4.29, "grad_norm": 18.78496551513672, "learning_rate": 2.8867655674382123e-06, "loss": 0.1056, "step": 435825 }, { "epoch": 4.29, "grad_norm": 1.4311236143112183, "learning_rate": 2.8866414449839635e-06, "loss": 0.0561, "step": 435850 }, { "epoch": 4.29, "grad_norm": 9.760412216186523, "learning_rate": 2.886517322529715e-06, "loss": 0.0824, "step": 435875 }, { "epoch": 4.29, "grad_norm": 2.7849154472351074, "learning_rate": 2.8863932000754667e-06, "loss": 0.1087, "step": 435900 }, { "epoch": 4.29, "grad_norm": 11.438883781433105, "learning_rate": 2.8862690776212184e-06, "loss": 0.1153, "step": 435925 }, { "epoch": 4.29, "grad_norm": 5.611280918121338, "learning_rate": 2.8861449551669696e-06, "loss": 0.0716, "step": 435950 }, { "epoch": 4.29, "grad_norm": 19.60987091064453, "learning_rate": 2.8860208327127216e-06, "loss": 0.0959, "step": 435975 }, { "epoch": 4.29, "grad_norm": 4.412648677825928, "learning_rate": 2.885896710258473e-06, "loss": 0.0756, "step": 436000 }, { "epoch": 4.29, "grad_norm": 16.28611946105957, "learning_rate": 2.885772587804224e-06, "loss": 0.1105, "step": 436025 }, { "epoch": 4.29, "grad_norm": 5.571461200714111, "learning_rate": 2.885648465349976e-06, "loss": 0.0754, "step": 436050 }, { "epoch": 4.29, "grad_norm": 2.360488176345825, "learning_rate": 2.8855243428957273e-06, "loss": 0.0783, "step": 436075 }, { "epoch": 4.29, "grad_norm": 4.271025657653809, "learning_rate": 2.885400220441479e-06, "loss": 0.0601, "step": 436100 }, { "epoch": 4.29, "grad_norm": 21.2075252532959, "learning_rate": 2.8852760979872306e-06, "loss": 0.1077, "step": 436125 }, { "epoch": 4.29, "grad_norm": 2.65586256980896, "learning_rate": 2.8851519755329822e-06, "loss": 0.0884, "step": 436150 }, { "epoch": 4.29, "grad_norm": 25.707624435424805, "learning_rate": 2.8850278530787334e-06, "loss": 0.1016, "step": 436175 }, { "epoch": 4.29, "grad_norm": 0.40825119614601135, "learning_rate": 2.8849037306244855e-06, "loss": 0.0877, "step": 436200 }, { "epoch": 4.29, "grad_norm": 13.771060943603516, "learning_rate": 2.8847796081702367e-06, "loss": 0.0887, "step": 436225 }, { "epoch": 4.29, "grad_norm": 0.37693193554878235, "learning_rate": 2.884655485715988e-06, "loss": 0.0924, "step": 436250 }, { "epoch": 4.29, "grad_norm": 13.065619468688965, "learning_rate": 2.88453136326174e-06, "loss": 0.0963, "step": 436275 }, { "epoch": 4.29, "grad_norm": 0.6276748776435852, "learning_rate": 2.884407240807491e-06, "loss": 0.0804, "step": 436300 }, { "epoch": 4.29, "grad_norm": 15.820962905883789, "learning_rate": 2.884283118353243e-06, "loss": 0.1437, "step": 436325 }, { "epoch": 4.29, "grad_norm": 5.809247970581055, "learning_rate": 2.8841589958989945e-06, "loss": 0.0857, "step": 436350 }, { "epoch": 4.29, "grad_norm": 3.2142255306243896, "learning_rate": 2.884034873444746e-06, "loss": 0.0937, "step": 436375 }, { "epoch": 4.29, "grad_norm": 0.18598787486553192, "learning_rate": 2.8839107509904973e-06, "loss": 0.0911, "step": 436400 }, { "epoch": 4.29, "grad_norm": 6.23631477355957, "learning_rate": 2.8837866285362485e-06, "loss": 0.0949, "step": 436425 }, { "epoch": 4.29, "grad_norm": 5.5561981201171875, "learning_rate": 2.8836625060820006e-06, "loss": 0.0586, "step": 436450 }, { "epoch": 4.29, "grad_norm": 14.33171272277832, "learning_rate": 2.883538383627752e-06, "loss": 0.08, "step": 436475 }, { "epoch": 4.29, "grad_norm": 8.620857238769531, "learning_rate": 2.8834142611735034e-06, "loss": 0.0644, "step": 436500 }, { "epoch": 4.29, "grad_norm": 14.924455642700195, "learning_rate": 2.883290138719255e-06, "loss": 0.0988, "step": 436525 }, { "epoch": 4.29, "grad_norm": 3.1170737743377686, "learning_rate": 2.8831660162650067e-06, "loss": 0.0811, "step": 436550 }, { "epoch": 4.29, "grad_norm": 9.198440551757812, "learning_rate": 2.883041893810758e-06, "loss": 0.1317, "step": 436575 }, { "epoch": 4.29, "grad_norm": 4.36384391784668, "learning_rate": 2.88291777135651e-06, "loss": 0.104, "step": 436600 }, { "epoch": 4.29, "grad_norm": 14.530078887939453, "learning_rate": 2.882793648902261e-06, "loss": 0.1156, "step": 436625 }, { "epoch": 4.29, "grad_norm": 2.714146852493286, "learning_rate": 2.8826695264480124e-06, "loss": 0.0747, "step": 436650 }, { "epoch": 4.29, "grad_norm": 8.58531379699707, "learning_rate": 2.8825454039937645e-06, "loss": 0.0987, "step": 436675 }, { "epoch": 4.29, "grad_norm": 9.333267211914062, "learning_rate": 2.8824212815395157e-06, "loss": 0.0634, "step": 436700 }, { "epoch": 4.29, "grad_norm": 6.931682586669922, "learning_rate": 2.8822971590852673e-06, "loss": 0.1204, "step": 436725 }, { "epoch": 4.29, "grad_norm": 0.8309662938117981, "learning_rate": 2.8821780015291887e-06, "loss": 0.0743, "step": 436750 }, { "epoch": 4.29, "grad_norm": 8.062469482421875, "learning_rate": 2.8820538790749404e-06, "loss": 0.1046, "step": 436775 }, { "epoch": 4.29, "grad_norm": 5.326353073120117, "learning_rate": 2.881929756620692e-06, "loss": 0.0873, "step": 436800 }, { "epoch": 4.29, "grad_norm": 11.533651351928711, "learning_rate": 2.8818056341664437e-06, "loss": 0.1277, "step": 436825 }, { "epoch": 4.3, "grad_norm": 8.890880584716797, "learning_rate": 2.881681511712195e-06, "loss": 0.0794, "step": 436850 }, { "epoch": 4.3, "grad_norm": 8.97533130645752, "learning_rate": 2.881557389257947e-06, "loss": 0.128, "step": 436875 }, { "epoch": 4.3, "grad_norm": 0.3696663975715637, "learning_rate": 2.881433266803698e-06, "loss": 0.0834, "step": 436900 }, { "epoch": 4.3, "grad_norm": 11.033248901367188, "learning_rate": 2.8813091443494493e-06, "loss": 0.1355, "step": 436925 }, { "epoch": 4.3, "grad_norm": 1.3929356336593628, "learning_rate": 2.8811850218952014e-06, "loss": 0.0749, "step": 436950 }, { "epoch": 4.3, "grad_norm": 15.230864524841309, "learning_rate": 2.8810608994409526e-06, "loss": 0.1122, "step": 436975 }, { "epoch": 4.3, "grad_norm": 1.4289913177490234, "learning_rate": 2.8809367769867043e-06, "loss": 0.053, "step": 437000 }, { "epoch": 4.3, "grad_norm": 14.787653923034668, "learning_rate": 2.8808126545324555e-06, "loss": 0.1366, "step": 437025 }, { "epoch": 4.3, "grad_norm": 5.273375511169434, "learning_rate": 2.8806885320782075e-06, "loss": 0.064, "step": 437050 }, { "epoch": 4.3, "grad_norm": 16.502830505371094, "learning_rate": 2.8805644096239587e-06, "loss": 0.1044, "step": 437075 }, { "epoch": 4.3, "grad_norm": 2.7416727542877197, "learning_rate": 2.88044028716971e-06, "loss": 0.0921, "step": 437100 }, { "epoch": 4.3, "grad_norm": 17.637928009033203, "learning_rate": 2.880316164715462e-06, "loss": 0.107, "step": 437125 }, { "epoch": 4.3, "grad_norm": 5.581762313842773, "learning_rate": 2.8801920422612132e-06, "loss": 0.088, "step": 437150 }, { "epoch": 4.3, "grad_norm": 16.21051597595215, "learning_rate": 2.880067919806965e-06, "loss": 0.1123, "step": 437175 }, { "epoch": 4.3, "grad_norm": 1.7166268825531006, "learning_rate": 2.8799437973527165e-06, "loss": 0.0718, "step": 437200 }, { "epoch": 4.3, "grad_norm": 17.981000900268555, "learning_rate": 2.879819674898468e-06, "loss": 0.1043, "step": 437225 }, { "epoch": 4.3, "grad_norm": 6.696577072143555, "learning_rate": 2.8796955524442193e-06, "loss": 0.0822, "step": 437250 }, { "epoch": 4.3, "grad_norm": 12.370140075683594, "learning_rate": 2.8795714299899714e-06, "loss": 0.0938, "step": 437275 }, { "epoch": 4.3, "grad_norm": 3.9320852756500244, "learning_rate": 2.8794473075357226e-06, "loss": 0.1054, "step": 437300 }, { "epoch": 4.3, "grad_norm": 16.664159774780273, "learning_rate": 2.879323185081474e-06, "loss": 0.0854, "step": 437325 }, { "epoch": 4.3, "grad_norm": 1.0698261260986328, "learning_rate": 2.879199062627226e-06, "loss": 0.0582, "step": 437350 }, { "epoch": 4.3, "grad_norm": 10.63956069946289, "learning_rate": 2.879074940172977e-06, "loss": 0.1103, "step": 437375 }, { "epoch": 4.3, "grad_norm": 3.3368446826934814, "learning_rate": 2.8789508177187287e-06, "loss": 0.083, "step": 437400 }, { "epoch": 4.3, "grad_norm": 12.698898315429688, "learning_rate": 2.8788266952644804e-06, "loss": 0.0921, "step": 437425 }, { "epoch": 4.3, "grad_norm": 8.629142761230469, "learning_rate": 2.878702572810232e-06, "loss": 0.0738, "step": 437450 }, { "epoch": 4.3, "grad_norm": 15.957053184509277, "learning_rate": 2.878578450355983e-06, "loss": 0.1278, "step": 437475 }, { "epoch": 4.3, "grad_norm": 3.863659620285034, "learning_rate": 2.8784543279017353e-06, "loss": 0.0562, "step": 437500 }, { "epoch": 4.3, "grad_norm": 15.420735359191895, "learning_rate": 2.8783302054474865e-06, "loss": 0.0879, "step": 437525 }, { "epoch": 4.3, "grad_norm": 8.072247505187988, "learning_rate": 2.878206082993238e-06, "loss": 0.0684, "step": 437550 }, { "epoch": 4.3, "grad_norm": 14.931394577026367, "learning_rate": 2.8780819605389897e-06, "loss": 0.1252, "step": 437575 }, { "epoch": 4.3, "grad_norm": 9.585262298583984, "learning_rate": 2.877957838084741e-06, "loss": 0.0942, "step": 437600 }, { "epoch": 4.3, "grad_norm": 12.691974639892578, "learning_rate": 2.8778337156304926e-06, "loss": 0.0982, "step": 437625 }, { "epoch": 4.3, "grad_norm": 9.224727630615234, "learning_rate": 2.8777095931762442e-06, "loss": 0.0736, "step": 437650 }, { "epoch": 4.3, "grad_norm": 9.81789493560791, "learning_rate": 2.877585470721996e-06, "loss": 0.0869, "step": 437675 }, { "epoch": 4.3, "grad_norm": 1.7189375162124634, "learning_rate": 2.877461348267747e-06, "loss": 0.0756, "step": 437700 }, { "epoch": 4.3, "grad_norm": 14.05833625793457, "learning_rate": 2.877337225813499e-06, "loss": 0.1279, "step": 437725 }, { "epoch": 4.3, "grad_norm": 2.4404594898223877, "learning_rate": 2.8772131033592503e-06, "loss": 0.075, "step": 437750 }, { "epoch": 4.3, "grad_norm": 16.279634475708008, "learning_rate": 2.877088980905002e-06, "loss": 0.0868, "step": 437775 }, { "epoch": 4.3, "grad_norm": 3.0338258743286133, "learning_rate": 2.8769648584507536e-06, "loss": 0.0769, "step": 437800 }, { "epoch": 4.3, "grad_norm": 4.425958156585693, "learning_rate": 2.8768407359965052e-06, "loss": 0.108, "step": 437825 }, { "epoch": 4.31, "grad_norm": 2.1773946285247803, "learning_rate": 2.8767166135422565e-06, "loss": 0.0817, "step": 437850 }, { "epoch": 4.31, "grad_norm": 9.111875534057617, "learning_rate": 2.8765924910880077e-06, "loss": 0.1326, "step": 437875 }, { "epoch": 4.31, "grad_norm": 1.032721996307373, "learning_rate": 2.8764683686337597e-06, "loss": 0.0695, "step": 437900 }, { "epoch": 4.31, "grad_norm": 13.28272819519043, "learning_rate": 2.876344246179511e-06, "loss": 0.1155, "step": 437925 }, { "epoch": 4.31, "grad_norm": 0.10199084132909775, "learning_rate": 2.8762201237252626e-06, "loss": 0.0643, "step": 437950 }, { "epoch": 4.31, "grad_norm": 12.184488296508789, "learning_rate": 2.876096001271014e-06, "loss": 0.1184, "step": 437975 }, { "epoch": 4.31, "grad_norm": 5.324767112731934, "learning_rate": 2.875971878816766e-06, "loss": 0.0758, "step": 438000 }, { "epoch": 4.31, "grad_norm": 23.33023452758789, "learning_rate": 2.875847756362517e-06, "loss": 0.0958, "step": 438025 }, { "epoch": 4.31, "grad_norm": 1.208994746208191, "learning_rate": 2.875723633908269e-06, "loss": 0.0784, "step": 438050 }, { "epoch": 4.31, "grad_norm": 3.061448097229004, "learning_rate": 2.8755995114540203e-06, "loss": 0.1173, "step": 438075 }, { "epoch": 4.31, "grad_norm": 4.934471607208252, "learning_rate": 2.8754753889997715e-06, "loss": 0.0907, "step": 438100 }, { "epoch": 4.31, "grad_norm": 12.466889381408691, "learning_rate": 2.8753512665455236e-06, "loss": 0.1008, "step": 438125 }, { "epoch": 4.31, "grad_norm": 0.8797290921211243, "learning_rate": 2.875227144091275e-06, "loss": 0.0895, "step": 438150 }, { "epoch": 4.31, "grad_norm": 9.120405197143555, "learning_rate": 2.8751030216370264e-06, "loss": 0.0953, "step": 438175 }, { "epoch": 4.31, "grad_norm": 0.4895288348197937, "learning_rate": 2.874978899182778e-06, "loss": 0.0663, "step": 438200 }, { "epoch": 4.31, "grad_norm": 10.665188789367676, "learning_rate": 2.8748547767285297e-06, "loss": 0.1033, "step": 438225 }, { "epoch": 4.31, "grad_norm": 2.339243173599243, "learning_rate": 2.874730654274281e-06, "loss": 0.0614, "step": 438250 }, { "epoch": 4.31, "grad_norm": 12.60081672668457, "learning_rate": 2.874606531820033e-06, "loss": 0.1189, "step": 438275 }, { "epoch": 4.31, "grad_norm": 2.7417097091674805, "learning_rate": 2.874482409365784e-06, "loss": 0.0814, "step": 438300 }, { "epoch": 4.31, "grad_norm": 14.686576843261719, "learning_rate": 2.8743582869115354e-06, "loss": 0.0992, "step": 438325 }, { "epoch": 4.31, "grad_norm": 8.898248672485352, "learning_rate": 2.8742341644572875e-06, "loss": 0.0759, "step": 438350 }, { "epoch": 4.31, "grad_norm": 15.879420280456543, "learning_rate": 2.8741100420030387e-06, "loss": 0.1049, "step": 438375 }, { "epoch": 4.31, "grad_norm": 1.6216686964035034, "learning_rate": 2.8739859195487903e-06, "loss": 0.0763, "step": 438400 }, { "epoch": 4.31, "grad_norm": 15.038125038146973, "learning_rate": 2.873861797094542e-06, "loss": 0.1179, "step": 438425 }, { "epoch": 4.31, "grad_norm": 5.046276092529297, "learning_rate": 2.8737376746402936e-06, "loss": 0.0762, "step": 438450 }, { "epoch": 4.31, "grad_norm": 13.715439796447754, "learning_rate": 2.8736135521860448e-06, "loss": 0.0884, "step": 438475 }, { "epoch": 4.31, "grad_norm": 0.704587459564209, "learning_rate": 2.873489429731797e-06, "loss": 0.0837, "step": 438500 }, { "epoch": 4.31, "grad_norm": 20.68568992614746, "learning_rate": 2.873365307277548e-06, "loss": 0.1317, "step": 438525 }, { "epoch": 4.31, "grad_norm": 6.2509965896606445, "learning_rate": 2.8732411848232993e-06, "loss": 0.087, "step": 438550 }, { "epoch": 4.31, "grad_norm": 17.800121307373047, "learning_rate": 2.8731170623690513e-06, "loss": 0.1011, "step": 438575 }, { "epoch": 4.31, "grad_norm": 14.17154312133789, "learning_rate": 2.8729929399148025e-06, "loss": 0.0693, "step": 438600 }, { "epoch": 4.31, "grad_norm": 18.1710262298584, "learning_rate": 2.872868817460554e-06, "loss": 0.0946, "step": 438625 }, { "epoch": 4.31, "grad_norm": 5.410688877105713, "learning_rate": 2.872744695006306e-06, "loss": 0.0814, "step": 438650 }, { "epoch": 4.31, "grad_norm": 13.16696548461914, "learning_rate": 2.8726205725520574e-06, "loss": 0.1317, "step": 438675 }, { "epoch": 4.31, "grad_norm": 2.945878028869629, "learning_rate": 2.8724964500978087e-06, "loss": 0.0773, "step": 438700 }, { "epoch": 4.31, "grad_norm": 10.092045783996582, "learning_rate": 2.87237232764356e-06, "loss": 0.1054, "step": 438725 }, { "epoch": 4.31, "grad_norm": 0.9179011583328247, "learning_rate": 2.872248205189312e-06, "loss": 0.0903, "step": 438750 }, { "epoch": 4.31, "grad_norm": 11.800586700439453, "learning_rate": 2.872124082735063e-06, "loss": 0.1204, "step": 438775 }, { "epoch": 4.31, "grad_norm": 0.5816435217857361, "learning_rate": 2.8719999602808148e-06, "loss": 0.0635, "step": 438800 }, { "epoch": 4.31, "grad_norm": 9.080121994018555, "learning_rate": 2.8718758378265664e-06, "loss": 0.1185, "step": 438825 }, { "epoch": 4.31, "grad_norm": 3.307936906814575, "learning_rate": 2.871751715372318e-06, "loss": 0.0699, "step": 438850 }, { "epoch": 4.32, "grad_norm": 11.998173713684082, "learning_rate": 2.8716275929180693e-06, "loss": 0.103, "step": 438875 }, { "epoch": 4.32, "grad_norm": 0.4265938699245453, "learning_rate": 2.8715034704638213e-06, "loss": 0.0595, "step": 438900 }, { "epoch": 4.32, "grad_norm": 14.338784217834473, "learning_rate": 2.8713793480095725e-06, "loss": 0.1096, "step": 438925 }, { "epoch": 4.32, "grad_norm": 1.8581013679504395, "learning_rate": 2.8712552255553237e-06, "loss": 0.0633, "step": 438950 }, { "epoch": 4.32, "grad_norm": 12.178263664245605, "learning_rate": 2.871131103101076e-06, "loss": 0.1029, "step": 438975 }, { "epoch": 4.32, "grad_norm": 0.3159368336200714, "learning_rate": 2.871006980646827e-06, "loss": 0.0871, "step": 439000 }, { "epoch": 4.32, "grad_norm": 16.965059280395508, "learning_rate": 2.8708828581925786e-06, "loss": 0.1128, "step": 439025 }, { "epoch": 4.32, "grad_norm": 2.600705623626709, "learning_rate": 2.8707587357383303e-06, "loss": 0.1043, "step": 439050 }, { "epoch": 4.32, "grad_norm": 13.314249038696289, "learning_rate": 2.870634613284082e-06, "loss": 0.1221, "step": 439075 }, { "epoch": 4.32, "grad_norm": 3.6991124153137207, "learning_rate": 2.870510490829833e-06, "loss": 0.0798, "step": 439100 }, { "epoch": 4.32, "grad_norm": 10.735278129577637, "learning_rate": 2.870386368375585e-06, "loss": 0.1046, "step": 439125 }, { "epoch": 4.32, "grad_norm": 7.779467582702637, "learning_rate": 2.8702622459213364e-06, "loss": 0.0715, "step": 439150 }, { "epoch": 4.32, "grad_norm": 10.252523422241211, "learning_rate": 2.8701381234670876e-06, "loss": 0.0916, "step": 439175 }, { "epoch": 4.32, "grad_norm": 4.268653392791748, "learning_rate": 2.8700140010128397e-06, "loss": 0.0733, "step": 439200 }, { "epoch": 4.32, "grad_norm": 9.671387672424316, "learning_rate": 2.869889878558591e-06, "loss": 0.1035, "step": 439225 }, { "epoch": 4.32, "grad_norm": 10.027098655700684, "learning_rate": 2.8697657561043425e-06, "loss": 0.0921, "step": 439250 }, { "epoch": 4.32, "grad_norm": 9.620929718017578, "learning_rate": 2.869641633650094e-06, "loss": 0.105, "step": 439275 }, { "epoch": 4.32, "grad_norm": 2.9828696250915527, "learning_rate": 2.8695175111958458e-06, "loss": 0.0923, "step": 439300 }, { "epoch": 4.32, "grad_norm": 16.511037826538086, "learning_rate": 2.869393388741597e-06, "loss": 0.1311, "step": 439325 }, { "epoch": 4.32, "grad_norm": 7.413293361663818, "learning_rate": 2.869269266287349e-06, "loss": 0.0715, "step": 439350 }, { "epoch": 4.32, "grad_norm": 16.270971298217773, "learning_rate": 2.8691451438331003e-06, "loss": 0.0984, "step": 439375 }, { "epoch": 4.32, "grad_norm": 4.249249458312988, "learning_rate": 2.8690210213788515e-06, "loss": 0.0756, "step": 439400 }, { "epoch": 4.32, "grad_norm": 9.102519989013672, "learning_rate": 2.8688968989246035e-06, "loss": 0.1046, "step": 439425 }, { "epoch": 4.32, "grad_norm": 7.956279277801514, "learning_rate": 2.8687727764703547e-06, "loss": 0.0942, "step": 439450 }, { "epoch": 4.32, "grad_norm": 9.561027526855469, "learning_rate": 2.8686486540161064e-06, "loss": 0.1115, "step": 439475 }, { "epoch": 4.32, "grad_norm": 2.2084290981292725, "learning_rate": 2.868524531561858e-06, "loss": 0.0841, "step": 439500 }, { "epoch": 4.32, "grad_norm": 12.441489219665527, "learning_rate": 2.8684004091076096e-06, "loss": 0.1253, "step": 439525 }, { "epoch": 4.32, "grad_norm": 2.529632568359375, "learning_rate": 2.868276286653361e-06, "loss": 0.0621, "step": 439550 }, { "epoch": 4.32, "grad_norm": 8.9115629196167, "learning_rate": 2.868152164199112e-06, "loss": 0.1081, "step": 439575 }, { "epoch": 4.32, "grad_norm": 2.888052225112915, "learning_rate": 2.868028041744864e-06, "loss": 0.0737, "step": 439600 }, { "epoch": 4.32, "grad_norm": 6.608593940734863, "learning_rate": 2.8679039192906153e-06, "loss": 0.1251, "step": 439625 }, { "epoch": 4.32, "grad_norm": 7.556906700134277, "learning_rate": 2.867779796836367e-06, "loss": 0.0753, "step": 439650 }, { "epoch": 4.32, "grad_norm": 12.553433418273926, "learning_rate": 2.8676556743821186e-06, "loss": 0.1008, "step": 439675 }, { "epoch": 4.32, "grad_norm": 0.16946865618228912, "learning_rate": 2.8675315519278702e-06, "loss": 0.062, "step": 439700 }, { "epoch": 4.32, "grad_norm": 12.874006271362305, "learning_rate": 2.8674074294736215e-06, "loss": 0.1317, "step": 439725 }, { "epoch": 4.32, "grad_norm": 0.9934817552566528, "learning_rate": 2.8672833070193735e-06, "loss": 0.0738, "step": 439750 }, { "epoch": 4.32, "grad_norm": 8.807318687438965, "learning_rate": 2.8671591845651247e-06, "loss": 0.0754, "step": 439775 }, { "epoch": 4.32, "grad_norm": 2.752767562866211, "learning_rate": 2.867035062110876e-06, "loss": 0.0602, "step": 439800 }, { "epoch": 4.32, "grad_norm": 12.138786315917969, "learning_rate": 2.866910939656628e-06, "loss": 0.1019, "step": 439825 }, { "epoch": 4.32, "grad_norm": 3.4108519554138184, "learning_rate": 2.866786817202379e-06, "loss": 0.0554, "step": 439850 }, { "epoch": 4.32, "grad_norm": 17.706945419311523, "learning_rate": 2.866662694748131e-06, "loss": 0.0935, "step": 439875 }, { "epoch": 4.33, "grad_norm": 4.083128452301025, "learning_rate": 2.8665385722938825e-06, "loss": 0.0699, "step": 439900 }, { "epoch": 4.33, "grad_norm": 8.147563934326172, "learning_rate": 2.866414449839634e-06, "loss": 0.1323, "step": 439925 }, { "epoch": 4.33, "grad_norm": 4.281584739685059, "learning_rate": 2.8662903273853853e-06, "loss": 0.0794, "step": 439950 }, { "epoch": 4.33, "grad_norm": Infinity, "learning_rate": 2.866171169829307e-06, "loss": 0.0955, "step": 439975 }, { "epoch": 4.33, "grad_norm": 3.7917094230651855, "learning_rate": 2.8660470473750584e-06, "loss": 0.0695, "step": 440000 }, { "epoch": 4.33, "eval_loss": 0.7332437634468079, "eval_runtime": 6101.1482, "eval_samples_per_second": 1.552, "eval_steps_per_second": 0.194, "eval_wer": 0.11642953711453036, "step": 440000 }, { "epoch": 4.33, "grad_norm": 10.038064002990723, "learning_rate": 2.8659229249208105e-06, "loss": 0.0748, "step": 440025 }, { "epoch": 4.33, "grad_norm": 15.161686897277832, "learning_rate": 2.8657988024665617e-06, "loss": 0.0818, "step": 440050 }, { "epoch": 4.33, "grad_norm": 16.682018280029297, "learning_rate": 2.865674680012313e-06, "loss": 0.1026, "step": 440075 }, { "epoch": 4.33, "grad_norm": 14.406562805175781, "learning_rate": 2.865550557558065e-06, "loss": 0.0941, "step": 440100 }, { "epoch": 4.33, "grad_norm": 14.052810668945312, "learning_rate": 2.865426435103816e-06, "loss": 0.1027, "step": 440125 }, { "epoch": 4.33, "grad_norm": 7.466988563537598, "learning_rate": 2.865302312649568e-06, "loss": 0.0796, "step": 440150 }, { "epoch": 4.33, "grad_norm": 14.710166931152344, "learning_rate": 2.865178190195319e-06, "loss": 0.1046, "step": 440175 }, { "epoch": 4.33, "grad_norm": 0.32511308789253235, "learning_rate": 2.865054067741071e-06, "loss": 0.0688, "step": 440200 }, { "epoch": 4.33, "grad_norm": 4.211788177490234, "learning_rate": 2.8649299452868223e-06, "loss": 0.0902, "step": 440225 }, { "epoch": 4.33, "grad_norm": 3.032071113586426, "learning_rate": 2.8648058228325735e-06, "loss": 0.0635, "step": 440250 }, { "epoch": 4.33, "grad_norm": 27.870986938476562, "learning_rate": 2.8646817003783255e-06, "loss": 0.1178, "step": 440275 }, { "epoch": 4.33, "grad_norm": 1.3410413265228271, "learning_rate": 2.8645575779240768e-06, "loss": 0.0678, "step": 440300 }, { "epoch": 4.33, "grad_norm": 12.425847053527832, "learning_rate": 2.8644334554698284e-06, "loss": 0.1039, "step": 440325 }, { "epoch": 4.33, "grad_norm": 5.267154216766357, "learning_rate": 2.86430933301558e-06, "loss": 0.0853, "step": 440350 }, { "epoch": 4.33, "grad_norm": 3.3040285110473633, "learning_rate": 2.8641852105613317e-06, "loss": 0.1047, "step": 440375 }, { "epoch": 4.33, "grad_norm": 4.340402603149414, "learning_rate": 2.864061088107083e-06, "loss": 0.0819, "step": 440400 }, { "epoch": 4.33, "grad_norm": 10.592421531677246, "learning_rate": 2.863936965652835e-06, "loss": 0.0987, "step": 440425 }, { "epoch": 4.33, "grad_norm": 11.598718643188477, "learning_rate": 2.863812843198586e-06, "loss": 0.0581, "step": 440450 }, { "epoch": 4.33, "grad_norm": 16.20009994506836, "learning_rate": 2.8636887207443378e-06, "loss": 0.1124, "step": 440475 }, { "epoch": 4.33, "grad_norm": 2.859781503677368, "learning_rate": 2.8635645982900894e-06, "loss": 0.0719, "step": 440500 }, { "epoch": 4.33, "grad_norm": 11.89478588104248, "learning_rate": 2.863440475835841e-06, "loss": 0.112, "step": 440525 }, { "epoch": 4.33, "grad_norm": 5.554533958435059, "learning_rate": 2.8633163533815923e-06, "loss": 0.1041, "step": 440550 }, { "epoch": 4.33, "grad_norm": 17.133188247680664, "learning_rate": 2.8631922309273443e-06, "loss": 0.1078, "step": 440575 }, { "epoch": 4.33, "grad_norm": 3.808284282684326, "learning_rate": 2.8630681084730955e-06, "loss": 0.0637, "step": 440600 }, { "epoch": 4.33, "grad_norm": 7.882230758666992, "learning_rate": 2.8629439860188467e-06, "loss": 0.0696, "step": 440625 }, { "epoch": 4.33, "grad_norm": 0.1383797973394394, "learning_rate": 2.862819863564599e-06, "loss": 0.063, "step": 440650 }, { "epoch": 4.33, "grad_norm": 23.16649055480957, "learning_rate": 2.86269574111035e-06, "loss": 0.0975, "step": 440675 }, { "epoch": 4.33, "grad_norm": 4.389485836029053, "learning_rate": 2.8625716186561016e-06, "loss": 0.0817, "step": 440700 }, { "epoch": 4.33, "grad_norm": 14.54360580444336, "learning_rate": 2.8624474962018533e-06, "loss": 0.0792, "step": 440725 }, { "epoch": 4.33, "grad_norm": 4.441710948944092, "learning_rate": 2.862323373747605e-06, "loss": 0.0668, "step": 440750 }, { "epoch": 4.33, "grad_norm": 16.117101669311523, "learning_rate": 2.862199251293356e-06, "loss": 0.0778, "step": 440775 }, { "epoch": 4.33, "grad_norm": 8.877819061279297, "learning_rate": 2.862075128839108e-06, "loss": 0.061, "step": 440800 }, { "epoch": 4.33, "grad_norm": 7.8594794273376465, "learning_rate": 2.8619510063848594e-06, "loss": 0.1084, "step": 440825 }, { "epoch": 4.33, "grad_norm": 11.485037803649902, "learning_rate": 2.8618268839306106e-06, "loss": 0.1004, "step": 440850 }, { "epoch": 4.33, "grad_norm": 8.973152160644531, "learning_rate": 2.8617027614763627e-06, "loss": 0.0888, "step": 440875 }, { "epoch": 4.34, "grad_norm": 1.5949798822402954, "learning_rate": 2.861578639022114e-06, "loss": 0.0793, "step": 440900 }, { "epoch": 4.34, "grad_norm": 15.442912101745605, "learning_rate": 2.8614545165678655e-06, "loss": 0.1274, "step": 440925 }, { "epoch": 4.34, "grad_norm": 9.415807723999023, "learning_rate": 2.861330394113617e-06, "loss": 0.0853, "step": 440950 }, { "epoch": 4.34, "grad_norm": 13.999459266662598, "learning_rate": 2.8612062716593688e-06, "loss": 0.108, "step": 440975 }, { "epoch": 4.34, "grad_norm": 2.538726329803467, "learning_rate": 2.86108214920512e-06, "loss": 0.0703, "step": 441000 }, { "epoch": 4.34, "grad_norm": 12.548035621643066, "learning_rate": 2.860958026750871e-06, "loss": 0.0708, "step": 441025 }, { "epoch": 4.34, "grad_norm": 1.7888400554656982, "learning_rate": 2.8608339042966233e-06, "loss": 0.0678, "step": 441050 }, { "epoch": 4.34, "grad_norm": 13.405162811279297, "learning_rate": 2.8607097818423745e-06, "loss": 0.1115, "step": 441075 }, { "epoch": 4.34, "grad_norm": 2.9186770915985107, "learning_rate": 2.860585659388126e-06, "loss": 0.073, "step": 441100 }, { "epoch": 4.34, "grad_norm": 19.325572967529297, "learning_rate": 2.8604615369338777e-06, "loss": 0.1041, "step": 441125 }, { "epoch": 4.34, "grad_norm": 5.3580474853515625, "learning_rate": 2.8603374144796294e-06, "loss": 0.0753, "step": 441150 }, { "epoch": 4.34, "grad_norm": 9.25084114074707, "learning_rate": 2.8602132920253806e-06, "loss": 0.1252, "step": 441175 }, { "epoch": 4.34, "grad_norm": 2.5843207836151123, "learning_rate": 2.8600891695711327e-06, "loss": 0.0612, "step": 441200 }, { "epoch": 4.34, "grad_norm": 22.405742645263672, "learning_rate": 2.859965047116884e-06, "loss": 0.1229, "step": 441225 }, { "epoch": 4.34, "grad_norm": 1.066924810409546, "learning_rate": 2.859840924662635e-06, "loss": 0.078, "step": 441250 }, { "epoch": 4.34, "grad_norm": 4.557159900665283, "learning_rate": 2.859716802208387e-06, "loss": 0.0884, "step": 441275 }, { "epoch": 4.34, "grad_norm": 5.471745491027832, "learning_rate": 2.8595926797541383e-06, "loss": 0.052, "step": 441300 }, { "epoch": 4.34, "grad_norm": 12.985933303833008, "learning_rate": 2.85946855729989e-06, "loss": 0.0971, "step": 441325 }, { "epoch": 4.34, "grad_norm": 7.53792142868042, "learning_rate": 2.8593444348456416e-06, "loss": 0.0925, "step": 441350 }, { "epoch": 4.34, "grad_norm": 10.661105155944824, "learning_rate": 2.8592203123913932e-06, "loss": 0.0875, "step": 441375 }, { "epoch": 4.34, "grad_norm": 5.912680625915527, "learning_rate": 2.8590961899371445e-06, "loss": 0.0761, "step": 441400 }, { "epoch": 4.34, "grad_norm": 14.016358375549316, "learning_rate": 2.8589720674828965e-06, "loss": 0.0928, "step": 441425 }, { "epoch": 4.34, "grad_norm": 5.013120651245117, "learning_rate": 2.8588479450286477e-06, "loss": 0.0703, "step": 441450 }, { "epoch": 4.34, "grad_norm": 13.298005104064941, "learning_rate": 2.858723822574399e-06, "loss": 0.095, "step": 441475 }, { "epoch": 4.34, "grad_norm": 9.849198341369629, "learning_rate": 2.858599700120151e-06, "loss": 0.0671, "step": 441500 }, { "epoch": 4.34, "grad_norm": 13.25474739074707, "learning_rate": 2.858475577665902e-06, "loss": 0.1011, "step": 441525 }, { "epoch": 4.34, "grad_norm": 0.33062058687210083, "learning_rate": 2.858351455211654e-06, "loss": 0.0869, "step": 441550 }, { "epoch": 4.34, "grad_norm": 11.482986450195312, "learning_rate": 2.8582273327574055e-06, "loss": 0.0935, "step": 441575 }, { "epoch": 4.34, "grad_norm": 2.258164644241333, "learning_rate": 2.858103210303157e-06, "loss": 0.1054, "step": 441600 }, { "epoch": 4.34, "grad_norm": 17.486528396606445, "learning_rate": 2.8579790878489083e-06, "loss": 0.0918, "step": 441625 }, { "epoch": 4.34, "grad_norm": 1.563460350036621, "learning_rate": 2.8578549653946604e-06, "loss": 0.096, "step": 441650 }, { "epoch": 4.34, "grad_norm": 13.66032886505127, "learning_rate": 2.8577308429404116e-06, "loss": 0.08, "step": 441675 }, { "epoch": 4.34, "grad_norm": 4.275595664978027, "learning_rate": 2.857606720486163e-06, "loss": 0.073, "step": 441700 }, { "epoch": 4.34, "grad_norm": 9.511253356933594, "learning_rate": 2.857482598031915e-06, "loss": 0.0927, "step": 441725 }, { "epoch": 4.34, "grad_norm": 0.42723575234413147, "learning_rate": 2.857358475577666e-06, "loss": 0.0591, "step": 441750 }, { "epoch": 4.34, "grad_norm": 15.75046157836914, "learning_rate": 2.8572343531234177e-06, "loss": 0.084, "step": 441775 }, { "epoch": 4.34, "grad_norm": 12.739238739013672, "learning_rate": 2.8571102306691693e-06, "loss": 0.104, "step": 441800 }, { "epoch": 4.34, "grad_norm": 13.471085548400879, "learning_rate": 2.856986108214921e-06, "loss": 0.1395, "step": 441825 }, { "epoch": 4.34, "grad_norm": 3.2648589611053467, "learning_rate": 2.856861985760672e-06, "loss": 0.0803, "step": 441850 }, { "epoch": 4.34, "grad_norm": 6.070195198059082, "learning_rate": 2.8567378633064234e-06, "loss": 0.1028, "step": 441875 }, { "epoch": 4.34, "grad_norm": 9.499198913574219, "learning_rate": 2.8566137408521755e-06, "loss": 0.0742, "step": 441900 }, { "epoch": 4.35, "grad_norm": 17.800004959106445, "learning_rate": 2.8564896183979267e-06, "loss": 0.1047, "step": 441925 }, { "epoch": 4.35, "grad_norm": 5.58601188659668, "learning_rate": 2.8563654959436783e-06, "loss": 0.0641, "step": 441950 }, { "epoch": 4.35, "grad_norm": 2.5308284759521484, "learning_rate": 2.85624137348943e-06, "loss": 0.0834, "step": 441975 }, { "epoch": 4.35, "grad_norm": 5.897665023803711, "learning_rate": 2.8561172510351816e-06, "loss": 0.087, "step": 442000 }, { "epoch": 4.35, "grad_norm": 11.668171882629395, "learning_rate": 2.855993128580933e-06, "loss": 0.0955, "step": 442025 }, { "epoch": 4.35, "grad_norm": 0.47115564346313477, "learning_rate": 2.855869006126685e-06, "loss": 0.0523, "step": 442050 }, { "epoch": 4.35, "grad_norm": 7.59914493560791, "learning_rate": 2.855744883672436e-06, "loss": 0.1177, "step": 442075 }, { "epoch": 4.35, "grad_norm": 1.2792431116104126, "learning_rate": 2.8556207612181873e-06, "loss": 0.063, "step": 442100 }, { "epoch": 4.35, "grad_norm": 6.921943187713623, "learning_rate": 2.8554966387639393e-06, "loss": 0.1109, "step": 442125 }, { "epoch": 4.35, "grad_norm": 2.338069200515747, "learning_rate": 2.8553774812078604e-06, "loss": 0.0677, "step": 442150 }, { "epoch": 4.35, "grad_norm": 15.442330360412598, "learning_rate": 2.8552533587536124e-06, "loss": 0.1101, "step": 442175 }, { "epoch": 4.35, "grad_norm": 13.599935531616211, "learning_rate": 2.8551292362993636e-06, "loss": 0.0752, "step": 442200 }, { "epoch": 4.35, "grad_norm": 13.872766494750977, "learning_rate": 2.8550051138451153e-06, "loss": 0.1335, "step": 442225 }, { "epoch": 4.35, "grad_norm": 12.18762493133545, "learning_rate": 2.854880991390867e-06, "loss": 0.0762, "step": 442250 }, { "epoch": 4.35, "grad_norm": 18.456647872924805, "learning_rate": 2.8547568689366185e-06, "loss": 0.1062, "step": 442275 }, { "epoch": 4.35, "grad_norm": 1.568479299545288, "learning_rate": 2.8546327464823697e-06, "loss": 0.096, "step": 442300 }, { "epoch": 4.35, "grad_norm": 8.643664360046387, "learning_rate": 2.854508624028122e-06, "loss": 0.1039, "step": 442325 }, { "epoch": 4.35, "grad_norm": 5.564265727996826, "learning_rate": 2.854384501573873e-06, "loss": 0.0846, "step": 442350 }, { "epoch": 4.35, "grad_norm": 11.412363052368164, "learning_rate": 2.8542603791196242e-06, "loss": 0.1072, "step": 442375 }, { "epoch": 4.35, "grad_norm": 3.550894021987915, "learning_rate": 2.8541362566653763e-06, "loss": 0.0567, "step": 442400 }, { "epoch": 4.35, "grad_norm": 26.092899322509766, "learning_rate": 2.8540121342111275e-06, "loss": 0.1075, "step": 442425 }, { "epoch": 4.35, "grad_norm": 5.187542915344238, "learning_rate": 2.853888011756879e-06, "loss": 0.0776, "step": 442450 }, { "epoch": 4.35, "grad_norm": 14.662102699279785, "learning_rate": 2.8537638893026303e-06, "loss": 0.118, "step": 442475 }, { "epoch": 4.35, "grad_norm": 7.472677707672119, "learning_rate": 2.8536397668483824e-06, "loss": 0.0648, "step": 442500 }, { "epoch": 4.35, "grad_norm": 12.584772109985352, "learning_rate": 2.8535156443941336e-06, "loss": 0.1282, "step": 442525 }, { "epoch": 4.35, "grad_norm": 4.749797821044922, "learning_rate": 2.853391521939885e-06, "loss": 0.0711, "step": 442550 }, { "epoch": 4.35, "grad_norm": 5.78670597076416, "learning_rate": 2.853267399485637e-06, "loss": 0.0915, "step": 442575 }, { "epoch": 4.35, "grad_norm": 8.18239688873291, "learning_rate": 2.853143277031388e-06, "loss": 0.0892, "step": 442600 }, { "epoch": 4.35, "grad_norm": 12.316011428833008, "learning_rate": 2.8530191545771397e-06, "loss": 0.0801, "step": 442625 }, { "epoch": 4.35, "grad_norm": 1.9764705896377563, "learning_rate": 2.8528950321228914e-06, "loss": 0.0639, "step": 442650 }, { "epoch": 4.35, "grad_norm": 18.050914764404297, "learning_rate": 2.852770909668643e-06, "loss": 0.0922, "step": 442675 }, { "epoch": 4.35, "grad_norm": 1.3542624711990356, "learning_rate": 2.8526467872143942e-06, "loss": 0.0812, "step": 442700 }, { "epoch": 4.35, "grad_norm": 10.18789005279541, "learning_rate": 2.8525226647601463e-06, "loss": 0.1007, "step": 442725 }, { "epoch": 4.35, "grad_norm": 1.3644331693649292, "learning_rate": 2.8523985423058975e-06, "loss": 0.0436, "step": 442750 }, { "epoch": 4.35, "grad_norm": 15.392189025878906, "learning_rate": 2.8522744198516487e-06, "loss": 0.1031, "step": 442775 }, { "epoch": 4.35, "grad_norm": 0.6156884431838989, "learning_rate": 2.8521502973974008e-06, "loss": 0.0762, "step": 442800 }, { "epoch": 4.35, "grad_norm": 30.887577056884766, "learning_rate": 2.852026174943152e-06, "loss": 0.1447, "step": 442825 }, { "epoch": 4.35, "grad_norm": 9.858023643493652, "learning_rate": 2.8519020524889036e-06, "loss": 0.097, "step": 442850 }, { "epoch": 4.35, "grad_norm": 15.653079986572266, "learning_rate": 2.8517779300346552e-06, "loss": 0.0884, "step": 442875 }, { "epoch": 4.35, "grad_norm": 9.153144836425781, "learning_rate": 2.851653807580407e-06, "loss": 0.0714, "step": 442900 }, { "epoch": 4.35, "grad_norm": 27.33061408996582, "learning_rate": 2.851529685126158e-06, "loss": 0.1362, "step": 442925 }, { "epoch": 4.36, "grad_norm": 0.04839727655053139, "learning_rate": 2.85140556267191e-06, "loss": 0.0627, "step": 442950 }, { "epoch": 4.36, "grad_norm": 16.612525939941406, "learning_rate": 2.8512814402176613e-06, "loss": 0.1206, "step": 442975 }, { "epoch": 4.36, "grad_norm": 4.079164981842041, "learning_rate": 2.8511573177634126e-06, "loss": 0.0914, "step": 443000 }, { "epoch": 4.36, "grad_norm": 12.186666488647461, "learning_rate": 2.8510331953091646e-06, "loss": 0.1112, "step": 443025 }, { "epoch": 4.36, "grad_norm": 6.544219970703125, "learning_rate": 2.850909072854916e-06, "loss": 0.0761, "step": 443050 }, { "epoch": 4.36, "grad_norm": 12.560118675231934, "learning_rate": 2.8507849504006675e-06, "loss": 0.0846, "step": 443075 }, { "epoch": 4.36, "grad_norm": 18.223405838012695, "learning_rate": 2.850660827946419e-06, "loss": 0.0763, "step": 443100 }, { "epoch": 4.36, "grad_norm": 6.480066776275635, "learning_rate": 2.8505367054921707e-06, "loss": 0.1053, "step": 443125 }, { "epoch": 4.36, "grad_norm": 5.50395393371582, "learning_rate": 2.850412583037922e-06, "loss": 0.0945, "step": 443150 }, { "epoch": 4.36, "grad_norm": 17.29157066345215, "learning_rate": 2.850288460583674e-06, "loss": 0.1139, "step": 443175 }, { "epoch": 4.36, "grad_norm": 7.336965560913086, "learning_rate": 2.8501643381294252e-06, "loss": 0.0609, "step": 443200 }, { "epoch": 4.36, "grad_norm": 14.84968376159668, "learning_rate": 2.8500402156751764e-06, "loss": 0.0951, "step": 443225 }, { "epoch": 4.36, "grad_norm": 28.790882110595703, "learning_rate": 2.8499160932209285e-06, "loss": 0.078, "step": 443250 }, { "epoch": 4.36, "grad_norm": 11.560731887817383, "learning_rate": 2.8497919707666797e-06, "loss": 0.1047, "step": 443275 }, { "epoch": 4.36, "grad_norm": 4.903054714202881, "learning_rate": 2.8496678483124313e-06, "loss": 0.0665, "step": 443300 }, { "epoch": 4.36, "grad_norm": 18.416053771972656, "learning_rate": 2.8495437258581825e-06, "loss": 0.137, "step": 443325 }, { "epoch": 4.36, "grad_norm": 1.9167158603668213, "learning_rate": 2.8494196034039346e-06, "loss": 0.0849, "step": 443350 }, { "epoch": 4.36, "grad_norm": 15.318873405456543, "learning_rate": 2.849295480949686e-06, "loss": 0.1197, "step": 443375 }, { "epoch": 4.36, "grad_norm": 5.891141891479492, "learning_rate": 2.8491713584954375e-06, "loss": 0.1017, "step": 443400 }, { "epoch": 4.36, "grad_norm": 11.03371810913086, "learning_rate": 2.849047236041189e-06, "loss": 0.1089, "step": 443425 }, { "epoch": 4.36, "grad_norm": 5.3554534912109375, "learning_rate": 2.8489231135869407e-06, "loss": 0.0751, "step": 443450 }, { "epoch": 4.36, "grad_norm": 8.843036651611328, "learning_rate": 2.848798991132692e-06, "loss": 0.0978, "step": 443475 }, { "epoch": 4.36, "grad_norm": 9.281139373779297, "learning_rate": 2.848674868678444e-06, "loss": 0.0616, "step": 443500 }, { "epoch": 4.36, "grad_norm": 8.496150016784668, "learning_rate": 2.848550746224195e-06, "loss": 0.1029, "step": 443525 }, { "epoch": 4.36, "grad_norm": 10.991812705993652, "learning_rate": 2.8484266237699464e-06, "loss": 0.0617, "step": 443550 }, { "epoch": 4.36, "grad_norm": 14.884471893310547, "learning_rate": 2.8483025013156985e-06, "loss": 0.103, "step": 443575 }, { "epoch": 4.36, "grad_norm": 5.233858585357666, "learning_rate": 2.8481783788614497e-06, "loss": 0.0833, "step": 443600 }, { "epoch": 4.36, "grad_norm": 6.363269329071045, "learning_rate": 2.8480542564072013e-06, "loss": 0.0648, "step": 443625 }, { "epoch": 4.36, "grad_norm": 3.791022300720215, "learning_rate": 2.847930133952953e-06, "loss": 0.0588, "step": 443650 }, { "epoch": 4.36, "grad_norm": 15.226739883422852, "learning_rate": 2.8478060114987046e-06, "loss": 0.1052, "step": 443675 }, { "epoch": 4.36, "grad_norm": 3.9626619815826416, "learning_rate": 2.847681889044456e-06, "loss": 0.0606, "step": 443700 }, { "epoch": 4.36, "grad_norm": 11.048426628112793, "learning_rate": 2.847557766590208e-06, "loss": 0.1257, "step": 443725 }, { "epoch": 4.36, "grad_norm": 1.6584126949310303, "learning_rate": 2.847433644135959e-06, "loss": 0.09, "step": 443750 }, { "epoch": 4.36, "grad_norm": 18.14371681213379, "learning_rate": 2.8473095216817103e-06, "loss": 0.1112, "step": 443775 }, { "epoch": 4.36, "grad_norm": 10.546399116516113, "learning_rate": 2.8471853992274623e-06, "loss": 0.0806, "step": 443800 }, { "epoch": 4.36, "grad_norm": 12.030305862426758, "learning_rate": 2.8470612767732136e-06, "loss": 0.0957, "step": 443825 }, { "epoch": 4.36, "grad_norm": 4.660086631774902, "learning_rate": 2.846937154318965e-06, "loss": 0.053, "step": 443850 }, { "epoch": 4.36, "grad_norm": 13.885577201843262, "learning_rate": 2.846813031864717e-06, "loss": 0.1115, "step": 443875 }, { "epoch": 4.36, "grad_norm": 5.216857433319092, "learning_rate": 2.8466889094104685e-06, "loss": 0.1066, "step": 443900 }, { "epoch": 4.36, "grad_norm": 5.860298156738281, "learning_rate": 2.8465647869562197e-06, "loss": 0.1018, "step": 443925 }, { "epoch": 4.36, "grad_norm": 8.616711616516113, "learning_rate": 2.8464406645019717e-06, "loss": 0.0973, "step": 443950 }, { "epoch": 4.37, "grad_norm": 12.711492538452148, "learning_rate": 2.846316542047723e-06, "loss": 0.0909, "step": 443975 }, { "epoch": 4.37, "grad_norm": 5.100931167602539, "learning_rate": 2.846192419593474e-06, "loss": 0.0683, "step": 444000 }, { "epoch": 4.37, "grad_norm": 32.06621551513672, "learning_rate": 2.846068297139226e-06, "loss": 0.1297, "step": 444025 }, { "epoch": 4.37, "grad_norm": 5.132751941680908, "learning_rate": 2.8459441746849774e-06, "loss": 0.0932, "step": 444050 }, { "epoch": 4.37, "grad_norm": 16.697885513305664, "learning_rate": 2.845820052230729e-06, "loss": 0.103, "step": 444075 }, { "epoch": 4.37, "grad_norm": 3.4174156188964844, "learning_rate": 2.8456959297764807e-06, "loss": 0.0814, "step": 444100 }, { "epoch": 4.37, "grad_norm": 11.40036392211914, "learning_rate": 2.8455718073222323e-06, "loss": 0.0992, "step": 444125 }, { "epoch": 4.37, "grad_norm": 1.1670258045196533, "learning_rate": 2.8454476848679835e-06, "loss": 0.0499, "step": 444150 }, { "epoch": 4.37, "grad_norm": 12.241825103759766, "learning_rate": 2.8453235624137347e-06, "loss": 0.0851, "step": 444175 }, { "epoch": 4.37, "grad_norm": 4.5233659744262695, "learning_rate": 2.845199439959487e-06, "loss": 0.0751, "step": 444200 }, { "epoch": 4.37, "grad_norm": 10.190927505493164, "learning_rate": 2.845075317505238e-06, "loss": 0.0753, "step": 444225 }, { "epoch": 4.37, "grad_norm": 6.254114627838135, "learning_rate": 2.8449511950509897e-06, "loss": 0.082, "step": 444250 }, { "epoch": 4.37, "grad_norm": 13.842949867248535, "learning_rate": 2.8448270725967413e-06, "loss": 0.1323, "step": 444275 }, { "epoch": 4.37, "grad_norm": 5.5266337394714355, "learning_rate": 2.844702950142493e-06, "loss": 0.0664, "step": 444300 }, { "epoch": 4.37, "grad_norm": 20.729846954345703, "learning_rate": 2.844578827688244e-06, "loss": 0.0945, "step": 444325 }, { "epoch": 4.37, "grad_norm": 1.9620258808135986, "learning_rate": 2.844454705233996e-06, "loss": 0.0653, "step": 444350 }, { "epoch": 4.37, "grad_norm": 6.562868118286133, "learning_rate": 2.8443305827797474e-06, "loss": 0.0986, "step": 444375 }, { "epoch": 4.37, "grad_norm": 0.7915121912956238, "learning_rate": 2.8442064603254986e-06, "loss": 0.0992, "step": 444400 }, { "epoch": 4.37, "grad_norm": 14.235913276672363, "learning_rate": 2.8440823378712507e-06, "loss": 0.0972, "step": 444425 }, { "epoch": 4.37, "grad_norm": 1.3716375827789307, "learning_rate": 2.843958215417002e-06, "loss": 0.0761, "step": 444450 }, { "epoch": 4.37, "grad_norm": 14.235790252685547, "learning_rate": 2.8438340929627535e-06, "loss": 0.1061, "step": 444475 }, { "epoch": 4.37, "grad_norm": 3.5299220085144043, "learning_rate": 2.843714935406675e-06, "loss": 0.0816, "step": 444500 }, { "epoch": 4.37, "grad_norm": 27.090055465698242, "learning_rate": 2.8435908129524266e-06, "loss": 0.1504, "step": 444525 }, { "epoch": 4.37, "grad_norm": 1.2738515138626099, "learning_rate": 2.8434666904981782e-06, "loss": 0.077, "step": 444550 }, { "epoch": 4.37, "grad_norm": 22.02726173400879, "learning_rate": 2.84334256804393e-06, "loss": 0.1351, "step": 444575 }, { "epoch": 4.37, "grad_norm": 1.9683173894882202, "learning_rate": 2.843218445589681e-06, "loss": 0.0705, "step": 444600 }, { "epoch": 4.37, "grad_norm": 10.919828414916992, "learning_rate": 2.843094323135433e-06, "loss": 0.0912, "step": 444625 }, { "epoch": 4.37, "grad_norm": 7.093734264373779, "learning_rate": 2.8429702006811844e-06, "loss": 0.0716, "step": 444650 }, { "epoch": 4.37, "grad_norm": 11.768046379089355, "learning_rate": 2.8428460782269356e-06, "loss": 0.1194, "step": 444675 }, { "epoch": 4.37, "grad_norm": 3.903343915939331, "learning_rate": 2.842721955772687e-06, "loss": 0.0658, "step": 444700 }, { "epoch": 4.37, "grad_norm": 11.127366065979004, "learning_rate": 2.842597833318439e-06, "loss": 0.1166, "step": 444725 }, { "epoch": 4.37, "grad_norm": 8.471631050109863, "learning_rate": 2.8424737108641905e-06, "loss": 0.0576, "step": 444750 }, { "epoch": 4.37, "grad_norm": 8.767114639282227, "learning_rate": 2.8423495884099417e-06, "loss": 0.1199, "step": 444775 }, { "epoch": 4.37, "grad_norm": 0.7646388411521912, "learning_rate": 2.8422254659556937e-06, "loss": 0.0568, "step": 444800 }, { "epoch": 4.37, "grad_norm": 21.59171485900879, "learning_rate": 2.842101343501445e-06, "loss": 0.0985, "step": 444825 }, { "epoch": 4.37, "grad_norm": 0.6958351731300354, "learning_rate": 2.841977221047196e-06, "loss": 0.0702, "step": 444850 }, { "epoch": 4.37, "grad_norm": 10.822962760925293, "learning_rate": 2.8418530985929482e-06, "loss": 0.093, "step": 444875 }, { "epoch": 4.37, "grad_norm": 7.686196327209473, "learning_rate": 2.8417289761386994e-06, "loss": 0.0975, "step": 444900 }, { "epoch": 4.37, "grad_norm": 5.794086933135986, "learning_rate": 2.841604853684451e-06, "loss": 0.1138, "step": 444925 }, { "epoch": 4.37, "grad_norm": 6.821367263793945, "learning_rate": 2.8414807312302027e-06, "loss": 0.0692, "step": 444950 }, { "epoch": 4.38, "grad_norm": 34.29772186279297, "learning_rate": 2.8413566087759543e-06, "loss": 0.1378, "step": 444975 }, { "epoch": 4.38, "grad_norm": 2.766200065612793, "learning_rate": 2.8412324863217056e-06, "loss": 0.079, "step": 445000 }, { "epoch": 4.38, "grad_norm": 7.255822658538818, "learning_rate": 2.8411083638674576e-06, "loss": 0.1051, "step": 445025 }, { "epoch": 4.38, "grad_norm": 3.8483965396881104, "learning_rate": 2.840984241413209e-06, "loss": 0.0822, "step": 445050 }, { "epoch": 4.38, "grad_norm": 13.214951515197754, "learning_rate": 2.84086011895896e-06, "loss": 0.094, "step": 445075 }, { "epoch": 4.38, "grad_norm": 0.5061014890670776, "learning_rate": 2.840735996504712e-06, "loss": 0.0876, "step": 445100 }, { "epoch": 4.38, "grad_norm": 11.23572826385498, "learning_rate": 2.8406118740504633e-06, "loss": 0.0973, "step": 445125 }, { "epoch": 4.38, "grad_norm": 13.984993934631348, "learning_rate": 2.840487751596215e-06, "loss": 0.0686, "step": 445150 }, { "epoch": 4.38, "grad_norm": 19.785245895385742, "learning_rate": 2.8403636291419666e-06, "loss": 0.1158, "step": 445175 }, { "epoch": 4.38, "grad_norm": 4.978888511657715, "learning_rate": 2.840239506687718e-06, "loss": 0.0966, "step": 445200 }, { "epoch": 4.38, "grad_norm": 12.37535285949707, "learning_rate": 2.8401153842334694e-06, "loss": 0.1202, "step": 445225 }, { "epoch": 4.38, "grad_norm": 9.513726234436035, "learning_rate": 2.8399912617792215e-06, "loss": 0.0705, "step": 445250 }, { "epoch": 4.38, "grad_norm": 15.381961822509766, "learning_rate": 2.8398671393249727e-06, "loss": 0.0985, "step": 445275 }, { "epoch": 4.38, "grad_norm": 11.394569396972656, "learning_rate": 2.839743016870724e-06, "loss": 0.0687, "step": 445300 }, { "epoch": 4.38, "grad_norm": 6.251089572906494, "learning_rate": 2.839618894416476e-06, "loss": 0.0686, "step": 445325 }, { "epoch": 4.38, "grad_norm": 8.051595687866211, "learning_rate": 2.839494771962227e-06, "loss": 0.0597, "step": 445350 }, { "epoch": 4.38, "grad_norm": 13.540371894836426, "learning_rate": 2.839370649507979e-06, "loss": 0.0947, "step": 445375 }, { "epoch": 4.38, "grad_norm": 2.477041244506836, "learning_rate": 2.8392465270537304e-06, "loss": 0.0778, "step": 445400 }, { "epoch": 4.38, "grad_norm": 20.477262496948242, "learning_rate": 2.839122404599482e-06, "loss": 0.1042, "step": 445425 }, { "epoch": 4.38, "grad_norm": 9.777148246765137, "learning_rate": 2.8389982821452333e-06, "loss": 0.0802, "step": 445450 }, { "epoch": 4.38, "grad_norm": 6.0401740074157715, "learning_rate": 2.8388741596909853e-06, "loss": 0.0819, "step": 445475 }, { "epoch": 4.38, "grad_norm": 14.66071605682373, "learning_rate": 2.8387500372367366e-06, "loss": 0.0864, "step": 445500 }, { "epoch": 4.38, "grad_norm": 16.546199798583984, "learning_rate": 2.8386259147824878e-06, "loss": 0.0962, "step": 445525 }, { "epoch": 4.38, "grad_norm": 7.207777976989746, "learning_rate": 2.8385017923282394e-06, "loss": 0.0722, "step": 445550 }, { "epoch": 4.38, "grad_norm": 16.675973892211914, "learning_rate": 2.838377669873991e-06, "loss": 0.1149, "step": 445575 }, { "epoch": 4.38, "grad_norm": 9.30163288116455, "learning_rate": 2.8382535474197427e-06, "loss": 0.0692, "step": 445600 }, { "epoch": 4.38, "grad_norm": 19.84661102294922, "learning_rate": 2.838129424965494e-06, "loss": 0.1176, "step": 445625 }, { "epoch": 4.38, "grad_norm": 6.4907426834106445, "learning_rate": 2.838005302511246e-06, "loss": 0.1066, "step": 445650 }, { "epoch": 4.38, "grad_norm": 13.365621566772461, "learning_rate": 2.837881180056997e-06, "loss": 0.1101, "step": 445675 }, { "epoch": 4.38, "grad_norm": 2.2936227321624756, "learning_rate": 2.8377570576027484e-06, "loss": 0.0896, "step": 445700 }, { "epoch": 4.38, "grad_norm": 7.984655380249023, "learning_rate": 2.8376329351485004e-06, "loss": 0.0925, "step": 445725 }, { "epoch": 4.38, "grad_norm": 1.5380579233169556, "learning_rate": 2.8375088126942516e-06, "loss": 0.0838, "step": 445750 }, { "epoch": 4.38, "grad_norm": 15.41164779663086, "learning_rate": 2.8373846902400033e-06, "loss": 0.0995, "step": 445775 }, { "epoch": 4.38, "grad_norm": 9.808191299438477, "learning_rate": 2.837260567785755e-06, "loss": 0.083, "step": 445800 }, { "epoch": 4.38, "grad_norm": 16.833425521850586, "learning_rate": 2.8371364453315065e-06, "loss": 0.1121, "step": 445825 }, { "epoch": 4.38, "grad_norm": 0.22045259177684784, "learning_rate": 2.8370123228772578e-06, "loss": 0.0983, "step": 445850 }, { "epoch": 4.38, "grad_norm": 10.659234046936035, "learning_rate": 2.83688820042301e-06, "loss": 0.1061, "step": 445875 }, { "epoch": 4.38, "grad_norm": 5.222507476806641, "learning_rate": 2.836764077968761e-06, "loss": 0.0646, "step": 445900 }, { "epoch": 4.38, "grad_norm": 13.664320945739746, "learning_rate": 2.8366399555145122e-06, "loss": 0.1038, "step": 445925 }, { "epoch": 4.38, "grad_norm": 7.251915454864502, "learning_rate": 2.8365158330602643e-06, "loss": 0.0672, "step": 445950 }, { "epoch": 4.38, "grad_norm": 10.236196517944336, "learning_rate": 2.8363917106060155e-06, "loss": 0.0966, "step": 445975 }, { "epoch": 4.39, "grad_norm": 4.614361763000488, "learning_rate": 2.836267588151767e-06, "loss": 0.09, "step": 446000 }, { "epoch": 4.39, "grad_norm": 16.40522575378418, "learning_rate": 2.8361434656975188e-06, "loss": 0.0881, "step": 446025 }, { "epoch": 4.39, "grad_norm": 5.668019771575928, "learning_rate": 2.8360193432432704e-06, "loss": 0.0861, "step": 446050 }, { "epoch": 4.39, "grad_norm": 15.280405044555664, "learning_rate": 2.8358952207890216e-06, "loss": 0.0986, "step": 446075 }, { "epoch": 4.39, "grad_norm": 5.590425968170166, "learning_rate": 2.8357710983347737e-06, "loss": 0.0844, "step": 446100 }, { "epoch": 4.39, "grad_norm": 15.350987434387207, "learning_rate": 2.835646975880525e-06, "loss": 0.1224, "step": 446125 }, { "epoch": 4.39, "grad_norm": 0.15013915300369263, "learning_rate": 2.835522853426276e-06, "loss": 0.0618, "step": 446150 }, { "epoch": 4.39, "grad_norm": 20.20452880859375, "learning_rate": 2.835398730972028e-06, "loss": 0.1015, "step": 446175 }, { "epoch": 4.39, "grad_norm": 7.670372486114502, "learning_rate": 2.8352746085177794e-06, "loss": 0.0835, "step": 446200 }, { "epoch": 4.39, "grad_norm": 12.285473823547363, "learning_rate": 2.835150486063531e-06, "loss": 0.0933, "step": 446225 }, { "epoch": 4.39, "grad_norm": 4.373326301574707, "learning_rate": 2.8350263636092826e-06, "loss": 0.0561, "step": 446250 }, { "epoch": 4.39, "grad_norm": 4.5063934326171875, "learning_rate": 2.8349022411550343e-06, "loss": 0.0862, "step": 446275 }, { "epoch": 4.39, "grad_norm": 1.0951919555664062, "learning_rate": 2.8347781187007855e-06, "loss": 0.0697, "step": 446300 }, { "epoch": 4.39, "grad_norm": 18.674518585205078, "learning_rate": 2.8346539962465375e-06, "loss": 0.1369, "step": 446325 }, { "epoch": 4.39, "grad_norm": 4.401712417602539, "learning_rate": 2.8345298737922888e-06, "loss": 0.0589, "step": 446350 }, { "epoch": 4.39, "grad_norm": 10.764359474182129, "learning_rate": 2.8344057513380404e-06, "loss": 0.1269, "step": 446375 }, { "epoch": 4.39, "grad_norm": 1.1389068365097046, "learning_rate": 2.834281628883792e-06, "loss": 0.0686, "step": 446400 }, { "epoch": 4.39, "grad_norm": 15.581404685974121, "learning_rate": 2.8341575064295437e-06, "loss": 0.1098, "step": 446425 }, { "epoch": 4.39, "grad_norm": 5.4585394859313965, "learning_rate": 2.834033383975295e-06, "loss": 0.0719, "step": 446450 }, { "epoch": 4.39, "grad_norm": 9.17011547088623, "learning_rate": 2.833909261521046e-06, "loss": 0.09, "step": 446475 }, { "epoch": 4.39, "grad_norm": 0.07052654027938843, "learning_rate": 2.833785139066798e-06, "loss": 0.0772, "step": 446500 }, { "epoch": 4.39, "grad_norm": 4.694676399230957, "learning_rate": 2.8336610166125494e-06, "loss": 0.1342, "step": 446525 }, { "epoch": 4.39, "grad_norm": 4.821450233459473, "learning_rate": 2.833536894158301e-06, "loss": 0.0754, "step": 446550 }, { "epoch": 4.39, "grad_norm": 15.817073822021484, "learning_rate": 2.8334127717040526e-06, "loss": 0.0772, "step": 446575 }, { "epoch": 4.39, "grad_norm": 6.233576774597168, "learning_rate": 2.8332886492498043e-06, "loss": 0.0705, "step": 446600 }, { "epoch": 4.39, "grad_norm": 11.106447219848633, "learning_rate": 2.8331645267955555e-06, "loss": 0.0987, "step": 446625 }, { "epoch": 4.39, "grad_norm": 6.9631428718566895, "learning_rate": 2.8330404043413075e-06, "loss": 0.0701, "step": 446650 }, { "epoch": 4.39, "grad_norm": 15.115172386169434, "learning_rate": 2.8329162818870587e-06, "loss": 0.1044, "step": 446675 }, { "epoch": 4.39, "grad_norm": 5.9820475578308105, "learning_rate": 2.83279215943281e-06, "loss": 0.0803, "step": 446700 }, { "epoch": 4.39, "grad_norm": 13.460650444030762, "learning_rate": 2.832668036978562e-06, "loss": 0.1284, "step": 446725 }, { "epoch": 4.39, "grad_norm": 19.786460876464844, "learning_rate": 2.8325439145243132e-06, "loss": 0.0939, "step": 446750 }, { "epoch": 4.39, "grad_norm": 8.518993377685547, "learning_rate": 2.832419792070065e-06, "loss": 0.1074, "step": 446775 }, { "epoch": 4.39, "grad_norm": 4.123383045196533, "learning_rate": 2.8322956696158165e-06, "loss": 0.0648, "step": 446800 }, { "epoch": 4.39, "grad_norm": 14.207783699035645, "learning_rate": 2.832171547161568e-06, "loss": 0.1315, "step": 446825 }, { "epoch": 4.39, "grad_norm": 0.04067692533135414, "learning_rate": 2.8320474247073193e-06, "loss": 0.0545, "step": 446850 }, { "epoch": 4.39, "grad_norm": 19.19536590576172, "learning_rate": 2.8319233022530714e-06, "loss": 0.1167, "step": 446875 }, { "epoch": 4.39, "grad_norm": 0.21072950959205627, "learning_rate": 2.8317991797988226e-06, "loss": 0.084, "step": 446900 }, { "epoch": 4.39, "grad_norm": 20.552783966064453, "learning_rate": 2.831675057344574e-06, "loss": 0.1217, "step": 446925 }, { "epoch": 4.39, "grad_norm": 0.7412862181663513, "learning_rate": 2.831550934890326e-06, "loss": 0.0927, "step": 446950 }, { "epoch": 4.39, "grad_norm": 18.764001846313477, "learning_rate": 2.831426812436077e-06, "loss": 0.0962, "step": 446975 }, { "epoch": 4.39, "grad_norm": 3.9900622367858887, "learning_rate": 2.8313026899818287e-06, "loss": 0.0723, "step": 447000 }, { "epoch": 4.4, "grad_norm": 12.883477210998535, "learning_rate": 2.8311785675275804e-06, "loss": 0.0822, "step": 447025 }, { "epoch": 4.4, "grad_norm": 2.259209156036377, "learning_rate": 2.831054445073332e-06, "loss": 0.0686, "step": 447050 }, { "epoch": 4.4, "grad_norm": 14.33002758026123, "learning_rate": 2.830930322619083e-06, "loss": 0.0914, "step": 447075 }, { "epoch": 4.4, "grad_norm": 1.1590102910995483, "learning_rate": 2.8308062001648353e-06, "loss": 0.0905, "step": 447100 }, { "epoch": 4.4, "grad_norm": 14.697186470031738, "learning_rate": 2.8306820777105865e-06, "loss": 0.1435, "step": 447125 }, { "epoch": 4.4, "grad_norm": 13.179017066955566, "learning_rate": 2.8305579552563377e-06, "loss": 0.0787, "step": 447150 }, { "epoch": 4.4, "grad_norm": 10.981393814086914, "learning_rate": 2.8304338328020897e-06, "loss": 0.1173, "step": 447175 }, { "epoch": 4.4, "grad_norm": 6.480762481689453, "learning_rate": 2.830309710347841e-06, "loss": 0.0788, "step": 447200 }, { "epoch": 4.4, "grad_norm": 15.227330207824707, "learning_rate": 2.8301855878935926e-06, "loss": 0.1024, "step": 447225 }, { "epoch": 4.4, "grad_norm": 1.4483227729797363, "learning_rate": 2.8300614654393442e-06, "loss": 0.0808, "step": 447250 }, { "epoch": 4.4, "grad_norm": 9.83685302734375, "learning_rate": 2.829937342985096e-06, "loss": 0.1465, "step": 447275 }, { "epoch": 4.4, "grad_norm": 4.4154839515686035, "learning_rate": 2.829813220530847e-06, "loss": 0.0888, "step": 447300 }, { "epoch": 4.4, "grad_norm": 15.964607238769531, "learning_rate": 2.8296890980765983e-06, "loss": 0.1025, "step": 447325 }, { "epoch": 4.4, "grad_norm": 3.1009442806243896, "learning_rate": 2.8295649756223503e-06, "loss": 0.078, "step": 447350 }, { "epoch": 4.4, "grad_norm": 4.333355903625488, "learning_rate": 2.8294408531681016e-06, "loss": 0.0915, "step": 447375 }, { "epoch": 4.4, "grad_norm": 2.2532012462615967, "learning_rate": 2.8293216956120234e-06, "loss": 0.0992, "step": 447400 }, { "epoch": 4.4, "grad_norm": 10.978894233703613, "learning_rate": 2.8291975731577746e-06, "loss": 0.0822, "step": 447425 }, { "epoch": 4.4, "grad_norm": 6.827280044555664, "learning_rate": 2.8290734507035263e-06, "loss": 0.0886, "step": 447450 }, { "epoch": 4.4, "grad_norm": 21.328157424926758, "learning_rate": 2.828949328249278e-06, "loss": 0.0922, "step": 447475 }, { "epoch": 4.4, "grad_norm": 6.768072128295898, "learning_rate": 2.8288252057950295e-06, "loss": 0.0841, "step": 447500 }, { "epoch": 4.4, "grad_norm": 12.742491722106934, "learning_rate": 2.8287010833407808e-06, "loss": 0.1275, "step": 447525 }, { "epoch": 4.4, "grad_norm": 10.798379898071289, "learning_rate": 2.828576960886533e-06, "loss": 0.0818, "step": 447550 }, { "epoch": 4.4, "grad_norm": 15.54202938079834, "learning_rate": 2.828452838432284e-06, "loss": 0.0981, "step": 447575 }, { "epoch": 4.4, "grad_norm": 7.150248050689697, "learning_rate": 2.8283287159780352e-06, "loss": 0.0688, "step": 447600 }, { "epoch": 4.4, "grad_norm": 11.540458679199219, "learning_rate": 2.8282045935237873e-06, "loss": 0.1288, "step": 447625 }, { "epoch": 4.4, "grad_norm": 2.98826265335083, "learning_rate": 2.8280804710695385e-06, "loss": 0.0814, "step": 447650 }, { "epoch": 4.4, "grad_norm": 13.024914741516113, "learning_rate": 2.82795634861529e-06, "loss": 0.1026, "step": 447675 }, { "epoch": 4.4, "grad_norm": 5.350971221923828, "learning_rate": 2.8278322261610418e-06, "loss": 0.0899, "step": 447700 }, { "epoch": 4.4, "grad_norm": 7.867288589477539, "learning_rate": 2.8277081037067934e-06, "loss": 0.1077, "step": 447725 }, { "epoch": 4.4, "grad_norm": 5.588169574737549, "learning_rate": 2.8275839812525446e-06, "loss": 0.0578, "step": 447750 }, { "epoch": 4.4, "grad_norm": 11.027554512023926, "learning_rate": 2.8274598587982967e-06, "loss": 0.1, "step": 447775 }, { "epoch": 4.4, "grad_norm": 2.105020523071289, "learning_rate": 2.827335736344048e-06, "loss": 0.091, "step": 447800 }, { "epoch": 4.4, "grad_norm": 8.48322582244873, "learning_rate": 2.827211613889799e-06, "loss": 0.1155, "step": 447825 }, { "epoch": 4.4, "grad_norm": 12.094999313354492, "learning_rate": 2.8270874914355507e-06, "loss": 0.067, "step": 447850 }, { "epoch": 4.4, "grad_norm": 14.353341102600098, "learning_rate": 2.8269633689813024e-06, "loss": 0.0986, "step": 447875 }, { "epoch": 4.4, "grad_norm": 7.2253265380859375, "learning_rate": 2.826839246527054e-06, "loss": 0.0924, "step": 447900 }, { "epoch": 4.4, "grad_norm": 16.818458557128906, "learning_rate": 2.8267151240728052e-06, "loss": 0.1186, "step": 447925 }, { "epoch": 4.4, "grad_norm": 5.949583530426025, "learning_rate": 2.8265910016185573e-06, "loss": 0.0731, "step": 447950 }, { "epoch": 4.4, "grad_norm": 15.475767135620117, "learning_rate": 2.8264668791643085e-06, "loss": 0.1242, "step": 447975 }, { "epoch": 4.4, "grad_norm": 14.10644245147705, "learning_rate": 2.8263427567100597e-06, "loss": 0.0981, "step": 448000 }, { "epoch": 4.41, "grad_norm": 16.90117073059082, "learning_rate": 2.8262186342558118e-06, "loss": 0.1086, "step": 448025 }, { "epoch": 4.41, "grad_norm": 9.648460388183594, "learning_rate": 2.826094511801563e-06, "loss": 0.09, "step": 448050 }, { "epoch": 4.41, "grad_norm": 11.130219459533691, "learning_rate": 2.8259703893473146e-06, "loss": 0.0854, "step": 448075 }, { "epoch": 4.41, "grad_norm": 8.488946914672852, "learning_rate": 2.8258462668930662e-06, "loss": 0.0838, "step": 448100 }, { "epoch": 4.41, "grad_norm": 20.255739212036133, "learning_rate": 2.825722144438818e-06, "loss": 0.1358, "step": 448125 }, { "epoch": 4.41, "grad_norm": 0.758567214012146, "learning_rate": 2.825598021984569e-06, "loss": 0.0872, "step": 448150 }, { "epoch": 4.41, "grad_norm": 19.97162437438965, "learning_rate": 2.825473899530321e-06, "loss": 0.0938, "step": 448175 }, { "epoch": 4.41, "grad_norm": 5.509817600250244, "learning_rate": 2.8253497770760724e-06, "loss": 0.0712, "step": 448200 }, { "epoch": 4.41, "grad_norm": 25.34741973876953, "learning_rate": 2.8252256546218236e-06, "loss": 0.1236, "step": 448225 }, { "epoch": 4.41, "grad_norm": 5.331557750701904, "learning_rate": 2.8251015321675756e-06, "loss": 0.062, "step": 448250 }, { "epoch": 4.41, "grad_norm": 13.36833667755127, "learning_rate": 2.824977409713327e-06, "loss": 0.1505, "step": 448275 }, { "epoch": 4.41, "grad_norm": 4.418307304382324, "learning_rate": 2.8248532872590785e-06, "loss": 0.1028, "step": 448300 }, { "epoch": 4.41, "grad_norm": 6.273461818695068, "learning_rate": 2.82472916480483e-06, "loss": 0.0867, "step": 448325 }, { "epoch": 4.41, "grad_norm": 0.13257163763046265, "learning_rate": 2.8246050423505817e-06, "loss": 0.0725, "step": 448350 }, { "epoch": 4.41, "grad_norm": 12.823504447937012, "learning_rate": 2.824480919896333e-06, "loss": 0.1067, "step": 448375 }, { "epoch": 4.41, "grad_norm": 8.246255874633789, "learning_rate": 2.824356797442085e-06, "loss": 0.0674, "step": 448400 }, { "epoch": 4.41, "grad_norm": 7.912371635437012, "learning_rate": 2.8242326749878362e-06, "loss": 0.0821, "step": 448425 }, { "epoch": 4.41, "grad_norm": 4.93453311920166, "learning_rate": 2.8241085525335874e-06, "loss": 0.0675, "step": 448450 }, { "epoch": 4.41, "grad_norm": 6.038344860076904, "learning_rate": 2.8239844300793395e-06, "loss": 0.1052, "step": 448475 }, { "epoch": 4.41, "grad_norm": 1.170331597328186, "learning_rate": 2.8238603076250907e-06, "loss": 0.0852, "step": 448500 }, { "epoch": 4.41, "grad_norm": 11.209949493408203, "learning_rate": 2.8237361851708423e-06, "loss": 0.0997, "step": 448525 }, { "epoch": 4.41, "grad_norm": 1.2184611558914185, "learning_rate": 2.823612062716594e-06, "loss": 0.079, "step": 448550 }, { "epoch": 4.41, "grad_norm": 17.873931884765625, "learning_rate": 2.8234879402623456e-06, "loss": 0.1213, "step": 448575 }, { "epoch": 4.41, "grad_norm": 2.016118049621582, "learning_rate": 2.823363817808097e-06, "loss": 0.0779, "step": 448600 }, { "epoch": 4.41, "grad_norm": 17.055400848388672, "learning_rate": 2.823239695353849e-06, "loss": 0.1075, "step": 448625 }, { "epoch": 4.41, "grad_norm": 1.9267339706420898, "learning_rate": 2.8231155728996e-06, "loss": 0.0758, "step": 448650 }, { "epoch": 4.41, "grad_norm": 7.235929012298584, "learning_rate": 2.8229914504453513e-06, "loss": 0.0806, "step": 448675 }, { "epoch": 4.41, "grad_norm": 8.64980411529541, "learning_rate": 2.822867327991103e-06, "loss": 0.1, "step": 448700 }, { "epoch": 4.41, "grad_norm": 13.808500289916992, "learning_rate": 2.8227432055368546e-06, "loss": 0.0726, "step": 448725 }, { "epoch": 4.41, "grad_norm": 2.403208017349243, "learning_rate": 2.8226190830826062e-06, "loss": 0.0923, "step": 448750 }, { "epoch": 4.41, "grad_norm": 13.069537162780762, "learning_rate": 2.8224949606283574e-06, "loss": 0.1235, "step": 448775 }, { "epoch": 4.41, "grad_norm": 5.580321788787842, "learning_rate": 2.8223708381741095e-06, "loss": 0.0911, "step": 448800 }, { "epoch": 4.41, "grad_norm": 14.850290298461914, "learning_rate": 2.8222467157198607e-06, "loss": 0.0996, "step": 448825 }, { "epoch": 4.41, "grad_norm": 4.505225658416748, "learning_rate": 2.822122593265612e-06, "loss": 0.0916, "step": 448850 }, { "epoch": 4.41, "grad_norm": 13.440564155578613, "learning_rate": 2.821998470811364e-06, "loss": 0.1147, "step": 448875 }, { "epoch": 4.41, "grad_norm": 1.6077938079833984, "learning_rate": 2.821874348357115e-06, "loss": 0.0748, "step": 448900 }, { "epoch": 4.41, "grad_norm": 11.816680908203125, "learning_rate": 2.821750225902867e-06, "loss": 0.1099, "step": 448925 }, { "epoch": 4.41, "grad_norm": 1.9345163106918335, "learning_rate": 2.8216261034486184e-06, "loss": 0.0771, "step": 448950 }, { "epoch": 4.41, "grad_norm": 33.10020065307617, "learning_rate": 2.82150198099437e-06, "loss": 0.0933, "step": 448975 }, { "epoch": 4.41, "grad_norm": 0.5951722264289856, "learning_rate": 2.8213778585401213e-06, "loss": 0.0957, "step": 449000 }, { "epoch": 4.41, "grad_norm": 8.334065437316895, "learning_rate": 2.8212537360858734e-06, "loss": 0.0759, "step": 449025 }, { "epoch": 4.42, "grad_norm": 4.0743794441223145, "learning_rate": 2.8211296136316246e-06, "loss": 0.0831, "step": 449050 }, { "epoch": 4.42, "grad_norm": 16.061426162719727, "learning_rate": 2.821005491177376e-06, "loss": 0.1169, "step": 449075 }, { "epoch": 4.42, "grad_norm": 1.2576911449432373, "learning_rate": 2.820881368723128e-06, "loss": 0.0594, "step": 449100 }, { "epoch": 4.42, "grad_norm": 5.821888446807861, "learning_rate": 2.8207572462688795e-06, "loss": 0.0981, "step": 449125 }, { "epoch": 4.42, "grad_norm": 5.7361674308776855, "learning_rate": 2.8206331238146307e-06, "loss": 0.0771, "step": 449150 }, { "epoch": 4.42, "grad_norm": 9.750536918640137, "learning_rate": 2.8205090013603823e-06, "loss": 0.1182, "step": 449175 }, { "epoch": 4.42, "grad_norm": 0.5093942284584045, "learning_rate": 2.820384878906134e-06, "loss": 0.08, "step": 449200 }, { "epoch": 4.42, "grad_norm": 9.705814361572266, "learning_rate": 2.820260756451885e-06, "loss": 0.1267, "step": 449225 }, { "epoch": 4.42, "grad_norm": 5.851763725280762, "learning_rate": 2.8201366339976372e-06, "loss": 0.0538, "step": 449250 }, { "epoch": 4.42, "grad_norm": 15.012336730957031, "learning_rate": 2.8200125115433884e-06, "loss": 0.1114, "step": 449275 }, { "epoch": 4.42, "grad_norm": 4.085840225219727, "learning_rate": 2.81988838908914e-06, "loss": 0.0574, "step": 449300 }, { "epoch": 4.42, "grad_norm": 16.003189086914062, "learning_rate": 2.8197642666348917e-06, "loss": 0.1197, "step": 449325 }, { "epoch": 4.42, "grad_norm": 4.412835597991943, "learning_rate": 2.8196401441806433e-06, "loss": 0.0644, "step": 449350 }, { "epoch": 4.42, "grad_norm": 16.58310317993164, "learning_rate": 2.8195160217263945e-06, "loss": 0.1232, "step": 449375 }, { "epoch": 4.42, "grad_norm": 3.9755663871765137, "learning_rate": 2.8193918992721466e-06, "loss": 0.0713, "step": 449400 }, { "epoch": 4.42, "grad_norm": 9.407584190368652, "learning_rate": 2.819267776817898e-06, "loss": 0.1044, "step": 449425 }, { "epoch": 4.42, "grad_norm": 8.626413345336914, "learning_rate": 2.819143654363649e-06, "loss": 0.0778, "step": 449450 }, { "epoch": 4.42, "grad_norm": 8.191929817199707, "learning_rate": 2.819019531909401e-06, "loss": 0.132, "step": 449475 }, { "epoch": 4.42, "grad_norm": 0.05578860640525818, "learning_rate": 2.8188954094551523e-06, "loss": 0.0602, "step": 449500 }, { "epoch": 4.42, "grad_norm": 13.334073066711426, "learning_rate": 2.818771287000904e-06, "loss": 0.1139, "step": 449525 }, { "epoch": 4.42, "grad_norm": 7.178984642028809, "learning_rate": 2.818647164546655e-06, "loss": 0.0711, "step": 449550 }, { "epoch": 4.42, "grad_norm": 15.245808601379395, "learning_rate": 2.818523042092407e-06, "loss": 0.0921, "step": 449575 }, { "epoch": 4.42, "grad_norm": 8.399001121520996, "learning_rate": 2.8183989196381584e-06, "loss": 0.0903, "step": 449600 }, { "epoch": 4.42, "grad_norm": 19.5906982421875, "learning_rate": 2.8182747971839096e-06, "loss": 0.0983, "step": 449625 }, { "epoch": 4.42, "grad_norm": 2.21537709236145, "learning_rate": 2.8181506747296617e-06, "loss": 0.0669, "step": 449650 }, { "epoch": 4.42, "grad_norm": 11.005419731140137, "learning_rate": 2.818026552275413e-06, "loss": 0.1032, "step": 449675 }, { "epoch": 4.42, "grad_norm": 0.10851079970598221, "learning_rate": 2.8179024298211645e-06, "loss": 0.0552, "step": 449700 }, { "epoch": 4.42, "grad_norm": 17.582651138305664, "learning_rate": 2.817778307366916e-06, "loss": 0.1191, "step": 449725 }, { "epoch": 4.42, "grad_norm": 0.022237814962863922, "learning_rate": 2.817654184912668e-06, "loss": 0.0888, "step": 449750 }, { "epoch": 4.42, "grad_norm": 16.517559051513672, "learning_rate": 2.817530062458419e-06, "loss": 0.1145, "step": 449775 }, { "epoch": 4.42, "grad_norm": 5.089890003204346, "learning_rate": 2.817405940004171e-06, "loss": 0.0608, "step": 449800 }, { "epoch": 4.42, "grad_norm": 12.35774040222168, "learning_rate": 2.8172818175499223e-06, "loss": 0.08, "step": 449825 }, { "epoch": 4.42, "grad_norm": 1.047281265258789, "learning_rate": 2.8171576950956735e-06, "loss": 0.0977, "step": 449850 }, { "epoch": 4.42, "grad_norm": 18.15035629272461, "learning_rate": 2.8170335726414256e-06, "loss": 0.0979, "step": 449875 }, { "epoch": 4.42, "grad_norm": 2.3985543251037598, "learning_rate": 2.8169094501871768e-06, "loss": 0.0674, "step": 449900 }, { "epoch": 4.42, "grad_norm": 14.867704391479492, "learning_rate": 2.8167853277329284e-06, "loss": 0.1119, "step": 449925 }, { "epoch": 4.42, "grad_norm": 5.5675225257873535, "learning_rate": 2.81666120527868e-06, "loss": 0.0652, "step": 449950 }, { "epoch": 4.42, "grad_norm": 6.66520881652832, "learning_rate": 2.8165370828244317e-06, "loss": 0.0729, "step": 449975 }, { "epoch": 4.42, "grad_norm": 1.3704466819763184, "learning_rate": 2.816412960370183e-06, "loss": 0.0711, "step": 450000 }, { "epoch": 4.42, "grad_norm": 9.775802612304688, "learning_rate": 2.816288837915935e-06, "loss": 0.0709, "step": 450025 }, { "epoch": 4.42, "grad_norm": 4.5296478271484375, "learning_rate": 2.816164715461686e-06, "loss": 0.0693, "step": 450050 }, { "epoch": 4.43, "grad_norm": 12.423920631408691, "learning_rate": 2.8160405930074374e-06, "loss": 0.0745, "step": 450075 }, { "epoch": 4.43, "grad_norm": 15.477151870727539, "learning_rate": 2.8159164705531894e-06, "loss": 0.0695, "step": 450100 }, { "epoch": 4.43, "grad_norm": 12.205564498901367, "learning_rate": 2.8157923480989406e-06, "loss": 0.0997, "step": 450125 }, { "epoch": 4.43, "grad_norm": 0.2352578490972519, "learning_rate": 2.8156682256446923e-06, "loss": 0.1032, "step": 450150 }, { "epoch": 4.43, "grad_norm": 21.173860549926758, "learning_rate": 2.815544103190444e-06, "loss": 0.0977, "step": 450175 }, { "epoch": 4.43, "grad_norm": 1.4707770347595215, "learning_rate": 2.8154199807361955e-06, "loss": 0.0568, "step": 450200 }, { "epoch": 4.43, "grad_norm": 17.055742263793945, "learning_rate": 2.8152958582819467e-06, "loss": 0.1171, "step": 450225 }, { "epoch": 4.43, "grad_norm": 4.6062517166137695, "learning_rate": 2.815171735827699e-06, "loss": 0.0961, "step": 450250 }, { "epoch": 4.43, "grad_norm": 14.670044898986816, "learning_rate": 2.81504761337345e-06, "loss": 0.1136, "step": 450275 }, { "epoch": 4.43, "grad_norm": 3.2510061264038086, "learning_rate": 2.8149234909192012e-06, "loss": 0.0641, "step": 450300 }, { "epoch": 4.43, "grad_norm": 12.898760795593262, "learning_rate": 2.8147993684649533e-06, "loss": 0.114, "step": 450325 }, { "epoch": 4.43, "grad_norm": 3.5866849422454834, "learning_rate": 2.8146752460107045e-06, "loss": 0.0504, "step": 450350 }, { "epoch": 4.43, "grad_norm": 15.112586975097656, "learning_rate": 2.814551123556456e-06, "loss": 0.0773, "step": 450375 }, { "epoch": 4.43, "grad_norm": 1.0856562852859497, "learning_rate": 2.8144270011022078e-06, "loss": 0.0655, "step": 450400 }, { "epoch": 4.43, "grad_norm": 25.59027099609375, "learning_rate": 2.8143028786479594e-06, "loss": 0.125, "step": 450425 }, { "epoch": 4.43, "grad_norm": 1.799151062965393, "learning_rate": 2.8141787561937106e-06, "loss": 0.0751, "step": 450450 }, { "epoch": 4.43, "grad_norm": 11.47509479522705, "learning_rate": 2.814054633739462e-06, "loss": 0.0962, "step": 450475 }, { "epoch": 4.43, "grad_norm": 6.361744403839111, "learning_rate": 2.813930511285214e-06, "loss": 0.0713, "step": 450500 }, { "epoch": 4.43, "grad_norm": 22.076473236083984, "learning_rate": 2.813806388830965e-06, "loss": 0.1692, "step": 450525 }, { "epoch": 4.43, "grad_norm": 6.990599155426025, "learning_rate": 2.8136822663767167e-06, "loss": 0.0921, "step": 450550 }, { "epoch": 4.43, "grad_norm": 10.481950759887695, "learning_rate": 2.8135581439224684e-06, "loss": 0.0922, "step": 450575 }, { "epoch": 4.43, "grad_norm": 7.328751564025879, "learning_rate": 2.81343402146822e-06, "loss": 0.0875, "step": 450600 }, { "epoch": 4.43, "grad_norm": 14.50542163848877, "learning_rate": 2.8133098990139712e-06, "loss": 0.0879, "step": 450625 }, { "epoch": 4.43, "grad_norm": 0.6219955682754517, "learning_rate": 2.8131857765597233e-06, "loss": 0.0728, "step": 450650 }, { "epoch": 4.43, "grad_norm": 13.801007270812988, "learning_rate": 2.8130616541054745e-06, "loss": 0.0901, "step": 450675 }, { "epoch": 4.43, "grad_norm": 7.831782341003418, "learning_rate": 2.8129375316512257e-06, "loss": 0.0916, "step": 450700 }, { "epoch": 4.43, "grad_norm": 10.044265747070312, "learning_rate": 2.8128134091969778e-06, "loss": 0.1092, "step": 450725 }, { "epoch": 4.43, "grad_norm": 3.4239752292633057, "learning_rate": 2.812689286742729e-06, "loss": 0.0715, "step": 450750 }, { "epoch": 4.43, "grad_norm": 17.69380760192871, "learning_rate": 2.8125651642884806e-06, "loss": 0.1157, "step": 450775 }, { "epoch": 4.43, "grad_norm": 5.305530548095703, "learning_rate": 2.8124410418342322e-06, "loss": 0.0627, "step": 450800 }, { "epoch": 4.43, "grad_norm": 15.52424430847168, "learning_rate": 2.812316919379984e-06, "loss": 0.1006, "step": 450825 }, { "epoch": 4.43, "grad_norm": 0.10082103312015533, "learning_rate": 2.812192796925735e-06, "loss": 0.0749, "step": 450850 }, { "epoch": 4.43, "grad_norm": 10.370944023132324, "learning_rate": 2.812068674471487e-06, "loss": 0.106, "step": 450875 }, { "epoch": 4.43, "grad_norm": 6.2693772315979, "learning_rate": 2.8119445520172384e-06, "loss": 0.0697, "step": 450900 }, { "epoch": 4.43, "grad_norm": 22.978219985961914, "learning_rate": 2.8118204295629896e-06, "loss": 0.1253, "step": 450925 }, { "epoch": 4.43, "grad_norm": 1.2706242799758911, "learning_rate": 2.8116963071087416e-06, "loss": 0.0711, "step": 450950 }, { "epoch": 4.43, "grad_norm": 14.577985763549805, "learning_rate": 2.811572184654493e-06, "loss": 0.127, "step": 450975 }, { "epoch": 4.43, "grad_norm": 8.750838279724121, "learning_rate": 2.8114480622002445e-06, "loss": 0.0609, "step": 451000 }, { "epoch": 4.43, "grad_norm": 9.323803901672363, "learning_rate": 2.811323939745996e-06, "loss": 0.0829, "step": 451025 }, { "epoch": 4.43, "grad_norm": 5.272185325622559, "learning_rate": 2.8111998172917477e-06, "loss": 0.0767, "step": 451050 }, { "epoch": 4.44, "grad_norm": 7.66135835647583, "learning_rate": 2.811075694837499e-06, "loss": 0.0688, "step": 451075 }, { "epoch": 4.44, "grad_norm": 1.0813522338867188, "learning_rate": 2.810951572383251e-06, "loss": 0.0975, "step": 451100 }, { "epoch": 4.44, "grad_norm": 9.533188819885254, "learning_rate": 2.8108274499290022e-06, "loss": 0.1125, "step": 451125 }, { "epoch": 4.44, "grad_norm": 1.520459532737732, "learning_rate": 2.8107033274747534e-06, "loss": 0.0893, "step": 451150 }, { "epoch": 4.44, "grad_norm": 16.742938995361328, "learning_rate": 2.8105792050205055e-06, "loss": 0.1094, "step": 451175 }, { "epoch": 4.44, "grad_norm": 3.4142751693725586, "learning_rate": 2.8104550825662567e-06, "loss": 0.085, "step": 451200 }, { "epoch": 4.44, "grad_norm": 20.646507263183594, "learning_rate": 2.8103309601120083e-06, "loss": 0.1065, "step": 451225 }, { "epoch": 4.44, "grad_norm": 6.352124214172363, "learning_rate": 2.81020683765776e-06, "loss": 0.0745, "step": 451250 }, { "epoch": 4.44, "grad_norm": 11.077048301696777, "learning_rate": 2.8100827152035116e-06, "loss": 0.0853, "step": 451275 }, { "epoch": 4.44, "grad_norm": 5.167997360229492, "learning_rate": 2.8099635576474326e-06, "loss": 0.0689, "step": 451300 }, { "epoch": 4.44, "grad_norm": 7.869520664215088, "learning_rate": 2.8098394351931847e-06, "loss": 0.1204, "step": 451325 }, { "epoch": 4.44, "grad_norm": 7.147389888763428, "learning_rate": 2.809715312738936e-06, "loss": 0.0719, "step": 451350 }, { "epoch": 4.44, "grad_norm": 11.989776611328125, "learning_rate": 2.809591190284687e-06, "loss": 0.0989, "step": 451375 }, { "epoch": 4.44, "grad_norm": 5.595965385437012, "learning_rate": 2.809467067830439e-06, "loss": 0.0497, "step": 451400 }, { "epoch": 4.44, "grad_norm": 7.776183128356934, "learning_rate": 2.8093429453761904e-06, "loss": 0.111, "step": 451425 }, { "epoch": 4.44, "grad_norm": 5.753642559051514, "learning_rate": 2.809218822921942e-06, "loss": 0.0746, "step": 451450 }, { "epoch": 4.44, "grad_norm": 6.513875484466553, "learning_rate": 2.8090947004676937e-06, "loss": 0.1071, "step": 451475 }, { "epoch": 4.44, "grad_norm": 9.381633758544922, "learning_rate": 2.8089705780134453e-06, "loss": 0.0837, "step": 451500 }, { "epoch": 4.44, "grad_norm": 16.649518966674805, "learning_rate": 2.8088464555591965e-06, "loss": 0.1128, "step": 451525 }, { "epoch": 4.44, "grad_norm": 0.870826244354248, "learning_rate": 2.8087223331049486e-06, "loss": 0.0762, "step": 451550 }, { "epoch": 4.44, "grad_norm": 11.576827049255371, "learning_rate": 2.8085982106506998e-06, "loss": 0.1065, "step": 451575 }, { "epoch": 4.44, "grad_norm": 6.861343860626221, "learning_rate": 2.808474088196451e-06, "loss": 0.0681, "step": 451600 }, { "epoch": 4.44, "grad_norm": 16.830644607543945, "learning_rate": 2.808349965742203e-06, "loss": 0.0921, "step": 451625 }, { "epoch": 4.44, "grad_norm": 5.571023941040039, "learning_rate": 2.8082258432879543e-06, "loss": 0.0696, "step": 451650 }, { "epoch": 4.44, "grad_norm": 13.942535400390625, "learning_rate": 2.808101720833706e-06, "loss": 0.1093, "step": 451675 }, { "epoch": 4.44, "grad_norm": 5.922213077545166, "learning_rate": 2.8079775983794575e-06, "loss": 0.0936, "step": 451700 }, { "epoch": 4.44, "grad_norm": 15.554648399353027, "learning_rate": 2.807853475925209e-06, "loss": 0.1226, "step": 451725 }, { "epoch": 4.44, "grad_norm": 3.075798988342285, "learning_rate": 2.8077293534709604e-06, "loss": 0.0767, "step": 451750 }, { "epoch": 4.44, "grad_norm": 14.868996620178223, "learning_rate": 2.8076052310167124e-06, "loss": 0.1075, "step": 451775 }, { "epoch": 4.44, "grad_norm": 3.500976800918579, "learning_rate": 2.8074811085624636e-06, "loss": 0.0758, "step": 451800 }, { "epoch": 4.44, "grad_norm": 12.04432487487793, "learning_rate": 2.807356986108215e-06, "loss": 0.1223, "step": 451825 }, { "epoch": 4.44, "grad_norm": 0.1898859292268753, "learning_rate": 2.8072328636539665e-06, "loss": 0.0825, "step": 451850 }, { "epoch": 4.44, "grad_norm": 11.017634391784668, "learning_rate": 2.807108741199718e-06, "loss": 0.1314, "step": 451875 }, { "epoch": 4.44, "grad_norm": 2.011967182159424, "learning_rate": 2.8069846187454698e-06, "loss": 0.0719, "step": 451900 }, { "epoch": 4.44, "grad_norm": 14.490568161010742, "learning_rate": 2.806860496291221e-06, "loss": 0.1196, "step": 451925 }, { "epoch": 4.44, "grad_norm": 3.6527421474456787, "learning_rate": 2.806736373836973e-06, "loss": 0.0735, "step": 451950 }, { "epoch": 4.44, "grad_norm": 18.93103790283203, "learning_rate": 2.8066122513827242e-06, "loss": 0.1141, "step": 451975 }, { "epoch": 4.44, "grad_norm": 0.8660667538642883, "learning_rate": 2.806488128928476e-06, "loss": 0.0772, "step": 452000 }, { "epoch": 4.44, "grad_norm": 22.040863037109375, "learning_rate": 2.8063640064742275e-06, "loss": 0.1093, "step": 452025 }, { "epoch": 4.44, "grad_norm": 2.3814191818237305, "learning_rate": 2.806239884019979e-06, "loss": 0.0757, "step": 452050 }, { "epoch": 4.44, "grad_norm": 9.07811450958252, "learning_rate": 2.8061157615657304e-06, "loss": 0.1026, "step": 452075 }, { "epoch": 4.45, "grad_norm": 4.529189109802246, "learning_rate": 2.8059916391114824e-06, "loss": 0.0635, "step": 452100 }, { "epoch": 4.45, "grad_norm": 5.7521867752075195, "learning_rate": 2.8058675166572336e-06, "loss": 0.1407, "step": 452125 }, { "epoch": 4.45, "grad_norm": 6.057012557983398, "learning_rate": 2.805743394202985e-06, "loss": 0.0725, "step": 452150 }, { "epoch": 4.45, "grad_norm": 23.321544647216797, "learning_rate": 2.805619271748737e-06, "loss": 0.1036, "step": 452175 }, { "epoch": 4.45, "grad_norm": 7.709458827972412, "learning_rate": 2.805495149294488e-06, "loss": 0.0772, "step": 452200 }, { "epoch": 4.45, "grad_norm": 18.932811737060547, "learning_rate": 2.8053710268402397e-06, "loss": 0.1122, "step": 452225 }, { "epoch": 4.45, "grad_norm": 1.2237576246261597, "learning_rate": 2.8052469043859914e-06, "loss": 0.0792, "step": 452250 }, { "epoch": 4.45, "grad_norm": 7.461517333984375, "learning_rate": 2.805122781931743e-06, "loss": 0.0811, "step": 452275 }, { "epoch": 4.45, "grad_norm": 1.8131582736968994, "learning_rate": 2.8049986594774942e-06, "loss": 0.0875, "step": 452300 }, { "epoch": 4.45, "grad_norm": 6.4626054763793945, "learning_rate": 2.8048745370232463e-06, "loss": 0.094, "step": 452325 }, { "epoch": 4.45, "grad_norm": 6.33712100982666, "learning_rate": 2.8047504145689975e-06, "loss": 0.0846, "step": 452350 }, { "epoch": 4.45, "grad_norm": 9.209166526794434, "learning_rate": 2.8046262921147487e-06, "loss": 0.1192, "step": 452375 }, { "epoch": 4.45, "grad_norm": 0.8407480120658875, "learning_rate": 2.8045021696605008e-06, "loss": 0.0581, "step": 452400 }, { "epoch": 4.45, "grad_norm": 15.540273666381836, "learning_rate": 2.804378047206252e-06, "loss": 0.129, "step": 452425 }, { "epoch": 4.45, "grad_norm": 4.256284236907959, "learning_rate": 2.8042539247520036e-06, "loss": 0.0835, "step": 452450 }, { "epoch": 4.45, "grad_norm": 13.541424751281738, "learning_rate": 2.8041298022977552e-06, "loss": 0.1037, "step": 452475 }, { "epoch": 4.45, "grad_norm": 4.693929195404053, "learning_rate": 2.804005679843507e-06, "loss": 0.08, "step": 452500 }, { "epoch": 4.45, "grad_norm": 13.004108428955078, "learning_rate": 2.803881557389258e-06, "loss": 0.1079, "step": 452525 }, { "epoch": 4.45, "grad_norm": 5.765754222869873, "learning_rate": 2.80375743493501e-06, "loss": 0.0695, "step": 452550 }, { "epoch": 4.45, "grad_norm": 13.228178024291992, "learning_rate": 2.8036333124807614e-06, "loss": 0.0915, "step": 452575 }, { "epoch": 4.45, "grad_norm": 1.3874868154525757, "learning_rate": 2.8035091900265126e-06, "loss": 0.0724, "step": 452600 }, { "epoch": 4.45, "grad_norm": 14.2423677444458, "learning_rate": 2.8033850675722646e-06, "loss": 0.1287, "step": 452625 }, { "epoch": 4.45, "grad_norm": 3.735818386077881, "learning_rate": 2.803260945118016e-06, "loss": 0.0756, "step": 452650 }, { "epoch": 4.45, "grad_norm": 8.499383926391602, "learning_rate": 2.8031368226637675e-06, "loss": 0.1295, "step": 452675 }, { "epoch": 4.45, "grad_norm": 1.0008785724639893, "learning_rate": 2.8030127002095187e-06, "loss": 0.0773, "step": 452700 }, { "epoch": 4.45, "grad_norm": 13.61362075805664, "learning_rate": 2.8028885777552707e-06, "loss": 0.1298, "step": 452725 }, { "epoch": 4.45, "grad_norm": 9.167935371398926, "learning_rate": 2.802764455301022e-06, "loss": 0.0646, "step": 452750 }, { "epoch": 4.45, "grad_norm": 11.589906692504883, "learning_rate": 2.802640332846773e-06, "loss": 0.1325, "step": 452775 }, { "epoch": 4.45, "grad_norm": 4.525787830352783, "learning_rate": 2.8025162103925252e-06, "loss": 0.088, "step": 452800 }, { "epoch": 4.45, "grad_norm": 5.318804740905762, "learning_rate": 2.8023920879382764e-06, "loss": 0.0803, "step": 452825 }, { "epoch": 4.45, "grad_norm": 5.670190811157227, "learning_rate": 2.802267965484028e-06, "loss": 0.0686, "step": 452850 }, { "epoch": 4.45, "grad_norm": 7.675384521484375, "learning_rate": 2.8021438430297797e-06, "loss": 0.106, "step": 452875 }, { "epoch": 4.45, "grad_norm": 5.7398295402526855, "learning_rate": 2.8020197205755313e-06, "loss": 0.0643, "step": 452900 }, { "epoch": 4.45, "grad_norm": 22.29878044128418, "learning_rate": 2.8018955981212826e-06, "loss": 0.12, "step": 452925 }, { "epoch": 4.45, "grad_norm": 5.749244213104248, "learning_rate": 2.8017714756670346e-06, "loss": 0.0713, "step": 452950 }, { "epoch": 4.45, "grad_norm": 28.81816864013672, "learning_rate": 2.801647353212786e-06, "loss": 0.1177, "step": 452975 }, { "epoch": 4.45, "grad_norm": 6.393866539001465, "learning_rate": 2.801523230758537e-06, "loss": 0.0789, "step": 453000 }, { "epoch": 4.45, "grad_norm": 8.537224769592285, "learning_rate": 2.801399108304289e-06, "loss": 0.0623, "step": 453025 }, { "epoch": 4.45, "grad_norm": 2.5205414295196533, "learning_rate": 2.8012749858500403e-06, "loss": 0.0705, "step": 453050 }, { "epoch": 4.45, "grad_norm": 16.993865966796875, "learning_rate": 2.801150863395792e-06, "loss": 0.1288, "step": 453075 }, { "epoch": 4.45, "grad_norm": 1.4799935817718506, "learning_rate": 2.8010267409415436e-06, "loss": 0.0636, "step": 453100 }, { "epoch": 4.46, "grad_norm": 2.9893670082092285, "learning_rate": 2.800902618487295e-06, "loss": 0.114, "step": 453125 }, { "epoch": 4.46, "grad_norm": 1.2008190155029297, "learning_rate": 2.8007784960330464e-06, "loss": 0.0649, "step": 453150 }, { "epoch": 4.46, "grad_norm": 17.56352996826172, "learning_rate": 2.8006543735787985e-06, "loss": 0.0952, "step": 453175 }, { "epoch": 4.46, "grad_norm": 2.9391400814056396, "learning_rate": 2.8005302511245497e-06, "loss": 0.0619, "step": 453200 }, { "epoch": 4.46, "grad_norm": 5.311920642852783, "learning_rate": 2.800406128670301e-06, "loss": 0.1077, "step": 453225 }, { "epoch": 4.46, "grad_norm": 0.7261995077133179, "learning_rate": 2.800282006216053e-06, "loss": 0.0786, "step": 453250 }, { "epoch": 4.46, "grad_norm": 7.977014541625977, "learning_rate": 2.800157883761804e-06, "loss": 0.0958, "step": 453275 }, { "epoch": 4.46, "grad_norm": 8.339326858520508, "learning_rate": 2.800033761307556e-06, "loss": 0.0887, "step": 453300 }, { "epoch": 4.46, "grad_norm": 10.715819358825684, "learning_rate": 2.7999096388533074e-06, "loss": 0.119, "step": 453325 }, { "epoch": 4.46, "grad_norm": 6.086287975311279, "learning_rate": 2.799785516399059e-06, "loss": 0.0673, "step": 453350 }, { "epoch": 4.46, "grad_norm": 16.84010124206543, "learning_rate": 2.7996613939448103e-06, "loss": 0.0971, "step": 453375 }, { "epoch": 4.46, "grad_norm": 10.217887878417969, "learning_rate": 2.7995372714905623e-06, "loss": 0.0849, "step": 453400 }, { "epoch": 4.46, "grad_norm": 11.613944053649902, "learning_rate": 2.7994131490363136e-06, "loss": 0.1107, "step": 453425 }, { "epoch": 4.46, "grad_norm": 2.65865421295166, "learning_rate": 2.7992890265820648e-06, "loss": 0.0781, "step": 453450 }, { "epoch": 4.46, "grad_norm": 17.352693557739258, "learning_rate": 2.799164904127817e-06, "loss": 0.0931, "step": 453475 }, { "epoch": 4.46, "grad_norm": 1.1056544780731201, "learning_rate": 2.799040781673568e-06, "loss": 0.0639, "step": 453500 }, { "epoch": 4.46, "grad_norm": 14.943819999694824, "learning_rate": 2.7989166592193197e-06, "loss": 0.1121, "step": 453525 }, { "epoch": 4.46, "grad_norm": 4.250779628753662, "learning_rate": 2.798792536765071e-06, "loss": 0.0581, "step": 453550 }, { "epoch": 4.46, "grad_norm": 8.858343124389648, "learning_rate": 2.798668414310823e-06, "loss": 0.1093, "step": 453575 }, { "epoch": 4.46, "grad_norm": 10.10140323638916, "learning_rate": 2.798544291856574e-06, "loss": 0.087, "step": 453600 }, { "epoch": 4.46, "grad_norm": 18.727855682373047, "learning_rate": 2.7984201694023254e-06, "loss": 0.109, "step": 453625 }, { "epoch": 4.46, "grad_norm": 13.916041374206543, "learning_rate": 2.7982960469480774e-06, "loss": 0.0614, "step": 453650 }, { "epoch": 4.46, "grad_norm": 12.613020896911621, "learning_rate": 2.7981719244938286e-06, "loss": 0.1121, "step": 453675 }, { "epoch": 4.46, "grad_norm": 0.7683708071708679, "learning_rate": 2.7980478020395803e-06, "loss": 0.073, "step": 453700 }, { "epoch": 4.46, "grad_norm": 14.647188186645508, "learning_rate": 2.797923679585332e-06, "loss": 0.0983, "step": 453725 }, { "epoch": 4.46, "grad_norm": 0.058659836649894714, "learning_rate": 2.7977995571310835e-06, "loss": 0.0617, "step": 453750 }, { "epoch": 4.46, "grad_norm": 19.219423294067383, "learning_rate": 2.7976754346768348e-06, "loss": 0.113, "step": 453775 }, { "epoch": 4.46, "grad_norm": 4.227267742156982, "learning_rate": 2.797551312222587e-06, "loss": 0.0924, "step": 453800 }, { "epoch": 4.46, "grad_norm": 12.172576904296875, "learning_rate": 2.797427189768338e-06, "loss": 0.1141, "step": 453825 }, { "epoch": 4.46, "grad_norm": 4.061446666717529, "learning_rate": 2.7973030673140892e-06, "loss": 0.0756, "step": 453850 }, { "epoch": 4.46, "grad_norm": 17.18073844909668, "learning_rate": 2.7971789448598413e-06, "loss": 0.1041, "step": 453875 }, { "epoch": 4.46, "grad_norm": 6.365854740142822, "learning_rate": 2.7970548224055925e-06, "loss": 0.0424, "step": 453900 }, { "epoch": 4.46, "grad_norm": 10.503103256225586, "learning_rate": 2.796930699951344e-06, "loss": 0.0903, "step": 453925 }, { "epoch": 4.46, "grad_norm": 0.23923258483409882, "learning_rate": 2.7968065774970958e-06, "loss": 0.0894, "step": 453950 }, { "epoch": 4.46, "grad_norm": 24.4024658203125, "learning_rate": 2.7966824550428474e-06, "loss": 0.1105, "step": 453975 }, { "epoch": 4.46, "grad_norm": 3.2246713638305664, "learning_rate": 2.7965583325885986e-06, "loss": 0.0592, "step": 454000 }, { "epoch": 4.46, "grad_norm": 13.257168769836426, "learning_rate": 2.7964342101343507e-06, "loss": 0.1294, "step": 454025 }, { "epoch": 4.46, "grad_norm": 0.37786972522735596, "learning_rate": 2.796310087680102e-06, "loss": 0.0866, "step": 454050 }, { "epoch": 4.46, "grad_norm": 25.341533660888672, "learning_rate": 2.7961909301240238e-06, "loss": 0.1071, "step": 454075 }, { "epoch": 4.46, "grad_norm": 9.810532569885254, "learning_rate": 2.796066807669775e-06, "loss": 0.0809, "step": 454100 }, { "epoch": 4.47, "grad_norm": 10.13825511932373, "learning_rate": 2.795942685215526e-06, "loss": 0.1072, "step": 454125 }, { "epoch": 4.47, "grad_norm": 7.454688549041748, "learning_rate": 2.795818562761278e-06, "loss": 0.0808, "step": 454150 }, { "epoch": 4.47, "grad_norm": 13.109983444213867, "learning_rate": 2.7956944403070295e-06, "loss": 0.1028, "step": 454175 }, { "epoch": 4.47, "grad_norm": 3.142788887023926, "learning_rate": 2.795570317852781e-06, "loss": 0.0952, "step": 454200 }, { "epoch": 4.47, "grad_norm": 10.681123733520508, "learning_rate": 2.7954461953985323e-06, "loss": 0.0863, "step": 454225 }, { "epoch": 4.47, "grad_norm": 3.5614404678344727, "learning_rate": 2.7953220729442844e-06, "loss": 0.0731, "step": 454250 }, { "epoch": 4.47, "grad_norm": 11.473105430603027, "learning_rate": 2.7951979504900356e-06, "loss": 0.0985, "step": 454275 }, { "epoch": 4.47, "grad_norm": 3.227928638458252, "learning_rate": 2.7950738280357868e-06, "loss": 0.0787, "step": 454300 }, { "epoch": 4.47, "grad_norm": 12.873327255249023, "learning_rate": 2.794949705581539e-06, "loss": 0.1064, "step": 454325 }, { "epoch": 4.47, "grad_norm": 5.0526018142700195, "learning_rate": 2.79482558312729e-06, "loss": 0.0787, "step": 454350 }, { "epoch": 4.47, "grad_norm": 16.815752029418945, "learning_rate": 2.7947014606730417e-06, "loss": 0.1193, "step": 454375 }, { "epoch": 4.47, "grad_norm": 0.7820255756378174, "learning_rate": 2.7945773382187933e-06, "loss": 0.0722, "step": 454400 }, { "epoch": 4.47, "grad_norm": 15.207260131835938, "learning_rate": 2.794453215764545e-06, "loss": 0.1152, "step": 454425 }, { "epoch": 4.47, "grad_norm": 4.489170074462891, "learning_rate": 2.794329093310296e-06, "loss": 0.0953, "step": 454450 }, { "epoch": 4.47, "grad_norm": 18.113834381103516, "learning_rate": 2.7942049708560482e-06, "loss": 0.1277, "step": 454475 }, { "epoch": 4.47, "grad_norm": 7.922464847564697, "learning_rate": 2.7940808484017994e-06, "loss": 0.0819, "step": 454500 }, { "epoch": 4.47, "grad_norm": 11.774053573608398, "learning_rate": 2.7939567259475507e-06, "loss": 0.0842, "step": 454525 }, { "epoch": 4.47, "grad_norm": 5.6537370681762695, "learning_rate": 2.7938326034933027e-06, "loss": 0.0424, "step": 454550 }, { "epoch": 4.47, "grad_norm": 5.632954120635986, "learning_rate": 2.793708481039054e-06, "loss": 0.0946, "step": 454575 }, { "epoch": 4.47, "grad_norm": 5.255924701690674, "learning_rate": 2.7935843585848056e-06, "loss": 0.0952, "step": 454600 }, { "epoch": 4.47, "grad_norm": 10.796538352966309, "learning_rate": 2.793460236130557e-06, "loss": 0.1021, "step": 454625 }, { "epoch": 4.47, "grad_norm": 0.1891724318265915, "learning_rate": 2.793336113676309e-06, "loss": 0.092, "step": 454650 }, { "epoch": 4.47, "grad_norm": 16.3184757232666, "learning_rate": 2.79321199122206e-06, "loss": 0.1041, "step": 454675 }, { "epoch": 4.47, "grad_norm": 1.9560461044311523, "learning_rate": 2.793087868767812e-06, "loss": 0.0739, "step": 454700 }, { "epoch": 4.47, "grad_norm": 9.425487518310547, "learning_rate": 2.7929637463135633e-06, "loss": 0.078, "step": 454725 }, { "epoch": 4.47, "grad_norm": 9.66730785369873, "learning_rate": 2.7928396238593145e-06, "loss": 0.0911, "step": 454750 }, { "epoch": 4.47, "grad_norm": 6.954170227050781, "learning_rate": 2.7927155014050666e-06, "loss": 0.0976, "step": 454775 }, { "epoch": 4.47, "grad_norm": 4.897330284118652, "learning_rate": 2.792591378950818e-06, "loss": 0.0733, "step": 454800 }, { "epoch": 4.47, "grad_norm": 2.1335129737854004, "learning_rate": 2.7924672564965694e-06, "loss": 0.0829, "step": 454825 }, { "epoch": 4.47, "grad_norm": 11.042633056640625, "learning_rate": 2.792343134042321e-06, "loss": 0.0893, "step": 454850 }, { "epoch": 4.47, "grad_norm": 18.850360870361328, "learning_rate": 2.7922190115880727e-06, "loss": 0.0967, "step": 454875 }, { "epoch": 4.47, "grad_norm": 0.9282742142677307, "learning_rate": 2.792094889133824e-06, "loss": 0.0693, "step": 454900 }, { "epoch": 4.47, "grad_norm": 6.265355110168457, "learning_rate": 2.791970766679576e-06, "loss": 0.0881, "step": 454925 }, { "epoch": 4.47, "grad_norm": 2.5377964973449707, "learning_rate": 2.791846644225327e-06, "loss": 0.0534, "step": 454950 }, { "epoch": 4.47, "grad_norm": 14.331351280212402, "learning_rate": 2.791722521771079e-06, "loss": 0.1285, "step": 454975 }, { "epoch": 4.47, "grad_norm": 3.677297830581665, "learning_rate": 2.79159839931683e-06, "loss": 0.0753, "step": 455000 }, { "epoch": 4.47, "grad_norm": 9.939258575439453, "learning_rate": 2.791474276862582e-06, "loss": 0.144, "step": 455025 }, { "epoch": 4.47, "grad_norm": 4.344325542449951, "learning_rate": 2.7913501544083333e-06, "loss": 0.0818, "step": 455050 }, { "epoch": 4.47, "grad_norm": 21.207473754882812, "learning_rate": 2.7912260319540845e-06, "loss": 0.1288, "step": 455075 }, { "epoch": 4.47, "grad_norm": 5.672143936157227, "learning_rate": 2.7911019094998366e-06, "loss": 0.0792, "step": 455100 }, { "epoch": 4.47, "grad_norm": 13.344095230102539, "learning_rate": 2.7909777870455878e-06, "loss": 0.1127, "step": 455125 }, { "epoch": 4.48, "grad_norm": 6.343225002288818, "learning_rate": 2.7908536645913394e-06, "loss": 0.0781, "step": 455150 }, { "epoch": 4.48, "grad_norm": 20.890108108520508, "learning_rate": 2.790729542137091e-06, "loss": 0.1257, "step": 455175 }, { "epoch": 4.48, "grad_norm": 3.7827956676483154, "learning_rate": 2.7906054196828427e-06, "loss": 0.0655, "step": 455200 }, { "epoch": 4.48, "grad_norm": 8.39484977722168, "learning_rate": 2.790481297228594e-06, "loss": 0.1078, "step": 455225 }, { "epoch": 4.48, "grad_norm": 8.525965690612793, "learning_rate": 2.790357174774346e-06, "loss": 0.0624, "step": 455250 }, { "epoch": 4.48, "grad_norm": 12.277379989624023, "learning_rate": 2.790233052320097e-06, "loss": 0.0968, "step": 455275 }, { "epoch": 4.48, "grad_norm": 1.9262523651123047, "learning_rate": 2.7901089298658484e-06, "loss": 0.0703, "step": 455300 }, { "epoch": 4.48, "grad_norm": 17.462263107299805, "learning_rate": 2.7899848074116004e-06, "loss": 0.1191, "step": 455325 }, { "epoch": 4.48, "grad_norm": 4.461526870727539, "learning_rate": 2.7898606849573516e-06, "loss": 0.0888, "step": 455350 }, { "epoch": 4.48, "grad_norm": 14.8563814163208, "learning_rate": 2.7897365625031033e-06, "loss": 0.1082, "step": 455375 }, { "epoch": 4.48, "grad_norm": 5.2556257247924805, "learning_rate": 2.789612440048855e-06, "loss": 0.0679, "step": 455400 }, { "epoch": 4.48, "grad_norm": 19.099008560180664, "learning_rate": 2.7894883175946066e-06, "loss": 0.1246, "step": 455425 }, { "epoch": 4.48, "grad_norm": 7.301398754119873, "learning_rate": 2.7893641951403578e-06, "loss": 0.0832, "step": 455450 }, { "epoch": 4.48, "grad_norm": 19.075443267822266, "learning_rate": 2.78924007268611e-06, "loss": 0.1284, "step": 455475 }, { "epoch": 4.48, "grad_norm": 0.21456700563430786, "learning_rate": 2.789115950231861e-06, "loss": 0.065, "step": 455500 }, { "epoch": 4.48, "grad_norm": 11.996992111206055, "learning_rate": 2.7889918277776122e-06, "loss": 0.1024, "step": 455525 }, { "epoch": 4.48, "grad_norm": 4.110461711883545, "learning_rate": 2.7888677053233643e-06, "loss": 0.08, "step": 455550 }, { "epoch": 4.48, "grad_norm": 7.599008560180664, "learning_rate": 2.7887435828691155e-06, "loss": 0.1045, "step": 455575 }, { "epoch": 4.48, "grad_norm": 4.541342258453369, "learning_rate": 2.788619460414867e-06, "loss": 0.0755, "step": 455600 }, { "epoch": 4.48, "grad_norm": 8.396772384643555, "learning_rate": 2.7884953379606188e-06, "loss": 0.0931, "step": 455625 }, { "epoch": 4.48, "grad_norm": 4.153362274169922, "learning_rate": 2.7883712155063704e-06, "loss": 0.0658, "step": 455650 }, { "epoch": 4.48, "grad_norm": 6.305896282196045, "learning_rate": 2.7882470930521216e-06, "loss": 0.0808, "step": 455675 }, { "epoch": 4.48, "grad_norm": 0.7515016198158264, "learning_rate": 2.7881229705978737e-06, "loss": 0.0957, "step": 455700 }, { "epoch": 4.48, "grad_norm": 14.072905540466309, "learning_rate": 2.787998848143625e-06, "loss": 0.1078, "step": 455725 }, { "epoch": 4.48, "grad_norm": 5.914872169494629, "learning_rate": 2.787874725689376e-06, "loss": 0.0625, "step": 455750 }, { "epoch": 4.48, "grad_norm": 9.75040054321289, "learning_rate": 2.787750603235128e-06, "loss": 0.1111, "step": 455775 }, { "epoch": 4.48, "grad_norm": 0.25246572494506836, "learning_rate": 2.7876264807808794e-06, "loss": 0.079, "step": 455800 }, { "epoch": 4.48, "grad_norm": 11.326257705688477, "learning_rate": 2.787502358326631e-06, "loss": 0.1149, "step": 455825 }, { "epoch": 4.48, "grad_norm": 2.9089956283569336, "learning_rate": 2.7873782358723822e-06, "loss": 0.0862, "step": 455850 }, { "epoch": 4.48, "grad_norm": 12.788834571838379, "learning_rate": 2.7872541134181343e-06, "loss": 0.1158, "step": 455875 }, { "epoch": 4.48, "grad_norm": 23.192895889282227, "learning_rate": 2.7871299909638855e-06, "loss": 0.0867, "step": 455900 }, { "epoch": 4.48, "grad_norm": 21.854249954223633, "learning_rate": 2.7870058685096367e-06, "loss": 0.1115, "step": 455925 }, { "epoch": 4.48, "grad_norm": 0.12457820773124695, "learning_rate": 2.7868817460553888e-06, "loss": 0.073, "step": 455950 }, { "epoch": 4.48, "grad_norm": 9.409026145935059, "learning_rate": 2.78675762360114e-06, "loss": 0.0993, "step": 455975 }, { "epoch": 4.48, "grad_norm": 7.0814738273620605, "learning_rate": 2.7866335011468916e-06, "loss": 0.0779, "step": 456000 }, { "epoch": 4.48, "grad_norm": 6.586020469665527, "learning_rate": 2.7865093786926432e-06, "loss": 0.118, "step": 456025 }, { "epoch": 4.48, "grad_norm": 1.5919744968414307, "learning_rate": 2.786385256238395e-06, "loss": 0.0847, "step": 456050 }, { "epoch": 4.48, "grad_norm": 9.233675003051758, "learning_rate": 2.786261133784146e-06, "loss": 0.1065, "step": 456075 }, { "epoch": 4.48, "grad_norm": 0.7010667324066162, "learning_rate": 2.786137011329898e-06, "loss": 0.1086, "step": 456100 }, { "epoch": 4.48, "grad_norm": Infinity, "learning_rate": 2.786017853773819e-06, "loss": 0.1323, "step": 456125 }, { "epoch": 4.48, "grad_norm": 4.171468257904053, "learning_rate": 2.7858937313195712e-06, "loss": 0.0874, "step": 456150 }, { "epoch": 4.49, "grad_norm": 18.503414154052734, "learning_rate": 2.7857696088653225e-06, "loss": 0.1401, "step": 456175 }, { "epoch": 4.49, "grad_norm": 0.8239110112190247, "learning_rate": 2.7856454864110737e-06, "loss": 0.0707, "step": 456200 }, { "epoch": 4.49, "grad_norm": 17.151777267456055, "learning_rate": 2.7855213639568257e-06, "loss": 0.0844, "step": 456225 }, { "epoch": 4.49, "grad_norm": 1.1183106899261475, "learning_rate": 2.785397241502577e-06, "loss": 0.087, "step": 456250 }, { "epoch": 4.49, "grad_norm": 10.867612838745117, "learning_rate": 2.7852731190483286e-06, "loss": 0.1353, "step": 456275 }, { "epoch": 4.49, "grad_norm": 14.577186584472656, "learning_rate": 2.78514899659408e-06, "loss": 0.0828, "step": 456300 }, { "epoch": 4.49, "grad_norm": 10.745375633239746, "learning_rate": 2.785024874139832e-06, "loss": 0.0981, "step": 456325 }, { "epoch": 4.49, "grad_norm": 1.7314989566802979, "learning_rate": 2.784900751685583e-06, "loss": 0.0783, "step": 456350 }, { "epoch": 4.49, "grad_norm": 9.50405216217041, "learning_rate": 2.784776629231335e-06, "loss": 0.0973, "step": 456375 }, { "epoch": 4.49, "grad_norm": 1.0303354263305664, "learning_rate": 2.7846525067770863e-06, "loss": 0.0926, "step": 456400 }, { "epoch": 4.49, "grad_norm": 15.64710807800293, "learning_rate": 2.7845283843228375e-06, "loss": 0.1217, "step": 456425 }, { "epoch": 4.49, "grad_norm": 6.438575744628906, "learning_rate": 2.784404261868589e-06, "loss": 0.0942, "step": 456450 }, { "epoch": 4.49, "grad_norm": 12.034554481506348, "learning_rate": 2.784280139414341e-06, "loss": 0.0905, "step": 456475 }, { "epoch": 4.49, "grad_norm": 0.2934117317199707, "learning_rate": 2.7841560169600924e-06, "loss": 0.0888, "step": 456500 }, { "epoch": 4.49, "grad_norm": 15.45113754272461, "learning_rate": 2.7840318945058436e-06, "loss": 0.1157, "step": 456525 }, { "epoch": 4.49, "grad_norm": 3.472210645675659, "learning_rate": 2.7839077720515957e-06, "loss": 0.0736, "step": 456550 }, { "epoch": 4.49, "grad_norm": 9.573770523071289, "learning_rate": 2.783783649597347e-06, "loss": 0.1007, "step": 456575 }, { "epoch": 4.49, "grad_norm": 4.772800445556641, "learning_rate": 2.783659527143098e-06, "loss": 0.0732, "step": 456600 }, { "epoch": 4.49, "grad_norm": 12.292373657226562, "learning_rate": 2.78353540468885e-06, "loss": 0.1477, "step": 456625 }, { "epoch": 4.49, "grad_norm": 2.291055679321289, "learning_rate": 2.7834112822346014e-06, "loss": 0.0822, "step": 456650 }, { "epoch": 4.49, "grad_norm": 15.18777084350586, "learning_rate": 2.783287159780353e-06, "loss": 0.1251, "step": 456675 }, { "epoch": 4.49, "grad_norm": 0.486692875623703, "learning_rate": 2.7831630373261047e-06, "loss": 0.0616, "step": 456700 }, { "epoch": 4.49, "grad_norm": 12.973625183105469, "learning_rate": 2.7830389148718563e-06, "loss": 0.1285, "step": 456725 }, { "epoch": 4.49, "grad_norm": 1.6705981492996216, "learning_rate": 2.7829147924176075e-06, "loss": 0.0772, "step": 456750 }, { "epoch": 4.49, "grad_norm": 4.5053205490112305, "learning_rate": 2.7827906699633596e-06, "loss": 0.1327, "step": 456775 }, { "epoch": 4.49, "grad_norm": 4.722771167755127, "learning_rate": 2.7826665475091108e-06, "loss": 0.0953, "step": 456800 }, { "epoch": 4.49, "grad_norm": 14.409794807434082, "learning_rate": 2.782542425054862e-06, "loss": 0.1404, "step": 456825 }, { "epoch": 4.49, "grad_norm": 3.9078292846679688, "learning_rate": 2.782418302600614e-06, "loss": 0.0968, "step": 456850 }, { "epoch": 4.49, "grad_norm": 5.362343788146973, "learning_rate": 2.7822941801463653e-06, "loss": 0.126, "step": 456875 }, { "epoch": 4.49, "grad_norm": 0.6318978667259216, "learning_rate": 2.782170057692117e-06, "loss": 0.0784, "step": 456900 }, { "epoch": 4.49, "grad_norm": 11.9528226852417, "learning_rate": 2.7820459352378685e-06, "loss": 0.1194, "step": 456925 }, { "epoch": 4.49, "grad_norm": 4.653497219085693, "learning_rate": 2.78192181278362e-06, "loss": 0.0747, "step": 456950 }, { "epoch": 4.49, "grad_norm": 12.928133010864258, "learning_rate": 2.7817976903293714e-06, "loss": 0.0976, "step": 456975 }, { "epoch": 4.49, "grad_norm": 6.3983845710754395, "learning_rate": 2.7816735678751234e-06, "loss": 0.09, "step": 457000 }, { "epoch": 4.49, "grad_norm": 13.619693756103516, "learning_rate": 2.7815494454208747e-06, "loss": 0.1234, "step": 457025 }, { "epoch": 4.49, "grad_norm": 4.0578227043151855, "learning_rate": 2.781425322966626e-06, "loss": 0.0733, "step": 457050 }, { "epoch": 4.49, "grad_norm": 10.112156867980957, "learning_rate": 2.781301200512378e-06, "loss": 0.1173, "step": 457075 }, { "epoch": 4.49, "grad_norm": 0.16855861246585846, "learning_rate": 2.781177078058129e-06, "loss": 0.0669, "step": 457100 }, { "epoch": 4.49, "grad_norm": 16.833553314208984, "learning_rate": 2.7810529556038808e-06, "loss": 0.0982, "step": 457125 }, { "epoch": 4.49, "grad_norm": 5.049875736236572, "learning_rate": 2.7809288331496324e-06, "loss": 0.0698, "step": 457150 }, { "epoch": 4.5, "grad_norm": 8.868184089660645, "learning_rate": 2.780804710695384e-06, "loss": 0.0911, "step": 457175 }, { "epoch": 4.5, "grad_norm": 3.237558364868164, "learning_rate": 2.7806805882411352e-06, "loss": 0.0811, "step": 457200 }, { "epoch": 4.5, "grad_norm": 4.634446144104004, "learning_rate": 2.7805564657868873e-06, "loss": 0.1285, "step": 457225 }, { "epoch": 4.5, "grad_norm": 3.036898136138916, "learning_rate": 2.7804323433326385e-06, "loss": 0.0582, "step": 457250 }, { "epoch": 4.5, "grad_norm": 9.061800956726074, "learning_rate": 2.7803082208783897e-06, "loss": 0.1124, "step": 457275 }, { "epoch": 4.5, "grad_norm": 2.2173032760620117, "learning_rate": 2.7801840984241414e-06, "loss": 0.067, "step": 457300 }, { "epoch": 4.5, "grad_norm": 6.0430521965026855, "learning_rate": 2.780059975969893e-06, "loss": 0.1105, "step": 457325 }, { "epoch": 4.5, "grad_norm": 2.20908784866333, "learning_rate": 2.7799358535156446e-06, "loss": 0.0757, "step": 457350 }, { "epoch": 4.5, "grad_norm": 8.934723854064941, "learning_rate": 2.779811731061396e-06, "loss": 0.1126, "step": 457375 }, { "epoch": 4.5, "grad_norm": 8.699161529541016, "learning_rate": 2.779687608607148e-06, "loss": 0.0733, "step": 457400 }, { "epoch": 4.5, "grad_norm": 12.483787536621094, "learning_rate": 2.779563486152899e-06, "loss": 0.1091, "step": 457425 }, { "epoch": 4.5, "grad_norm": 1.8100100755691528, "learning_rate": 2.7794393636986503e-06, "loss": 0.0726, "step": 457450 }, { "epoch": 4.5, "grad_norm": 20.54300308227539, "learning_rate": 2.7793152412444024e-06, "loss": 0.1249, "step": 457475 }, { "epoch": 4.5, "grad_norm": 0.09374067932367325, "learning_rate": 2.7791911187901536e-06, "loss": 0.0762, "step": 457500 }, { "epoch": 4.5, "grad_norm": 15.981900215148926, "learning_rate": 2.7790669963359052e-06, "loss": 0.1232, "step": 457525 }, { "epoch": 4.5, "grad_norm": 11.65615177154541, "learning_rate": 2.778942873881657e-06, "loss": 0.0824, "step": 457550 }, { "epoch": 4.5, "grad_norm": 19.773029327392578, "learning_rate": 2.7788187514274085e-06, "loss": 0.1055, "step": 457575 }, { "epoch": 4.5, "grad_norm": 2.9705374240875244, "learning_rate": 2.7786946289731597e-06, "loss": 0.0514, "step": 457600 }, { "epoch": 4.5, "grad_norm": 22.428617477416992, "learning_rate": 2.7785705065189118e-06, "loss": 0.1105, "step": 457625 }, { "epoch": 4.5, "grad_norm": 7.026367664337158, "learning_rate": 2.778446384064663e-06, "loss": 0.0686, "step": 457650 }, { "epoch": 4.5, "grad_norm": 17.588319778442383, "learning_rate": 2.778322261610414e-06, "loss": 0.1411, "step": 457675 }, { "epoch": 4.5, "grad_norm": 6.031371593475342, "learning_rate": 2.7781981391561663e-06, "loss": 0.0545, "step": 457700 }, { "epoch": 4.5, "grad_norm": 11.226048469543457, "learning_rate": 2.7780740167019175e-06, "loss": 0.0855, "step": 457725 }, { "epoch": 4.5, "grad_norm": 6.546661376953125, "learning_rate": 2.777949894247669e-06, "loss": 0.0611, "step": 457750 }, { "epoch": 4.5, "grad_norm": 14.4788179397583, "learning_rate": 2.7778257717934207e-06, "loss": 0.11, "step": 457775 }, { "epoch": 4.5, "grad_norm": 11.546278953552246, "learning_rate": 2.7777016493391724e-06, "loss": 0.0871, "step": 457800 }, { "epoch": 4.5, "grad_norm": 12.20548152923584, "learning_rate": 2.7775775268849236e-06, "loss": 0.1022, "step": 457825 }, { "epoch": 4.5, "grad_norm": 3.304950475692749, "learning_rate": 2.7774534044306756e-06, "loss": 0.0734, "step": 457850 }, { "epoch": 4.5, "grad_norm": 11.494990348815918, "learning_rate": 2.777329281976427e-06, "loss": 0.1337, "step": 457875 }, { "epoch": 4.5, "grad_norm": 6.0197858810424805, "learning_rate": 2.7772051595221785e-06, "loss": 0.0694, "step": 457900 }, { "epoch": 4.5, "grad_norm": 17.14763069152832, "learning_rate": 2.77708103706793e-06, "loss": 0.1091, "step": 457925 }, { "epoch": 4.5, "grad_norm": 5.401466369628906, "learning_rate": 2.7769569146136818e-06, "loss": 0.1027, "step": 457950 }, { "epoch": 4.5, "grad_norm": 14.858786582946777, "learning_rate": 2.776832792159433e-06, "loss": 0.1124, "step": 457975 }, { "epoch": 4.5, "grad_norm": 12.427326202392578, "learning_rate": 2.776708669705185e-06, "loss": 0.0763, "step": 458000 }, { "epoch": 4.5, "grad_norm": 19.41539192199707, "learning_rate": 2.7765845472509362e-06, "loss": 0.1023, "step": 458025 }, { "epoch": 4.5, "grad_norm": 7.070279121398926, "learning_rate": 2.7764604247966875e-06, "loss": 0.0857, "step": 458050 }, { "epoch": 4.5, "grad_norm": 9.639434814453125, "learning_rate": 2.7763363023424395e-06, "loss": 0.0922, "step": 458075 }, { "epoch": 4.5, "grad_norm": 3.116657018661499, "learning_rate": 2.7762121798881907e-06, "loss": 0.0581, "step": 458100 }, { "epoch": 4.5, "grad_norm": 9.379036903381348, "learning_rate": 2.7760880574339424e-06, "loss": 0.0983, "step": 458125 }, { "epoch": 4.5, "grad_norm": 2.70509672164917, "learning_rate": 2.7759639349796936e-06, "loss": 0.0777, "step": 458150 }, { "epoch": 4.5, "grad_norm": 18.150043487548828, "learning_rate": 2.7758398125254456e-06, "loss": 0.1141, "step": 458175 }, { "epoch": 4.51, "grad_norm": 2.90022611618042, "learning_rate": 2.775715690071197e-06, "loss": 0.0727, "step": 458200 }, { "epoch": 4.51, "grad_norm": 12.139327049255371, "learning_rate": 2.775591567616948e-06, "loss": 0.1238, "step": 458225 }, { "epoch": 4.51, "grad_norm": 0.13050486147403717, "learning_rate": 2.7754674451627e-06, "loss": 0.0854, "step": 458250 }, { "epoch": 4.51, "grad_norm": 9.743023872375488, "learning_rate": 2.7753433227084513e-06, "loss": 0.1053, "step": 458275 }, { "epoch": 4.51, "grad_norm": 0.23192532360553741, "learning_rate": 2.775219200254203e-06, "loss": 0.0518, "step": 458300 }, { "epoch": 4.51, "grad_norm": 5.022729396820068, "learning_rate": 2.7750950777999546e-06, "loss": 0.1124, "step": 458325 }, { "epoch": 4.51, "grad_norm": 2.5443711280822754, "learning_rate": 2.7749709553457062e-06, "loss": 0.0695, "step": 458350 }, { "epoch": 4.51, "grad_norm": 13.586969375610352, "learning_rate": 2.7748468328914574e-06, "loss": 0.1246, "step": 458375 }, { "epoch": 4.51, "grad_norm": 3.483828544616699, "learning_rate": 2.7747227104372095e-06, "loss": 0.0674, "step": 458400 }, { "epoch": 4.51, "grad_norm": 9.708309173583984, "learning_rate": 2.7745985879829607e-06, "loss": 0.0968, "step": 458425 }, { "epoch": 4.51, "grad_norm": 8.297595977783203, "learning_rate": 2.774474465528712e-06, "loss": 0.0862, "step": 458450 }, { "epoch": 4.51, "grad_norm": 10.986157417297363, "learning_rate": 2.774350343074464e-06, "loss": 0.1295, "step": 458475 }, { "epoch": 4.51, "grad_norm": 4.673452854156494, "learning_rate": 2.774226220620215e-06, "loss": 0.0641, "step": 458500 }, { "epoch": 4.51, "grad_norm": 8.179126739501953, "learning_rate": 2.774102098165967e-06, "loss": 0.0863, "step": 458525 }, { "epoch": 4.51, "grad_norm": 5.946209907531738, "learning_rate": 2.7739779757117185e-06, "loss": 0.0785, "step": 458550 }, { "epoch": 4.51, "grad_norm": 12.873091697692871, "learning_rate": 2.77385385325747e-06, "loss": 0.1054, "step": 458575 }, { "epoch": 4.51, "grad_norm": 5.280994892120361, "learning_rate": 2.7737297308032213e-06, "loss": 0.068, "step": 458600 }, { "epoch": 4.51, "grad_norm": 6.455939292907715, "learning_rate": 2.7736056083489734e-06, "loss": 0.1224, "step": 458625 }, { "epoch": 4.51, "grad_norm": 9.004380226135254, "learning_rate": 2.7734814858947246e-06, "loss": 0.0997, "step": 458650 }, { "epoch": 4.51, "grad_norm": 18.234920501708984, "learning_rate": 2.7733573634404758e-06, "loss": 0.1152, "step": 458675 }, { "epoch": 4.51, "grad_norm": 5.124051094055176, "learning_rate": 2.773233240986228e-06, "loss": 0.0752, "step": 458700 }, { "epoch": 4.51, "grad_norm": 8.963199615478516, "learning_rate": 2.773109118531979e-06, "loss": 0.1282, "step": 458725 }, { "epoch": 4.51, "grad_norm": 4.2365241050720215, "learning_rate": 2.7729849960777307e-06, "loss": 0.0699, "step": 458750 }, { "epoch": 4.51, "grad_norm": 5.969352722167969, "learning_rate": 2.7728608736234823e-06, "loss": 0.0918, "step": 458775 }, { "epoch": 4.51, "grad_norm": 1.97771155834198, "learning_rate": 2.772736751169234e-06, "loss": 0.067, "step": 458800 }, { "epoch": 4.51, "grad_norm": 11.420557022094727, "learning_rate": 2.772612628714985e-06, "loss": 0.0814, "step": 458825 }, { "epoch": 4.51, "grad_norm": 4.899914264678955, "learning_rate": 2.7724885062607372e-06, "loss": 0.0733, "step": 458850 }, { "epoch": 4.51, "grad_norm": 20.979839324951172, "learning_rate": 2.7723643838064884e-06, "loss": 0.0973, "step": 458875 }, { "epoch": 4.51, "grad_norm": 5.271071434020996, "learning_rate": 2.7722452262504095e-06, "loss": 0.0614, "step": 458900 }, { "epoch": 4.51, "grad_norm": 10.493672370910645, "learning_rate": 2.7721211037961615e-06, "loss": 0.0987, "step": 458925 }, { "epoch": 4.51, "grad_norm": 4.7806172370910645, "learning_rate": 2.7719969813419127e-06, "loss": 0.0673, "step": 458950 }, { "epoch": 4.51, "grad_norm": 9.696800231933594, "learning_rate": 2.7718728588876644e-06, "loss": 0.1082, "step": 458975 }, { "epoch": 4.51, "grad_norm": 4.974608421325684, "learning_rate": 2.771748736433416e-06, "loss": 0.0657, "step": 459000 }, { "epoch": 4.51, "grad_norm": 8.689769744873047, "learning_rate": 2.7716246139791676e-06, "loss": 0.0717, "step": 459025 }, { "epoch": 4.51, "grad_norm": 3.942929983139038, "learning_rate": 2.771500491524919e-06, "loss": 0.0728, "step": 459050 }, { "epoch": 4.51, "grad_norm": 13.262232780456543, "learning_rate": 2.771376369070671e-06, "loss": 0.0989, "step": 459075 }, { "epoch": 4.51, "grad_norm": 2.4265174865722656, "learning_rate": 2.771252246616422e-06, "loss": 0.0875, "step": 459100 }, { "epoch": 4.51, "grad_norm": 16.10926055908203, "learning_rate": 2.7711281241621733e-06, "loss": 0.1114, "step": 459125 }, { "epoch": 4.51, "grad_norm": 2.952991247177124, "learning_rate": 2.7710040017079254e-06, "loss": 0.1023, "step": 459150 }, { "epoch": 4.51, "grad_norm": 8.411604881286621, "learning_rate": 2.7708798792536766e-06, "loss": 0.0968, "step": 459175 }, { "epoch": 4.51, "grad_norm": 0.40566250681877136, "learning_rate": 2.7707557567994282e-06, "loss": 0.0767, "step": 459200 }, { "epoch": 4.52, "grad_norm": 26.7725887298584, "learning_rate": 2.77063163434518e-06, "loss": 0.1029, "step": 459225 }, { "epoch": 4.52, "grad_norm": 2.499269962310791, "learning_rate": 2.7705075118909315e-06, "loss": 0.0609, "step": 459250 }, { "epoch": 4.52, "grad_norm": 16.430269241333008, "learning_rate": 2.7703833894366827e-06, "loss": 0.1205, "step": 459275 }, { "epoch": 4.52, "grad_norm": 3.4999172687530518, "learning_rate": 2.7702592669824348e-06, "loss": 0.0841, "step": 459300 }, { "epoch": 4.52, "grad_norm": 13.754328727722168, "learning_rate": 2.770135144528186e-06, "loss": 0.1269, "step": 459325 }, { "epoch": 4.52, "grad_norm": 2.5079710483551025, "learning_rate": 2.770011022073937e-06, "loss": 0.0621, "step": 459350 }, { "epoch": 4.52, "grad_norm": 12.034015655517578, "learning_rate": 2.7698868996196893e-06, "loss": 0.1107, "step": 459375 }, { "epoch": 4.52, "grad_norm": 1.892410397529602, "learning_rate": 2.7697627771654405e-06, "loss": 0.0578, "step": 459400 }, { "epoch": 4.52, "grad_norm": 16.835113525390625, "learning_rate": 2.769638654711192e-06, "loss": 0.1084, "step": 459425 }, { "epoch": 4.52, "grad_norm": 1.031327486038208, "learning_rate": 2.7695145322569437e-06, "loss": 0.0766, "step": 459450 }, { "epoch": 4.52, "grad_norm": 10.942011833190918, "learning_rate": 2.7693904098026954e-06, "loss": 0.1302, "step": 459475 }, { "epoch": 4.52, "grad_norm": 1.636289119720459, "learning_rate": 2.7692662873484466e-06, "loss": 0.0813, "step": 459500 }, { "epoch": 4.52, "grad_norm": 13.593072891235352, "learning_rate": 2.7691421648941986e-06, "loss": 0.1138, "step": 459525 }, { "epoch": 4.52, "grad_norm": 7.9259033203125, "learning_rate": 2.76901804243995e-06, "loss": 0.0708, "step": 459550 }, { "epoch": 4.52, "grad_norm": 9.046977043151855, "learning_rate": 2.768893919985701e-06, "loss": 0.0919, "step": 459575 }, { "epoch": 4.52, "grad_norm": 0.9416574239730835, "learning_rate": 2.7687697975314527e-06, "loss": 0.0888, "step": 459600 }, { "epoch": 4.52, "grad_norm": 18.23567771911621, "learning_rate": 2.7686456750772043e-06, "loss": 0.0947, "step": 459625 }, { "epoch": 4.52, "grad_norm": 4.4012532234191895, "learning_rate": 2.768521552622956e-06, "loss": 0.0814, "step": 459650 }, { "epoch": 4.52, "grad_norm": 16.33942985534668, "learning_rate": 2.768397430168707e-06, "loss": 0.0887, "step": 459675 }, { "epoch": 4.52, "grad_norm": 10.42885971069336, "learning_rate": 2.7682733077144592e-06, "loss": 0.0842, "step": 459700 }, { "epoch": 4.52, "grad_norm": 19.933794021606445, "learning_rate": 2.7681491852602105e-06, "loss": 0.1296, "step": 459725 }, { "epoch": 4.52, "grad_norm": 7.000664234161377, "learning_rate": 2.7680250628059617e-06, "loss": 0.0628, "step": 459750 }, { "epoch": 4.52, "grad_norm": 10.598206520080566, "learning_rate": 2.7679009403517137e-06, "loss": 0.0946, "step": 459775 }, { "epoch": 4.52, "grad_norm": 1.7253628969192505, "learning_rate": 2.767776817897465e-06, "loss": 0.0477, "step": 459800 }, { "epoch": 4.52, "grad_norm": 12.844758987426758, "learning_rate": 2.7676526954432166e-06, "loss": 0.1151, "step": 459825 }, { "epoch": 4.52, "grad_norm": 6.611273765563965, "learning_rate": 2.767528572988968e-06, "loss": 0.0842, "step": 459850 }, { "epoch": 4.52, "grad_norm": 6.722512245178223, "learning_rate": 2.76740445053472e-06, "loss": 0.1367, "step": 459875 }, { "epoch": 4.52, "grad_norm": 3.1312241554260254, "learning_rate": 2.767280328080471e-06, "loss": 0.091, "step": 459900 }, { "epoch": 4.52, "grad_norm": 24.52311897277832, "learning_rate": 2.767156205626223e-06, "loss": 0.1164, "step": 459925 }, { "epoch": 4.52, "grad_norm": 12.660104751586914, "learning_rate": 2.7670320831719743e-06, "loss": 0.0836, "step": 459950 }, { "epoch": 4.52, "grad_norm": 14.959395408630371, "learning_rate": 2.7669079607177255e-06, "loss": 0.1199, "step": 459975 }, { "epoch": 4.52, "grad_norm": 2.893411874771118, "learning_rate": 2.7667838382634776e-06, "loss": 0.0708, "step": 460000 }, { "epoch": 4.52, "eval_loss": 0.7141024470329285, "eval_runtime": 6109.4348, "eval_samples_per_second": 1.55, "eval_steps_per_second": 0.194, "eval_wer": 0.11707332775381446, "step": 460000 }, { "epoch": 4.52, "grad_norm": 10.282303810119629, "learning_rate": 2.766659715809229e-06, "loss": 0.0881, "step": 460025 }, { "epoch": 4.52, "grad_norm": 1.1769158840179443, "learning_rate": 2.7665355933549804e-06, "loss": 0.0733, "step": 460050 }, { "epoch": 4.52, "grad_norm": 2.260364055633545, "learning_rate": 2.766411470900732e-06, "loss": 0.0982, "step": 460075 }, { "epoch": 4.52, "grad_norm": 1.1263141632080078, "learning_rate": 2.7662873484464837e-06, "loss": 0.0708, "step": 460100 }, { "epoch": 4.52, "grad_norm": 12.271966934204102, "learning_rate": 2.766163225992235e-06, "loss": 0.0967, "step": 460125 }, { "epoch": 4.52, "grad_norm": 0.9465341567993164, "learning_rate": 2.766039103537987e-06, "loss": 0.0845, "step": 460150 }, { "epoch": 4.52, "grad_norm": 6.486082077026367, "learning_rate": 2.765914981083738e-06, "loss": 0.0855, "step": 460175 }, { "epoch": 4.52, "grad_norm": 8.602561950683594, "learning_rate": 2.7657908586294894e-06, "loss": 0.085, "step": 460200 }, { "epoch": 4.53, "grad_norm": 12.968403816223145, "learning_rate": 2.7656667361752415e-06, "loss": 0.1207, "step": 460225 }, { "epoch": 4.53, "grad_norm": 6.605525970458984, "learning_rate": 2.7655426137209927e-06, "loss": 0.0669, "step": 460250 }, { "epoch": 4.53, "grad_norm": 11.240764617919922, "learning_rate": 2.7654184912667443e-06, "loss": 0.1075, "step": 460275 }, { "epoch": 4.53, "grad_norm": 1.6854246854782104, "learning_rate": 2.765294368812496e-06, "loss": 0.0866, "step": 460300 }, { "epoch": 4.53, "grad_norm": 10.286579132080078, "learning_rate": 2.7651702463582476e-06, "loss": 0.1147, "step": 460325 }, { "epoch": 4.53, "grad_norm": 8.018974304199219, "learning_rate": 2.765046123903999e-06, "loss": 0.0751, "step": 460350 }, { "epoch": 4.53, "grad_norm": 17.368059158325195, "learning_rate": 2.764922001449751e-06, "loss": 0.11, "step": 460375 }, { "epoch": 4.53, "grad_norm": 3.1228675842285156, "learning_rate": 2.764797878995502e-06, "loss": 0.0776, "step": 460400 }, { "epoch": 4.53, "grad_norm": 8.440079689025879, "learning_rate": 2.7646737565412533e-06, "loss": 0.0984, "step": 460425 }, { "epoch": 4.53, "grad_norm": 2.00744366645813, "learning_rate": 2.764549634087005e-06, "loss": 0.0871, "step": 460450 }, { "epoch": 4.53, "grad_norm": 11.237653732299805, "learning_rate": 2.7644255116327565e-06, "loss": 0.1148, "step": 460475 }, { "epoch": 4.53, "grad_norm": 2.515241861343384, "learning_rate": 2.764301389178508e-06, "loss": 0.0657, "step": 460500 }, { "epoch": 4.53, "grad_norm": 14.132705688476562, "learning_rate": 2.7641772667242594e-06, "loss": 0.1111, "step": 460525 }, { "epoch": 4.53, "grad_norm": 5.82793664932251, "learning_rate": 2.7640531442700114e-06, "loss": 0.0688, "step": 460550 }, { "epoch": 4.53, "grad_norm": 10.275362968444824, "learning_rate": 2.7639290218157627e-06, "loss": 0.0855, "step": 460575 }, { "epoch": 4.53, "grad_norm": 1.6271475553512573, "learning_rate": 2.7638048993615143e-06, "loss": 0.064, "step": 460600 }, { "epoch": 4.53, "grad_norm": 18.0947208404541, "learning_rate": 2.763680776907266e-06, "loss": 0.0887, "step": 460625 }, { "epoch": 4.53, "grad_norm": 2.0089573860168457, "learning_rate": 2.7635566544530176e-06, "loss": 0.1168, "step": 460650 }, { "epoch": 4.53, "grad_norm": 14.911259651184082, "learning_rate": 2.7634325319987688e-06, "loss": 0.1138, "step": 460675 }, { "epoch": 4.53, "grad_norm": 6.690704822540283, "learning_rate": 2.7633084095445204e-06, "loss": 0.0652, "step": 460700 }, { "epoch": 4.53, "grad_norm": 19.457500457763672, "learning_rate": 2.763184287090272e-06, "loss": 0.1204, "step": 460725 }, { "epoch": 4.53, "grad_norm": 3.5037598609924316, "learning_rate": 2.7630601646360233e-06, "loss": 0.093, "step": 460750 }, { "epoch": 4.53, "grad_norm": 12.023602485656738, "learning_rate": 2.7629360421817753e-06, "loss": 0.1084, "step": 460775 }, { "epoch": 4.53, "grad_norm": 3.959057569503784, "learning_rate": 2.7628119197275265e-06, "loss": 0.1056, "step": 460800 }, { "epoch": 4.53, "grad_norm": 25.457693099975586, "learning_rate": 2.762687797273278e-06, "loss": 0.1215, "step": 460825 }, { "epoch": 4.53, "grad_norm": 2.3165197372436523, "learning_rate": 2.76256367481903e-06, "loss": 0.0719, "step": 460850 }, { "epoch": 4.53, "grad_norm": 8.417646408081055, "learning_rate": 2.7624395523647814e-06, "loss": 0.1173, "step": 460875 }, { "epoch": 4.53, "grad_norm": 4.868514060974121, "learning_rate": 2.7623154299105326e-06, "loss": 0.0715, "step": 460900 }, { "epoch": 4.53, "grad_norm": 8.664745330810547, "learning_rate": 2.7621913074562847e-06, "loss": 0.1234, "step": 460925 }, { "epoch": 4.53, "grad_norm": 10.08087158203125, "learning_rate": 2.762067185002036e-06, "loss": 0.0955, "step": 460950 }, { "epoch": 4.53, "grad_norm": 7.950956344604492, "learning_rate": 2.761943062547787e-06, "loss": 0.1377, "step": 460975 }, { "epoch": 4.53, "grad_norm": 3.4972386360168457, "learning_rate": 2.761818940093539e-06, "loss": 0.0876, "step": 461000 }, { "epoch": 4.53, "grad_norm": 18.649560928344727, "learning_rate": 2.7616948176392904e-06, "loss": 0.1394, "step": 461025 }, { "epoch": 4.53, "grad_norm": 2.920078754425049, "learning_rate": 2.761570695185042e-06, "loss": 0.0508, "step": 461050 }, { "epoch": 4.53, "grad_norm": 9.23044204711914, "learning_rate": 2.7614465727307937e-06, "loss": 0.1228, "step": 461075 }, { "epoch": 4.53, "grad_norm": 2.5190727710723877, "learning_rate": 2.7613224502765453e-06, "loss": 0.0882, "step": 461100 }, { "epoch": 4.53, "grad_norm": 7.407963752746582, "learning_rate": 2.7611983278222965e-06, "loss": 0.0988, "step": 461125 }, { "epoch": 4.53, "grad_norm": 0.5618082880973816, "learning_rate": 2.7610742053680486e-06, "loss": 0.0675, "step": 461150 }, { "epoch": 4.53, "grad_norm": 7.516798496246338, "learning_rate": 2.7609500829137998e-06, "loss": 0.1013, "step": 461175 }, { "epoch": 4.53, "grad_norm": 0.9552844166755676, "learning_rate": 2.760825960459551e-06, "loss": 0.0764, "step": 461200 }, { "epoch": 4.53, "grad_norm": 7.236693859100342, "learning_rate": 2.760706802903473e-06, "loss": 0.0721, "step": 461225 }, { "epoch": 4.54, "grad_norm": 4.431150913238525, "learning_rate": 2.760582680449224e-06, "loss": 0.0817, "step": 461250 }, { "epoch": 4.54, "grad_norm": 18.398923873901367, "learning_rate": 2.7604585579949757e-06, "loss": 0.1034, "step": 461275 }, { "epoch": 4.54, "grad_norm": 4.143792152404785, "learning_rate": 2.7603344355407273e-06, "loss": 0.0762, "step": 461300 }, { "epoch": 4.54, "grad_norm": 19.14529800415039, "learning_rate": 2.760210313086479e-06, "loss": 0.1221, "step": 461325 }, { "epoch": 4.54, "grad_norm": 4.404816627502441, "learning_rate": 2.76008619063223e-06, "loss": 0.0916, "step": 461350 }, { "epoch": 4.54, "grad_norm": 17.7263126373291, "learning_rate": 2.7599620681779823e-06, "loss": 0.1109, "step": 461375 }, { "epoch": 4.54, "grad_norm": 6.2283453941345215, "learning_rate": 2.7598379457237335e-06, "loss": 0.0763, "step": 461400 }, { "epoch": 4.54, "grad_norm": 12.57240104675293, "learning_rate": 2.7597138232694847e-06, "loss": 0.1156, "step": 461425 }, { "epoch": 4.54, "grad_norm": 5.795522212982178, "learning_rate": 2.7595897008152367e-06, "loss": 0.0722, "step": 461450 }, { "epoch": 4.54, "grad_norm": 23.146312713623047, "learning_rate": 2.759465578360988e-06, "loss": 0.1161, "step": 461475 }, { "epoch": 4.54, "grad_norm": 4.5646653175354, "learning_rate": 2.7593414559067396e-06, "loss": 0.0471, "step": 461500 }, { "epoch": 4.54, "grad_norm": 22.950654983520508, "learning_rate": 2.7592173334524912e-06, "loss": 0.1023, "step": 461525 }, { "epoch": 4.54, "grad_norm": 6.168232440948486, "learning_rate": 2.759093210998243e-06, "loss": 0.0597, "step": 461550 }, { "epoch": 4.54, "grad_norm": 14.00244140625, "learning_rate": 2.758969088543994e-06, "loss": 0.1004, "step": 461575 }, { "epoch": 4.54, "grad_norm": 6.0483078956604, "learning_rate": 2.758844966089746e-06, "loss": 0.0883, "step": 461600 }, { "epoch": 4.54, "grad_norm": 13.916001319885254, "learning_rate": 2.7587208436354973e-06, "loss": 0.095, "step": 461625 }, { "epoch": 4.54, "grad_norm": 6.3443145751953125, "learning_rate": 2.7585967211812485e-06, "loss": 0.077, "step": 461650 }, { "epoch": 4.54, "grad_norm": 8.873587608337402, "learning_rate": 2.7584725987270006e-06, "loss": 0.0945, "step": 461675 }, { "epoch": 4.54, "grad_norm": 4.197493553161621, "learning_rate": 2.758348476272752e-06, "loss": 0.0953, "step": 461700 }, { "epoch": 4.54, "grad_norm": 14.470486640930176, "learning_rate": 2.7582243538185034e-06, "loss": 0.1525, "step": 461725 }, { "epoch": 4.54, "grad_norm": 8.230555534362793, "learning_rate": 2.758100231364255e-06, "loss": 0.0789, "step": 461750 }, { "epoch": 4.54, "grad_norm": 22.753528594970703, "learning_rate": 2.7579761089100067e-06, "loss": 0.0964, "step": 461775 }, { "epoch": 4.54, "grad_norm": 4.3033671379089355, "learning_rate": 2.757851986455758e-06, "loss": 0.0738, "step": 461800 }, { "epoch": 4.54, "grad_norm": 8.818116188049316, "learning_rate": 2.75772786400151e-06, "loss": 0.1043, "step": 461825 }, { "epoch": 4.54, "grad_norm": 2.841874837875366, "learning_rate": 2.757603741547261e-06, "loss": 0.0692, "step": 461850 }, { "epoch": 4.54, "grad_norm": 16.494552612304688, "learning_rate": 2.7574796190930124e-06, "loss": 0.1101, "step": 461875 }, { "epoch": 4.54, "grad_norm": 6.732834339141846, "learning_rate": 2.757355496638764e-06, "loss": 0.058, "step": 461900 }, { "epoch": 4.54, "grad_norm": 27.96565055847168, "learning_rate": 2.7572313741845157e-06, "loss": 0.1007, "step": 461925 }, { "epoch": 4.54, "grad_norm": 8.849324226379395, "learning_rate": 2.7571072517302673e-06, "loss": 0.0935, "step": 461950 }, { "epoch": 4.54, "grad_norm": 7.081066131591797, "learning_rate": 2.7569831292760185e-06, "loss": 0.0712, "step": 461975 }, { "epoch": 4.54, "grad_norm": 4.405538082122803, "learning_rate": 2.7568590068217706e-06, "loss": 0.0748, "step": 462000 }, { "epoch": 4.54, "grad_norm": 14.764602661132812, "learning_rate": 2.756734884367522e-06, "loss": 0.0784, "step": 462025 }, { "epoch": 4.54, "grad_norm": 0.13710105419158936, "learning_rate": 2.756610761913273e-06, "loss": 0.0804, "step": 462050 }, { "epoch": 4.54, "grad_norm": 11.614448547363281, "learning_rate": 2.756486639459025e-06, "loss": 0.0862, "step": 462075 }, { "epoch": 4.54, "grad_norm": 5.987518310546875, "learning_rate": 2.7563625170047763e-06, "loss": 0.0607, "step": 462100 }, { "epoch": 4.54, "grad_norm": 17.2475528717041, "learning_rate": 2.756238394550528e-06, "loss": 0.099, "step": 462125 }, { "epoch": 4.54, "grad_norm": 3.878100872039795, "learning_rate": 2.7561142720962795e-06, "loss": 0.0556, "step": 462150 }, { "epoch": 4.54, "grad_norm": 15.902640342712402, "learning_rate": 2.755990149642031e-06, "loss": 0.1037, "step": 462175 }, { "epoch": 4.54, "grad_norm": 9.073851585388184, "learning_rate": 2.7558660271877824e-06, "loss": 0.1121, "step": 462200 }, { "epoch": 4.54, "grad_norm": 5.04016637802124, "learning_rate": 2.7557419047335345e-06, "loss": 0.0985, "step": 462225 }, { "epoch": 4.54, "grad_norm": 8.943175315856934, "learning_rate": 2.7556177822792857e-06, "loss": 0.0769, "step": 462250 }, { "epoch": 4.55, "grad_norm": 12.244941711425781, "learning_rate": 2.755493659825037e-06, "loss": 0.1093, "step": 462275 }, { "epoch": 4.55, "grad_norm": 2.793147563934326, "learning_rate": 2.755369537370789e-06, "loss": 0.0783, "step": 462300 }, { "epoch": 4.55, "grad_norm": 11.224201202392578, "learning_rate": 2.75524541491654e-06, "loss": 0.0677, "step": 462325 }, { "epoch": 4.55, "grad_norm": 1.8514716625213623, "learning_rate": 2.7551212924622918e-06, "loss": 0.0981, "step": 462350 }, { "epoch": 4.55, "grad_norm": 13.863903045654297, "learning_rate": 2.7549971700080434e-06, "loss": 0.1252, "step": 462375 }, { "epoch": 4.55, "grad_norm": 4.486522197723389, "learning_rate": 2.754873047553795e-06, "loss": 0.0587, "step": 462400 }, { "epoch": 4.55, "grad_norm": 10.710160255432129, "learning_rate": 2.7547489250995463e-06, "loss": 0.1007, "step": 462425 }, { "epoch": 4.55, "grad_norm": 5.161154747009277, "learning_rate": 2.7546248026452983e-06, "loss": 0.0762, "step": 462450 }, { "epoch": 4.55, "grad_norm": 20.14179039001465, "learning_rate": 2.7545006801910495e-06, "loss": 0.1168, "step": 462475 }, { "epoch": 4.55, "grad_norm": 3.158855676651001, "learning_rate": 2.7543765577368007e-06, "loss": 0.0799, "step": 462500 }, { "epoch": 4.55, "grad_norm": 13.587406158447266, "learning_rate": 2.754252435282553e-06, "loss": 0.1045, "step": 462525 }, { "epoch": 4.55, "grad_norm": 0.08883073925971985, "learning_rate": 2.754128312828304e-06, "loss": 0.0907, "step": 462550 }, { "epoch": 4.55, "grad_norm": 11.811948776245117, "learning_rate": 2.7540041903740556e-06, "loss": 0.0945, "step": 462575 }, { "epoch": 4.55, "grad_norm": 4.04717493057251, "learning_rate": 2.7538800679198073e-06, "loss": 0.0833, "step": 462600 }, { "epoch": 4.55, "grad_norm": 11.849316596984863, "learning_rate": 2.753755945465559e-06, "loss": 0.0951, "step": 462625 }, { "epoch": 4.55, "grad_norm": 6.586501121520996, "learning_rate": 2.75363182301131e-06, "loss": 0.0798, "step": 462650 }, { "epoch": 4.55, "grad_norm": 18.455808639526367, "learning_rate": 2.753507700557062e-06, "loss": 0.1022, "step": 462675 }, { "epoch": 4.55, "grad_norm": 1.6370952129364014, "learning_rate": 2.7533835781028134e-06, "loss": 0.0715, "step": 462700 }, { "epoch": 4.55, "grad_norm": 19.9045352935791, "learning_rate": 2.7532594556485646e-06, "loss": 0.1221, "step": 462725 }, { "epoch": 4.55, "grad_norm": 5.046084880828857, "learning_rate": 2.7531353331943162e-06, "loss": 0.0917, "step": 462750 }, { "epoch": 4.55, "grad_norm": 6.5489630699157715, "learning_rate": 2.753011210740068e-06, "loss": 0.0892, "step": 462775 }, { "epoch": 4.55, "grad_norm": 3.9934580326080322, "learning_rate": 2.7528870882858195e-06, "loss": 0.0725, "step": 462800 }, { "epoch": 4.55, "grad_norm": 14.135913848876953, "learning_rate": 2.7527629658315707e-06, "loss": 0.106, "step": 462825 }, { "epoch": 4.55, "grad_norm": 5.0923075675964355, "learning_rate": 2.7526388433773228e-06, "loss": 0.0822, "step": 462850 }, { "epoch": 4.55, "grad_norm": 12.343866348266602, "learning_rate": 2.752514720923074e-06, "loss": 0.0915, "step": 462875 }, { "epoch": 4.55, "grad_norm": 0.48764410614967346, "learning_rate": 2.752390598468825e-06, "loss": 0.1075, "step": 462900 }, { "epoch": 4.55, "grad_norm": 13.37720012664795, "learning_rate": 2.7522664760145773e-06, "loss": 0.0886, "step": 462925 }, { "epoch": 4.55, "grad_norm": 2.0092883110046387, "learning_rate": 2.7521423535603285e-06, "loss": 0.0851, "step": 462950 }, { "epoch": 4.55, "grad_norm": 14.906240463256836, "learning_rate": 2.75201823110608e-06, "loss": 0.1088, "step": 462975 }, { "epoch": 4.55, "grad_norm": 2.6303980350494385, "learning_rate": 2.7518941086518317e-06, "loss": 0.077, "step": 463000 }, { "epoch": 4.55, "grad_norm": 16.037355422973633, "learning_rate": 2.7517699861975834e-06, "loss": 0.1169, "step": 463025 }, { "epoch": 4.55, "grad_norm": 1.4781373739242554, "learning_rate": 2.7516458637433346e-06, "loss": 0.0773, "step": 463050 }, { "epoch": 4.55, "grad_norm": 12.389397621154785, "learning_rate": 2.7515217412890867e-06, "loss": 0.0708, "step": 463075 }, { "epoch": 4.55, "grad_norm": 1.6470592021942139, "learning_rate": 2.751397618834838e-06, "loss": 0.0794, "step": 463100 }, { "epoch": 4.55, "grad_norm": 9.185089111328125, "learning_rate": 2.751273496380589e-06, "loss": 0.0885, "step": 463125 }, { "epoch": 4.55, "grad_norm": 10.559779167175293, "learning_rate": 2.751149373926341e-06, "loss": 0.1016, "step": 463150 }, { "epoch": 4.55, "grad_norm": 8.369686126708984, "learning_rate": 2.7510252514720923e-06, "loss": 0.0935, "step": 463175 }, { "epoch": 4.55, "grad_norm": 5.277775764465332, "learning_rate": 2.750901129017844e-06, "loss": 0.0915, "step": 463200 }, { "epoch": 4.55, "grad_norm": 5.606092929840088, "learning_rate": 2.7507770065635956e-06, "loss": 0.0903, "step": 463225 }, { "epoch": 4.55, "grad_norm": 8.759259223937988, "learning_rate": 2.7506528841093473e-06, "loss": 0.0885, "step": 463250 }, { "epoch": 4.55, "grad_norm": 20.234468460083008, "learning_rate": 2.7505287616550985e-06, "loss": 0.1208, "step": 463275 }, { "epoch": 4.56, "grad_norm": 2.495009422302246, "learning_rate": 2.7504046392008505e-06, "loss": 0.0729, "step": 463300 }, { "epoch": 4.56, "grad_norm": 14.391393661499023, "learning_rate": 2.7502805167466017e-06, "loss": 0.1489, "step": 463325 }, { "epoch": 4.56, "grad_norm": 0.308296263217926, "learning_rate": 2.750156394292353e-06, "loss": 0.0752, "step": 463350 }, { "epoch": 4.56, "grad_norm": 9.604133605957031, "learning_rate": 2.750032271838105e-06, "loss": 0.108, "step": 463375 }, { "epoch": 4.56, "grad_norm": 4.692534923553467, "learning_rate": 2.7499081493838562e-06, "loss": 0.0929, "step": 463400 }, { "epoch": 4.56, "grad_norm": 16.813552856445312, "learning_rate": 2.749784026929608e-06, "loss": 0.1532, "step": 463425 }, { "epoch": 4.56, "grad_norm": 3.1640236377716064, "learning_rate": 2.7496599044753595e-06, "loss": 0.0991, "step": 463450 }, { "epoch": 4.56, "grad_norm": 15.47938060760498, "learning_rate": 2.749535782021111e-06, "loss": 0.1075, "step": 463475 }, { "epoch": 4.56, "grad_norm": 5.80536413192749, "learning_rate": 2.7494116595668623e-06, "loss": 0.0851, "step": 463500 }, { "epoch": 4.56, "grad_norm": 14.665818214416504, "learning_rate": 2.7492875371126144e-06, "loss": 0.1225, "step": 463525 }, { "epoch": 4.56, "grad_norm": 15.440447807312012, "learning_rate": 2.7491634146583656e-06, "loss": 0.0681, "step": 463550 }, { "epoch": 4.56, "grad_norm": 21.333816528320312, "learning_rate": 2.7490392922041172e-06, "loss": 0.1006, "step": 463575 }, { "epoch": 4.56, "grad_norm": 2.838325023651123, "learning_rate": 2.7489151697498684e-06, "loss": 0.0867, "step": 463600 }, { "epoch": 4.56, "grad_norm": 15.098420143127441, "learning_rate": 2.7487910472956205e-06, "loss": 0.1011, "step": 463625 }, { "epoch": 4.56, "grad_norm": 3.2984743118286133, "learning_rate": 2.7486669248413717e-06, "loss": 0.0748, "step": 463650 }, { "epoch": 4.56, "grad_norm": 10.79017448425293, "learning_rate": 2.748542802387123e-06, "loss": 0.143, "step": 463675 }, { "epoch": 4.56, "grad_norm": 6.650773525238037, "learning_rate": 2.748418679932875e-06, "loss": 0.0777, "step": 463700 }, { "epoch": 4.56, "grad_norm": 13.106888771057129, "learning_rate": 2.748294557478626e-06, "loss": 0.0961, "step": 463725 }, { "epoch": 4.56, "grad_norm": 6.418043613433838, "learning_rate": 2.748170435024378e-06, "loss": 0.0699, "step": 463750 }, { "epoch": 4.56, "grad_norm": 3.8629276752471924, "learning_rate": 2.7480463125701295e-06, "loss": 0.1242, "step": 463775 }, { "epoch": 4.56, "grad_norm": 4.208632946014404, "learning_rate": 2.747922190115881e-06, "loss": 0.0923, "step": 463800 }, { "epoch": 4.56, "grad_norm": 13.632193565368652, "learning_rate": 2.7477980676616323e-06, "loss": 0.1157, "step": 463825 }, { "epoch": 4.56, "grad_norm": 8.148730278015137, "learning_rate": 2.7476739452073844e-06, "loss": 0.07, "step": 463850 }, { "epoch": 4.56, "grad_norm": 12.232738494873047, "learning_rate": 2.7475498227531356e-06, "loss": 0.1062, "step": 463875 }, { "epoch": 4.56, "grad_norm": 1.503305435180664, "learning_rate": 2.747425700298887e-06, "loss": 0.0837, "step": 463900 }, { "epoch": 4.56, "grad_norm": Infinity, "learning_rate": 2.7473065427428087e-06, "loss": 0.1273, "step": 463925 }, { "epoch": 4.56, "grad_norm": 2.238152027130127, "learning_rate": 2.74718242028856e-06, "loss": 0.0614, "step": 463950 }, { "epoch": 4.56, "grad_norm": 24.266061782836914, "learning_rate": 2.747058297834312e-06, "loss": 0.1146, "step": 463975 }, { "epoch": 4.56, "grad_norm": 5.0107293128967285, "learning_rate": 2.746934175380063e-06, "loss": 0.0786, "step": 464000 }, { "epoch": 4.56, "grad_norm": 13.931163787841797, "learning_rate": 2.7468100529258148e-06, "loss": 0.1021, "step": 464025 }, { "epoch": 4.56, "grad_norm": 8.693743705749512, "learning_rate": 2.7466859304715664e-06, "loss": 0.0591, "step": 464050 }, { "epoch": 4.56, "grad_norm": 29.137351989746094, "learning_rate": 2.746561808017318e-06, "loss": 0.1529, "step": 464075 }, { "epoch": 4.56, "grad_norm": 3.422452926635742, "learning_rate": 2.7464376855630693e-06, "loss": 0.0839, "step": 464100 }, { "epoch": 4.56, "grad_norm": 21.43878746032715, "learning_rate": 2.7463135631088213e-06, "loss": 0.1274, "step": 464125 }, { "epoch": 4.56, "grad_norm": 5.85890007019043, "learning_rate": 2.7461894406545725e-06, "loss": 0.1036, "step": 464150 }, { "epoch": 4.56, "grad_norm": 14.49704360961914, "learning_rate": 2.7460653182003238e-06, "loss": 0.0963, "step": 464175 }, { "epoch": 4.56, "grad_norm": 3.5233542919158936, "learning_rate": 2.7459411957460754e-06, "loss": 0.0918, "step": 464200 }, { "epoch": 4.56, "grad_norm": 25.734663009643555, "learning_rate": 2.745817073291827e-06, "loss": 0.1184, "step": 464225 }, { "epoch": 4.56, "grad_norm": 1.129319429397583, "learning_rate": 2.7456929508375787e-06, "loss": 0.086, "step": 464250 }, { "epoch": 4.56, "grad_norm": 14.348938941955566, "learning_rate": 2.74556882838333e-06, "loss": 0.1061, "step": 464275 }, { "epoch": 4.57, "grad_norm": 4.673885822296143, "learning_rate": 2.745444705929082e-06, "loss": 0.0909, "step": 464300 }, { "epoch": 4.57, "grad_norm": 14.941856384277344, "learning_rate": 2.745320583474833e-06, "loss": 0.1217, "step": 464325 }, { "epoch": 4.57, "grad_norm": 6.61181640625, "learning_rate": 2.7451964610205843e-06, "loss": 0.0584, "step": 464350 }, { "epoch": 4.57, "grad_norm": 12.435635566711426, "learning_rate": 2.7450723385663364e-06, "loss": 0.0931, "step": 464375 }, { "epoch": 4.57, "grad_norm": 4.555227756500244, "learning_rate": 2.7449482161120876e-06, "loss": 0.071, "step": 464400 }, { "epoch": 4.57, "grad_norm": 4.845681667327881, "learning_rate": 2.7448240936578393e-06, "loss": 0.115, "step": 464425 }, { "epoch": 4.57, "grad_norm": 7.189599990844727, "learning_rate": 2.744699971203591e-06, "loss": 0.0603, "step": 464450 }, { "epoch": 4.57, "grad_norm": 17.790315628051758, "learning_rate": 2.7445758487493425e-06, "loss": 0.1103, "step": 464475 }, { "epoch": 4.57, "grad_norm": 5.0609660148620605, "learning_rate": 2.7444517262950937e-06, "loss": 0.0712, "step": 464500 }, { "epoch": 4.57, "grad_norm": 18.030010223388672, "learning_rate": 2.744327603840846e-06, "loss": 0.0998, "step": 464525 }, { "epoch": 4.57, "grad_norm": 2.344686508178711, "learning_rate": 2.744203481386597e-06, "loss": 0.0714, "step": 464550 }, { "epoch": 4.57, "grad_norm": 27.320022583007812, "learning_rate": 2.7440793589323482e-06, "loss": 0.1015, "step": 464575 }, { "epoch": 4.57, "grad_norm": 5.179138660430908, "learning_rate": 2.7439552364781003e-06, "loss": 0.0661, "step": 464600 }, { "epoch": 4.57, "grad_norm": 7.710716724395752, "learning_rate": 2.7438311140238515e-06, "loss": 0.117, "step": 464625 }, { "epoch": 4.57, "grad_norm": 8.564285278320312, "learning_rate": 2.743706991569603e-06, "loss": 0.0929, "step": 464650 }, { "epoch": 4.57, "grad_norm": 7.323456764221191, "learning_rate": 2.7435828691153548e-06, "loss": 0.1178, "step": 464675 }, { "epoch": 4.57, "grad_norm": 8.586724281311035, "learning_rate": 2.7434587466611064e-06, "loss": 0.0767, "step": 464700 }, { "epoch": 4.57, "grad_norm": 11.361868858337402, "learning_rate": 2.7433346242068576e-06, "loss": 0.1075, "step": 464725 }, { "epoch": 4.57, "grad_norm": 2.5208795070648193, "learning_rate": 2.7432105017526097e-06, "loss": 0.0678, "step": 464750 }, { "epoch": 4.57, "grad_norm": 13.73064136505127, "learning_rate": 2.743086379298361e-06, "loss": 0.0985, "step": 464775 }, { "epoch": 4.57, "grad_norm": 4.780097961425781, "learning_rate": 2.742962256844112e-06, "loss": 0.0737, "step": 464800 }, { "epoch": 4.57, "grad_norm": 14.070172309875488, "learning_rate": 2.742838134389864e-06, "loss": 0.1105, "step": 464825 }, { "epoch": 4.57, "grad_norm": 3.9266064167022705, "learning_rate": 2.7427140119356154e-06, "loss": 0.0795, "step": 464850 }, { "epoch": 4.57, "grad_norm": 10.665141105651855, "learning_rate": 2.742589889481367e-06, "loss": 0.1136, "step": 464875 }, { "epoch": 4.57, "grad_norm": 0.15541809797286987, "learning_rate": 2.7424657670271186e-06, "loss": 0.0838, "step": 464900 }, { "epoch": 4.57, "grad_norm": 19.647871017456055, "learning_rate": 2.7423416445728703e-06, "loss": 0.0847, "step": 464925 }, { "epoch": 4.57, "grad_norm": 3.2597811222076416, "learning_rate": 2.7422175221186215e-06, "loss": 0.0706, "step": 464950 }, { "epoch": 4.57, "grad_norm": 17.311620712280273, "learning_rate": 2.7420933996643735e-06, "loss": 0.0865, "step": 464975 }, { "epoch": 4.57, "grad_norm": 3.360438823699951, "learning_rate": 2.7419692772101247e-06, "loss": 0.0558, "step": 465000 }, { "epoch": 4.57, "grad_norm": 18.5086612701416, "learning_rate": 2.741845154755876e-06, "loss": 0.0995, "step": 465025 }, { "epoch": 4.57, "grad_norm": 3.9468300342559814, "learning_rate": 2.7417210323016276e-06, "loss": 0.0809, "step": 465050 }, { "epoch": 4.57, "grad_norm": 13.540196418762207, "learning_rate": 2.7415969098473792e-06, "loss": 0.1112, "step": 465075 }, { "epoch": 4.57, "grad_norm": 5.348538875579834, "learning_rate": 2.741472787393131e-06, "loss": 0.086, "step": 465100 }, { "epoch": 4.57, "grad_norm": 17.68433380126953, "learning_rate": 2.741348664938882e-06, "loss": 0.1114, "step": 465125 }, { "epoch": 4.57, "grad_norm": 0.9153820872306824, "learning_rate": 2.741224542484634e-06, "loss": 0.0807, "step": 465150 }, { "epoch": 4.57, "grad_norm": 12.371323585510254, "learning_rate": 2.7411004200303853e-06, "loss": 0.0997, "step": 465175 }, { "epoch": 4.57, "grad_norm": 0.07085052132606506, "learning_rate": 2.7409762975761365e-06, "loss": 0.0678, "step": 465200 }, { "epoch": 4.57, "grad_norm": 12.214289665222168, "learning_rate": 2.7408521751218886e-06, "loss": 0.1463, "step": 465225 }, { "epoch": 4.57, "grad_norm": 0.1647195667028427, "learning_rate": 2.74072805266764e-06, "loss": 0.07, "step": 465250 }, { "epoch": 4.57, "grad_norm": 11.836338996887207, "learning_rate": 2.7406039302133915e-06, "loss": 0.1297, "step": 465275 }, { "epoch": 4.57, "grad_norm": 4.886861324310303, "learning_rate": 2.740479807759143e-06, "loss": 0.0644, "step": 465300 }, { "epoch": 4.58, "grad_norm": 3.645303726196289, "learning_rate": 2.7403556853048947e-06, "loss": 0.0901, "step": 465325 }, { "epoch": 4.58, "grad_norm": 2.4007208347320557, "learning_rate": 2.740231562850646e-06, "loss": 0.0687, "step": 465350 }, { "epoch": 4.58, "grad_norm": 15.4550199508667, "learning_rate": 2.740107440396398e-06, "loss": 0.1015, "step": 465375 }, { "epoch": 4.58, "grad_norm": 4.145894527435303, "learning_rate": 2.739983317942149e-06, "loss": 0.083, "step": 465400 }, { "epoch": 4.58, "grad_norm": 13.076316833496094, "learning_rate": 2.7398591954879004e-06, "loss": 0.1047, "step": 465425 }, { "epoch": 4.58, "grad_norm": 7.422539234161377, "learning_rate": 2.7397350730336525e-06, "loss": 0.0511, "step": 465450 }, { "epoch": 4.58, "grad_norm": 10.693062782287598, "learning_rate": 2.7396109505794037e-06, "loss": 0.1025, "step": 465475 }, { "epoch": 4.58, "grad_norm": 2.6152634620666504, "learning_rate": 2.7394868281251553e-06, "loss": 0.0787, "step": 465500 }, { "epoch": 4.58, "grad_norm": 6.7353901863098145, "learning_rate": 2.739362705670907e-06, "loss": 0.0798, "step": 465525 }, { "epoch": 4.58, "grad_norm": 4.824156761169434, "learning_rate": 2.7392385832166586e-06, "loss": 0.066, "step": 465550 }, { "epoch": 4.58, "grad_norm": 15.326886177062988, "learning_rate": 2.73911446076241e-06, "loss": 0.0984, "step": 465575 }, { "epoch": 4.58, "grad_norm": 3.910702705383301, "learning_rate": 2.738990338308162e-06, "loss": 0.0681, "step": 465600 }, { "epoch": 4.58, "grad_norm": 13.388277053833008, "learning_rate": 2.738866215853913e-06, "loss": 0.1133, "step": 465625 }, { "epoch": 4.58, "grad_norm": 1.084241271018982, "learning_rate": 2.7387420933996643e-06, "loss": 0.0839, "step": 465650 }, { "epoch": 4.58, "grad_norm": 10.97835636138916, "learning_rate": 2.7386179709454163e-06, "loss": 0.1227, "step": 465675 }, { "epoch": 4.58, "grad_norm": 1.4492472410202026, "learning_rate": 2.7384938484911676e-06, "loss": 0.0716, "step": 465700 }, { "epoch": 4.58, "grad_norm": 1.525909185409546, "learning_rate": 2.738369726036919e-06, "loss": 0.0976, "step": 465725 }, { "epoch": 4.58, "grad_norm": 1.0256131887435913, "learning_rate": 2.738245603582671e-06, "loss": 0.0764, "step": 465750 }, { "epoch": 4.58, "grad_norm": 17.29257583618164, "learning_rate": 2.7381214811284225e-06, "loss": 0.1027, "step": 465775 }, { "epoch": 4.58, "grad_norm": 6.730659484863281, "learning_rate": 2.7379973586741737e-06, "loss": 0.0838, "step": 465800 }, { "epoch": 4.58, "grad_norm": 8.508415222167969, "learning_rate": 2.7378732362199257e-06, "loss": 0.0916, "step": 465825 }, { "epoch": 4.58, "grad_norm": 2.444648265838623, "learning_rate": 2.737749113765677e-06, "loss": 0.07, "step": 465850 }, { "epoch": 4.58, "grad_norm": 18.81658935546875, "learning_rate": 2.737624991311428e-06, "loss": 0.1268, "step": 465875 }, { "epoch": 4.58, "grad_norm": 3.108544111251831, "learning_rate": 2.7375008688571798e-06, "loss": 0.077, "step": 465900 }, { "epoch": 4.58, "grad_norm": 8.503331184387207, "learning_rate": 2.7373767464029314e-06, "loss": 0.1111, "step": 465925 }, { "epoch": 4.58, "grad_norm": 0.20602744817733765, "learning_rate": 2.737252623948683e-06, "loss": 0.0803, "step": 465950 }, { "epoch": 4.58, "grad_norm": Infinity, "learning_rate": 2.7371334663926045e-06, "loss": 0.1519, "step": 465975 }, { "epoch": 4.58, "grad_norm": 9.136998176574707, "learning_rate": 2.737009343938356e-06, "loss": 0.0779, "step": 466000 }, { "epoch": 4.58, "grad_norm": 6.837961673736572, "learning_rate": 2.7368852214841074e-06, "loss": 0.1157, "step": 466025 }, { "epoch": 4.58, "grad_norm": 3.9304397106170654, "learning_rate": 2.7367610990298594e-06, "loss": 0.0709, "step": 466050 }, { "epoch": 4.58, "grad_norm": 11.992227554321289, "learning_rate": 2.7366369765756106e-06, "loss": 0.0926, "step": 466075 }, { "epoch": 4.58, "grad_norm": 2.7910220623016357, "learning_rate": 2.736512854121362e-06, "loss": 0.0793, "step": 466100 }, { "epoch": 4.58, "grad_norm": 12.965445518493652, "learning_rate": 2.736388731667114e-06, "loss": 0.0577, "step": 466125 }, { "epoch": 4.58, "grad_norm": 7.034872055053711, "learning_rate": 2.736264609212865e-06, "loss": 0.0769, "step": 466150 }, { "epoch": 4.58, "grad_norm": 22.76080894470215, "learning_rate": 2.7361404867586167e-06, "loss": 0.109, "step": 466175 }, { "epoch": 4.58, "grad_norm": 16.571428298950195, "learning_rate": 2.7360163643043684e-06, "loss": 0.0758, "step": 466200 }, { "epoch": 4.58, "grad_norm": 12.697509765625, "learning_rate": 2.73589224185012e-06, "loss": 0.0719, "step": 466225 }, { "epoch": 4.58, "grad_norm": 6.056718349456787, "learning_rate": 2.7357681193958712e-06, "loss": 0.0788, "step": 466250 }, { "epoch": 4.58, "grad_norm": 8.802301406860352, "learning_rate": 2.7356439969416233e-06, "loss": 0.0872, "step": 466275 }, { "epoch": 4.58, "grad_norm": 1.7614283561706543, "learning_rate": 2.7355198744873745e-06, "loss": 0.0409, "step": 466300 }, { "epoch": 4.58, "grad_norm": 10.778374671936035, "learning_rate": 2.7353957520331257e-06, "loss": 0.1129, "step": 466325 }, { "epoch": 4.59, "grad_norm": 2.0865397453308105, "learning_rate": 2.7352716295788778e-06, "loss": 0.0624, "step": 466350 }, { "epoch": 4.59, "grad_norm": 19.191930770874023, "learning_rate": 2.735147507124629e-06, "loss": 0.1026, "step": 466375 }, { "epoch": 4.59, "grad_norm": 8.273846626281738, "learning_rate": 2.7350233846703806e-06, "loss": 0.0881, "step": 466400 }, { "epoch": 4.59, "grad_norm": 24.255319595336914, "learning_rate": 2.7348992622161322e-06, "loss": 0.118, "step": 466425 }, { "epoch": 4.59, "grad_norm": 0.6780545115470886, "learning_rate": 2.734775139761884e-06, "loss": 0.0638, "step": 466450 }, { "epoch": 4.59, "grad_norm": 8.886838912963867, "learning_rate": 2.734651017307635e-06, "loss": 0.0669, "step": 466475 }, { "epoch": 4.59, "grad_norm": 1.4627209901809692, "learning_rate": 2.7345268948533867e-06, "loss": 0.0654, "step": 466500 }, { "epoch": 4.59, "grad_norm": 15.759815216064453, "learning_rate": 2.7344027723991384e-06, "loss": 0.1208, "step": 466525 }, { "epoch": 4.59, "grad_norm": 8.071612358093262, "learning_rate": 2.73427864994489e-06, "loss": 0.0761, "step": 466550 }, { "epoch": 4.59, "grad_norm": 11.671599388122559, "learning_rate": 2.734154527490641e-06, "loss": 0.1213, "step": 466575 }, { "epoch": 4.59, "grad_norm": 2.7280616760253906, "learning_rate": 2.7340304050363933e-06, "loss": 0.069, "step": 466600 }, { "epoch": 4.59, "grad_norm": 17.4981689453125, "learning_rate": 2.7339062825821445e-06, "loss": 0.1172, "step": 466625 }, { "epoch": 4.59, "grad_norm": 0.03271341696381569, "learning_rate": 2.7337821601278957e-06, "loss": 0.0757, "step": 466650 }, { "epoch": 4.59, "grad_norm": 8.439419746398926, "learning_rate": 2.7336580376736477e-06, "loss": 0.1295, "step": 466675 }, { "epoch": 4.59, "grad_norm": 2.3867764472961426, "learning_rate": 2.733533915219399e-06, "loss": 0.0708, "step": 466700 }, { "epoch": 4.59, "grad_norm": 8.726009368896484, "learning_rate": 2.7334097927651506e-06, "loss": 0.0723, "step": 466725 }, { "epoch": 4.59, "grad_norm": 9.044602394104004, "learning_rate": 2.7332856703109022e-06, "loss": 0.0951, "step": 466750 }, { "epoch": 4.59, "grad_norm": 17.034170150756836, "learning_rate": 2.733161547856654e-06, "loss": 0.1351, "step": 466775 }, { "epoch": 4.59, "grad_norm": 0.3240407407283783, "learning_rate": 2.733037425402405e-06, "loss": 0.0731, "step": 466800 }, { "epoch": 4.59, "grad_norm": 18.43966293334961, "learning_rate": 2.732913302948157e-06, "loss": 0.1037, "step": 466825 }, { "epoch": 4.59, "grad_norm": 4.942458629608154, "learning_rate": 2.7327891804939083e-06, "loss": 0.0659, "step": 466850 }, { "epoch": 4.59, "grad_norm": 13.53526782989502, "learning_rate": 2.7326650580396596e-06, "loss": 0.1424, "step": 466875 }, { "epoch": 4.59, "grad_norm": 2.3170316219329834, "learning_rate": 2.7325409355854116e-06, "loss": 0.0612, "step": 466900 }, { "epoch": 4.59, "grad_norm": 5.962653636932373, "learning_rate": 2.732416813131163e-06, "loss": 0.1083, "step": 466925 }, { "epoch": 4.59, "grad_norm": 6.739812850952148, "learning_rate": 2.7322926906769145e-06, "loss": 0.0824, "step": 466950 }, { "epoch": 4.59, "grad_norm": 10.88562297821045, "learning_rate": 2.732168568222666e-06, "loss": 0.0974, "step": 466975 }, { "epoch": 4.59, "grad_norm": 6.971482276916504, "learning_rate": 2.7320444457684177e-06, "loss": 0.101, "step": 467000 }, { "epoch": 4.59, "grad_norm": 19.085153579711914, "learning_rate": 2.731920323314169e-06, "loss": 0.1173, "step": 467025 }, { "epoch": 4.59, "grad_norm": 3.8278956413269043, "learning_rate": 2.731796200859921e-06, "loss": 0.0578, "step": 467050 }, { "epoch": 4.59, "grad_norm": 15.063602447509766, "learning_rate": 2.7316720784056722e-06, "loss": 0.108, "step": 467075 }, { "epoch": 4.59, "grad_norm": 5.473911285400391, "learning_rate": 2.7315479559514234e-06, "loss": 0.0601, "step": 467100 }, { "epoch": 4.59, "grad_norm": 12.472411155700684, "learning_rate": 2.7314238334971755e-06, "loss": 0.1318, "step": 467125 }, { "epoch": 4.59, "grad_norm": 2.77439284324646, "learning_rate": 2.7312997110429267e-06, "loss": 0.0768, "step": 467150 }, { "epoch": 4.59, "grad_norm": 3.8630502223968506, "learning_rate": 2.7311755885886783e-06, "loss": 0.091, "step": 467175 }, { "epoch": 4.59, "grad_norm": 8.699589729309082, "learning_rate": 2.73105146613443e-06, "loss": 0.0817, "step": 467200 }, { "epoch": 4.59, "grad_norm": 18.925626754760742, "learning_rate": 2.7309273436801816e-06, "loss": 0.1335, "step": 467225 }, { "epoch": 4.59, "grad_norm": 7.693785667419434, "learning_rate": 2.730803221225933e-06, "loss": 0.0693, "step": 467250 }, { "epoch": 4.59, "grad_norm": 12.161365509033203, "learning_rate": 2.730679098771685e-06, "loss": 0.0948, "step": 467275 }, { "epoch": 4.59, "grad_norm": 0.35322222113609314, "learning_rate": 2.730554976317436e-06, "loss": 0.0604, "step": 467300 }, { "epoch": 4.59, "grad_norm": 18.413867950439453, "learning_rate": 2.7304308538631873e-06, "loss": 0.0952, "step": 467325 }, { "epoch": 4.6, "grad_norm": 7.762350559234619, "learning_rate": 2.730306731408939e-06, "loss": 0.1037, "step": 467350 }, { "epoch": 4.6, "grad_norm": 11.153928756713867, "learning_rate": 2.7301826089546906e-06, "loss": 0.1351, "step": 467375 }, { "epoch": 4.6, "grad_norm": 7.416159629821777, "learning_rate": 2.730058486500442e-06, "loss": 0.0689, "step": 467400 }, { "epoch": 4.6, "grad_norm": 8.30770206451416, "learning_rate": 2.7299343640461934e-06, "loss": 0.0993, "step": 467425 }, { "epoch": 4.6, "grad_norm": 2.835378408432007, "learning_rate": 2.7298102415919455e-06, "loss": 0.0751, "step": 467450 }, { "epoch": 4.6, "grad_norm": 4.434683799743652, "learning_rate": 2.7296861191376967e-06, "loss": 0.1096, "step": 467475 }, { "epoch": 4.6, "grad_norm": 8.634450912475586, "learning_rate": 2.729561996683448e-06, "loss": 0.0903, "step": 467500 }, { "epoch": 4.6, "grad_norm": 7.315591335296631, "learning_rate": 2.7294378742292e-06, "loss": 0.0953, "step": 467525 }, { "epoch": 4.6, "grad_norm": 1.0139131546020508, "learning_rate": 2.729313751774951e-06, "loss": 0.0531, "step": 467550 }, { "epoch": 4.6, "grad_norm": 14.723188400268555, "learning_rate": 2.729189629320703e-06, "loss": 0.1301, "step": 467575 }, { "epoch": 4.6, "grad_norm": 3.128004789352417, "learning_rate": 2.7290655068664544e-06, "loss": 0.0987, "step": 467600 }, { "epoch": 4.6, "grad_norm": 20.833105087280273, "learning_rate": 2.728941384412206e-06, "loss": 0.0987, "step": 467625 }, { "epoch": 4.6, "grad_norm": 3.760388135910034, "learning_rate": 2.7288172619579573e-06, "loss": 0.0867, "step": 467650 }, { "epoch": 4.6, "grad_norm": 14.826333045959473, "learning_rate": 2.7286931395037093e-06, "loss": 0.1151, "step": 467675 }, { "epoch": 4.6, "grad_norm": 8.821413040161133, "learning_rate": 2.7285690170494605e-06, "loss": 0.0914, "step": 467700 }, { "epoch": 4.6, "grad_norm": 4.754840850830078, "learning_rate": 2.7284448945952118e-06, "loss": 0.1262, "step": 467725 }, { "epoch": 4.6, "grad_norm": 13.92667007446289, "learning_rate": 2.728320772140964e-06, "loss": 0.0798, "step": 467750 }, { "epoch": 4.6, "grad_norm": 9.4573335647583, "learning_rate": 2.728196649686715e-06, "loss": 0.0895, "step": 467775 }, { "epoch": 4.6, "grad_norm": 2.1632354259490967, "learning_rate": 2.7280725272324667e-06, "loss": 0.0702, "step": 467800 }, { "epoch": 4.6, "grad_norm": 11.869490623474121, "learning_rate": 2.7279484047782183e-06, "loss": 0.1115, "step": 467825 }, { "epoch": 4.6, "grad_norm": 3.117668628692627, "learning_rate": 2.72782428232397e-06, "loss": 0.063, "step": 467850 }, { "epoch": 4.6, "grad_norm": 14.167298316955566, "learning_rate": 2.727700159869721e-06, "loss": 0.1055, "step": 467875 }, { "epoch": 4.6, "grad_norm": 0.17131616175174713, "learning_rate": 2.727576037415473e-06, "loss": 0.0786, "step": 467900 }, { "epoch": 4.6, "grad_norm": 17.530410766601562, "learning_rate": 2.7274519149612244e-06, "loss": 0.1179, "step": 467925 }, { "epoch": 4.6, "grad_norm": 7.932085037231445, "learning_rate": 2.7273277925069756e-06, "loss": 0.0832, "step": 467950 }, { "epoch": 4.6, "grad_norm": 10.877840995788574, "learning_rate": 2.7272036700527277e-06, "loss": 0.1253, "step": 467975 }, { "epoch": 4.6, "grad_norm": 6.4626617431640625, "learning_rate": 2.727079547598479e-06, "loss": 0.082, "step": 468000 }, { "epoch": 4.6, "grad_norm": 10.834694862365723, "learning_rate": 2.7269554251442305e-06, "loss": 0.1169, "step": 468025 }, { "epoch": 4.6, "grad_norm": 4.334577560424805, "learning_rate": 2.726831302689982e-06, "loss": 0.0667, "step": 468050 }, { "epoch": 4.6, "grad_norm": 9.540298461914062, "learning_rate": 2.726707180235734e-06, "loss": 0.1252, "step": 468075 }, { "epoch": 4.6, "grad_norm": 0.7238654494285583, "learning_rate": 2.726583057781485e-06, "loss": 0.0733, "step": 468100 }, { "epoch": 4.6, "grad_norm": 21.40056037902832, "learning_rate": 2.726458935327237e-06, "loss": 0.151, "step": 468125 }, { "epoch": 4.6, "grad_norm": 2.2911934852600098, "learning_rate": 2.7263348128729883e-06, "loss": 0.0798, "step": 468150 }, { "epoch": 4.6, "grad_norm": 12.977880477905273, "learning_rate": 2.7262106904187395e-06, "loss": 0.1073, "step": 468175 }, { "epoch": 4.6, "grad_norm": 4.502574443817139, "learning_rate": 2.726086567964491e-06, "loss": 0.0784, "step": 468200 }, { "epoch": 4.6, "grad_norm": 7.3938703536987305, "learning_rate": 2.7259624455102428e-06, "loss": 0.0985, "step": 468225 }, { "epoch": 4.6, "grad_norm": 5.257485866546631, "learning_rate": 2.7258383230559944e-06, "loss": 0.0683, "step": 468250 }, { "epoch": 4.6, "grad_norm": 19.394683837890625, "learning_rate": 2.7257142006017456e-06, "loss": 0.1242, "step": 468275 }, { "epoch": 4.6, "grad_norm": 6.608921527862549, "learning_rate": 2.7255900781474977e-06, "loss": 0.062, "step": 468300 }, { "epoch": 4.6, "grad_norm": 2.418166399002075, "learning_rate": 2.725465955693249e-06, "loss": 0.1107, "step": 468325 }, { "epoch": 4.6, "grad_norm": 11.017848014831543, "learning_rate": 2.725341833239e-06, "loss": 0.099, "step": 468350 }, { "epoch": 4.61, "grad_norm": 14.573016166687012, "learning_rate": 2.725217710784752e-06, "loss": 0.0963, "step": 468375 }, { "epoch": 4.61, "grad_norm": 1.0534604787826538, "learning_rate": 2.7250935883305034e-06, "loss": 0.0688, "step": 468400 }, { "epoch": 4.61, "grad_norm": 18.942415237426758, "learning_rate": 2.724969465876255e-06, "loss": 0.0929, "step": 468425 }, { "epoch": 4.61, "grad_norm": 4.919247627258301, "learning_rate": 2.7248453434220066e-06, "loss": 0.0871, "step": 468450 }, { "epoch": 4.61, "grad_norm": 17.042043685913086, "learning_rate": 2.7247212209677583e-06, "loss": 0.0934, "step": 468475 }, { "epoch": 4.61, "grad_norm": 6.142283916473389, "learning_rate": 2.7245970985135095e-06, "loss": 0.0504, "step": 468500 }, { "epoch": 4.61, "grad_norm": 12.172348976135254, "learning_rate": 2.7244729760592615e-06, "loss": 0.1332, "step": 468525 }, { "epoch": 4.61, "grad_norm": 2.729287624359131, "learning_rate": 2.7243488536050127e-06, "loss": 0.0769, "step": 468550 }, { "epoch": 4.61, "grad_norm": 12.116272926330566, "learning_rate": 2.724224731150764e-06, "loss": 0.1468, "step": 468575 }, { "epoch": 4.61, "grad_norm": 5.015953063964844, "learning_rate": 2.724100608696516e-06, "loss": 0.0524, "step": 468600 }, { "epoch": 4.61, "grad_norm": 15.603918075561523, "learning_rate": 2.7239764862422672e-06, "loss": 0.0961, "step": 468625 }, { "epoch": 4.61, "grad_norm": 13.820024490356445, "learning_rate": 2.723857328686189e-06, "loss": 0.0851, "step": 468650 }, { "epoch": 4.61, "grad_norm": 12.591431617736816, "learning_rate": 2.7237332062319403e-06, "loss": 0.1049, "step": 468675 }, { "epoch": 4.61, "grad_norm": 4.297721862792969, "learning_rate": 2.723609083777692e-06, "loss": 0.0748, "step": 468700 }, { "epoch": 4.61, "grad_norm": 14.133940696716309, "learning_rate": 2.7234849613234436e-06, "loss": 0.1257, "step": 468725 }, { "epoch": 4.61, "grad_norm": 3.983046531677246, "learning_rate": 2.7233608388691952e-06, "loss": 0.0872, "step": 468750 }, { "epoch": 4.61, "grad_norm": 10.380242347717285, "learning_rate": 2.7232367164149464e-06, "loss": 0.1247, "step": 468775 }, { "epoch": 4.61, "grad_norm": 4.531821250915527, "learning_rate": 2.7231125939606976e-06, "loss": 0.0817, "step": 468800 }, { "epoch": 4.61, "grad_norm": 8.080302238464355, "learning_rate": 2.7229884715064497e-06, "loss": 0.119, "step": 468825 }, { "epoch": 4.61, "grad_norm": 3.3468852043151855, "learning_rate": 2.722864349052201e-06, "loss": 0.0675, "step": 468850 }, { "epoch": 4.61, "grad_norm": 11.673286437988281, "learning_rate": 2.7227402265979525e-06, "loss": 0.1322, "step": 468875 }, { "epoch": 4.61, "grad_norm": 0.41690129041671753, "learning_rate": 2.722616104143704e-06, "loss": 0.0955, "step": 468900 }, { "epoch": 4.61, "grad_norm": 11.567327499389648, "learning_rate": 2.722491981689456e-06, "loss": 0.1194, "step": 468925 }, { "epoch": 4.61, "grad_norm": 3.4807958602905273, "learning_rate": 2.722367859235207e-06, "loss": 0.0759, "step": 468950 }, { "epoch": 4.61, "grad_norm": 8.177902221679688, "learning_rate": 2.722243736780959e-06, "loss": 0.1302, "step": 468975 }, { "epoch": 4.61, "grad_norm": 4.940750598907471, "learning_rate": 2.7221196143267103e-06, "loss": 0.0616, "step": 469000 }, { "epoch": 4.61, "grad_norm": 7.630270004272461, "learning_rate": 2.7219954918724615e-06, "loss": 0.1046, "step": 469025 }, { "epoch": 4.61, "grad_norm": 1.0384002923965454, "learning_rate": 2.7218713694182136e-06, "loss": 0.0821, "step": 469050 }, { "epoch": 4.61, "grad_norm": 12.471957206726074, "learning_rate": 2.7217472469639648e-06, "loss": 0.1071, "step": 469075 }, { "epoch": 4.61, "grad_norm": 1.046496033668518, "learning_rate": 2.7216231245097164e-06, "loss": 0.06, "step": 469100 }, { "epoch": 4.61, "grad_norm": 11.028463363647461, "learning_rate": 2.721499002055468e-06, "loss": 0.1157, "step": 469125 }, { "epoch": 4.61, "grad_norm": 5.182130813598633, "learning_rate": 2.7213748796012197e-06, "loss": 0.071, "step": 469150 }, { "epoch": 4.61, "grad_norm": 9.372665405273438, "learning_rate": 2.721250757146971e-06, "loss": 0.1029, "step": 469175 }, { "epoch": 4.61, "grad_norm": 0.48526105284690857, "learning_rate": 2.721126634692723e-06, "loss": 0.0617, "step": 469200 }, { "epoch": 4.61, "grad_norm": 15.964097023010254, "learning_rate": 2.721002512238474e-06, "loss": 0.1417, "step": 469225 }, { "epoch": 4.61, "grad_norm": 3.2810001373291016, "learning_rate": 2.7208783897842254e-06, "loss": 0.1108, "step": 469250 }, { "epoch": 4.61, "grad_norm": 12.366263389587402, "learning_rate": 2.7207542673299774e-06, "loss": 0.1079, "step": 469275 }, { "epoch": 4.61, "grad_norm": 5.524497985839844, "learning_rate": 2.7206301448757286e-06, "loss": 0.0657, "step": 469300 }, { "epoch": 4.61, "grad_norm": 16.680082321166992, "learning_rate": 2.7205060224214803e-06, "loss": 0.1201, "step": 469325 }, { "epoch": 4.61, "grad_norm": 3.153637170791626, "learning_rate": 2.720381899967232e-06, "loss": 0.0652, "step": 469350 }, { "epoch": 4.61, "grad_norm": 7.528114318847656, "learning_rate": 2.7202577775129836e-06, "loss": 0.0838, "step": 469375 }, { "epoch": 4.62, "grad_norm": 0.9297996759414673, "learning_rate": 2.7201336550587348e-06, "loss": 0.0847, "step": 469400 }, { "epoch": 4.62, "grad_norm": 16.125207901000977, "learning_rate": 2.720009532604487e-06, "loss": 0.1164, "step": 469425 }, { "epoch": 4.62, "grad_norm": 1.189900517463684, "learning_rate": 2.719885410150238e-06, "loss": 0.0678, "step": 469450 }, { "epoch": 4.62, "grad_norm": 21.68581199645996, "learning_rate": 2.7197612876959897e-06, "loss": 0.1179, "step": 469475 }, { "epoch": 4.62, "grad_norm": 6.628854274749756, "learning_rate": 2.7196371652417413e-06, "loss": 0.1082, "step": 469500 }, { "epoch": 4.62, "grad_norm": 17.12154769897461, "learning_rate": 2.719513042787493e-06, "loss": 0.1342, "step": 469525 }, { "epoch": 4.62, "grad_norm": 5.800682067871094, "learning_rate": 2.719388920333244e-06, "loss": 0.0688, "step": 469550 }, { "epoch": 4.62, "grad_norm": 13.3785400390625, "learning_rate": 2.719264797878996e-06, "loss": 0.1055, "step": 469575 }, { "epoch": 4.62, "grad_norm": 3.7466907501220703, "learning_rate": 2.7191406754247474e-06, "loss": 0.0868, "step": 469600 }, { "epoch": 4.62, "grad_norm": 15.874985694885254, "learning_rate": 2.7190165529704986e-06, "loss": 0.1368, "step": 469625 }, { "epoch": 4.62, "grad_norm": 3.8033318519592285, "learning_rate": 2.7188924305162503e-06, "loss": 0.0605, "step": 469650 }, { "epoch": 4.62, "grad_norm": 9.042036056518555, "learning_rate": 2.718768308062002e-06, "loss": 0.1264, "step": 469675 }, { "epoch": 4.62, "grad_norm": 1.006729006767273, "learning_rate": 2.7186441856077535e-06, "loss": 0.0784, "step": 469700 }, { "epoch": 4.62, "grad_norm": 15.999824523925781, "learning_rate": 2.7185200631535047e-06, "loss": 0.0913, "step": 469725 }, { "epoch": 4.62, "grad_norm": 1.7523099184036255, "learning_rate": 2.718395940699257e-06, "loss": 0.0771, "step": 469750 }, { "epoch": 4.62, "grad_norm": 17.270475387573242, "learning_rate": 2.718271818245008e-06, "loss": 0.1181, "step": 469775 }, { "epoch": 4.62, "grad_norm": 2.2108426094055176, "learning_rate": 2.7181476957907592e-06, "loss": 0.0489, "step": 469800 }, { "epoch": 4.62, "grad_norm": 0.7683190703392029, "learning_rate": 2.7180235733365113e-06, "loss": 0.0831, "step": 469825 }, { "epoch": 4.62, "grad_norm": 9.992538452148438, "learning_rate": 2.7178994508822625e-06, "loss": 0.0673, "step": 469850 }, { "epoch": 4.62, "grad_norm": 11.072990417480469, "learning_rate": 2.717775328428014e-06, "loss": 0.0847, "step": 469875 }, { "epoch": 4.62, "grad_norm": 17.831647872924805, "learning_rate": 2.7176512059737658e-06, "loss": 0.0747, "step": 469900 }, { "epoch": 4.62, "grad_norm": 11.066442489624023, "learning_rate": 2.7175270835195174e-06, "loss": 0.1155, "step": 469925 }, { "epoch": 4.62, "grad_norm": 0.25127360224723816, "learning_rate": 2.7174029610652686e-06, "loss": 0.0757, "step": 469950 }, { "epoch": 4.62, "grad_norm": 16.841951370239258, "learning_rate": 2.7172788386110207e-06, "loss": 0.0827, "step": 469975 }, { "epoch": 4.62, "grad_norm": 6.071018695831299, "learning_rate": 2.717154716156772e-06, "loss": 0.0759, "step": 470000 }, { "epoch": 4.62, "grad_norm": 11.506573677062988, "learning_rate": 2.717030593702523e-06, "loss": 0.1609, "step": 470025 }, { "epoch": 4.62, "grad_norm": 6.0772385597229, "learning_rate": 2.716906471248275e-06, "loss": 0.0698, "step": 470050 }, { "epoch": 4.62, "grad_norm": 15.907730102539062, "learning_rate": 2.7167823487940264e-06, "loss": 0.0859, "step": 470075 }, { "epoch": 4.62, "grad_norm": 5.652229309082031, "learning_rate": 2.716658226339778e-06, "loss": 0.1081, "step": 470100 }, { "epoch": 4.62, "grad_norm": 22.944334030151367, "learning_rate": 2.7165341038855296e-06, "loss": 0.0984, "step": 470125 }, { "epoch": 4.62, "grad_norm": 0.7367345094680786, "learning_rate": 2.7164099814312813e-06, "loss": 0.0646, "step": 470150 }, { "epoch": 4.62, "grad_norm": 31.113067626953125, "learning_rate": 2.7162858589770325e-06, "loss": 0.0994, "step": 470175 }, { "epoch": 4.62, "grad_norm": 4.980460166931152, "learning_rate": 2.7161617365227845e-06, "loss": 0.0839, "step": 470200 }, { "epoch": 4.62, "grad_norm": 23.68787384033203, "learning_rate": 2.7160376140685358e-06, "loss": 0.0978, "step": 470225 }, { "epoch": 4.62, "grad_norm": 7.159174919128418, "learning_rate": 2.715913491614287e-06, "loss": 0.0791, "step": 470250 }, { "epoch": 4.62, "grad_norm": 9.44359016418457, "learning_rate": 2.715789369160039e-06, "loss": 0.1134, "step": 470275 }, { "epoch": 4.62, "grad_norm": 3.699373960494995, "learning_rate": 2.7156652467057902e-06, "loss": 0.0583, "step": 470300 }, { "epoch": 4.62, "grad_norm": 16.274511337280273, "learning_rate": 2.715541124251542e-06, "loss": 0.1489, "step": 470325 }, { "epoch": 4.62, "grad_norm": 1.6406244039535522, "learning_rate": 2.7154170017972935e-06, "loss": 0.07, "step": 470350 }, { "epoch": 4.62, "grad_norm": 16.82121467590332, "learning_rate": 2.715292879343045e-06, "loss": 0.1149, "step": 470375 }, { "epoch": 4.63, "grad_norm": 0.4309821128845215, "learning_rate": 2.7151687568887964e-06, "loss": 0.0921, "step": 470400 }, { "epoch": 4.63, "grad_norm": 10.600936889648438, "learning_rate": 2.7150446344345484e-06, "loss": 0.1339, "step": 470425 }, { "epoch": 4.63, "grad_norm": 6.355232238769531, "learning_rate": 2.7149205119802996e-06, "loss": 0.059, "step": 470450 }, { "epoch": 4.63, "grad_norm": 14.690594673156738, "learning_rate": 2.714796389526051e-06, "loss": 0.1187, "step": 470475 }, { "epoch": 4.63, "grad_norm": 7.416792869567871, "learning_rate": 2.7146722670718025e-06, "loss": 0.0765, "step": 470500 }, { "epoch": 4.63, "grad_norm": 7.484177112579346, "learning_rate": 2.714548144617554e-06, "loss": 0.1471, "step": 470525 }, { "epoch": 4.63, "grad_norm": 0.11904775351285934, "learning_rate": 2.7144240221633057e-06, "loss": 0.0897, "step": 470550 }, { "epoch": 4.63, "grad_norm": 19.243064880371094, "learning_rate": 2.714299899709057e-06, "loss": 0.1008, "step": 470575 }, { "epoch": 4.63, "grad_norm": 7.832026481628418, "learning_rate": 2.714175777254809e-06, "loss": 0.0673, "step": 470600 }, { "epoch": 4.63, "grad_norm": 10.090559005737305, "learning_rate": 2.7140516548005602e-06, "loss": 0.0895, "step": 470625 }, { "epoch": 4.63, "grad_norm": 0.08304647356271744, "learning_rate": 2.7139275323463114e-06, "loss": 0.0767, "step": 470650 }, { "epoch": 4.63, "grad_norm": 10.537672996520996, "learning_rate": 2.7138034098920635e-06, "loss": 0.1157, "step": 470675 }, { "epoch": 4.63, "grad_norm": 4.862401485443115, "learning_rate": 2.7136792874378147e-06, "loss": 0.0949, "step": 470700 }, { "epoch": 4.63, "grad_norm": 13.92158317565918, "learning_rate": 2.7135551649835663e-06, "loss": 0.1162, "step": 470725 }, { "epoch": 4.63, "grad_norm": 4.637689590454102, "learning_rate": 2.713431042529318e-06, "loss": 0.0907, "step": 470750 }, { "epoch": 4.63, "grad_norm": 8.770833015441895, "learning_rate": 2.7133069200750696e-06, "loss": 0.1017, "step": 470775 }, { "epoch": 4.63, "grad_norm": 7.636362552642822, "learning_rate": 2.713182797620821e-06, "loss": 0.0988, "step": 470800 }, { "epoch": 4.63, "grad_norm": 15.398775100708008, "learning_rate": 2.713058675166573e-06, "loss": 0.1539, "step": 470825 }, { "epoch": 4.63, "grad_norm": 5.446008205413818, "learning_rate": 2.712934552712324e-06, "loss": 0.0721, "step": 470850 }, { "epoch": 4.63, "grad_norm": 8.775425910949707, "learning_rate": 2.7128104302580753e-06, "loss": 0.1021, "step": 470875 }, { "epoch": 4.63, "grad_norm": 3.8792426586151123, "learning_rate": 2.7126863078038274e-06, "loss": 0.0843, "step": 470900 }, { "epoch": 4.63, "grad_norm": 2.053884983062744, "learning_rate": 2.7125621853495786e-06, "loss": 0.0821, "step": 470925 }, { "epoch": 4.63, "grad_norm": 1.219528317451477, "learning_rate": 2.71243806289533e-06, "loss": 0.1044, "step": 470950 }, { "epoch": 4.63, "grad_norm": 15.681386947631836, "learning_rate": 2.712313940441082e-06, "loss": 0.0767, "step": 470975 }, { "epoch": 4.63, "grad_norm": 0.31906241178512573, "learning_rate": 2.7121898179868335e-06, "loss": 0.0876, "step": 471000 }, { "epoch": 4.63, "grad_norm": 6.86387300491333, "learning_rate": 2.7120656955325847e-06, "loss": 0.0854, "step": 471025 }, { "epoch": 4.63, "grad_norm": 4.315699577331543, "learning_rate": 2.7119415730783367e-06, "loss": 0.0921, "step": 471050 }, { "epoch": 4.63, "grad_norm": 17.71082305908203, "learning_rate": 2.711817450624088e-06, "loss": 0.1092, "step": 471075 }, { "epoch": 4.63, "grad_norm": 7.030782699584961, "learning_rate": 2.711693328169839e-06, "loss": 0.0725, "step": 471100 }, { "epoch": 4.63, "grad_norm": 21.41634178161621, "learning_rate": 2.7115692057155912e-06, "loss": 0.117, "step": 471125 }, { "epoch": 4.63, "grad_norm": 0.04119100421667099, "learning_rate": 2.7114450832613424e-06, "loss": 0.0693, "step": 471150 }, { "epoch": 4.63, "grad_norm": 9.707252502441406, "learning_rate": 2.711320960807094e-06, "loss": 0.1036, "step": 471175 }, { "epoch": 4.63, "grad_norm": 7.379706382751465, "learning_rate": 2.7111968383528457e-06, "loss": 0.0833, "step": 471200 }, { "epoch": 4.63, "grad_norm": 12.43130874633789, "learning_rate": 2.7110727158985973e-06, "loss": 0.1322, "step": 471225 }, { "epoch": 4.63, "grad_norm": 6.691642761230469, "learning_rate": 2.7109485934443486e-06, "loss": 0.0938, "step": 471250 }, { "epoch": 4.63, "grad_norm": 5.320775985717773, "learning_rate": 2.7108244709901006e-06, "loss": 0.1211, "step": 471275 }, { "epoch": 4.63, "grad_norm": 4.889492511749268, "learning_rate": 2.710700348535852e-06, "loss": 0.0696, "step": 471300 }, { "epoch": 4.63, "grad_norm": 15.712261199951172, "learning_rate": 2.710576226081603e-06, "loss": 0.1222, "step": 471325 }, { "epoch": 4.63, "grad_norm": 2.0983753204345703, "learning_rate": 2.7104521036273547e-06, "loss": 0.0658, "step": 471350 }, { "epoch": 4.63, "grad_norm": 12.541399955749512, "learning_rate": 2.7103279811731063e-06, "loss": 0.0902, "step": 471375 }, { "epoch": 4.63, "grad_norm": 6.040190696716309, "learning_rate": 2.710203858718858e-06, "loss": 0.0945, "step": 471400 }, { "epoch": 4.64, "grad_norm": 18.147775650024414, "learning_rate": 2.710079736264609e-06, "loss": 0.1248, "step": 471425 }, { "epoch": 4.64, "grad_norm": 0.3639502227306366, "learning_rate": 2.709955613810361e-06, "loss": 0.059, "step": 471450 }, { "epoch": 4.64, "grad_norm": 8.976616859436035, "learning_rate": 2.7098314913561124e-06, "loss": 0.1202, "step": 471475 }, { "epoch": 4.64, "grad_norm": 4.67447566986084, "learning_rate": 2.7097073689018636e-06, "loss": 0.0843, "step": 471500 }, { "epoch": 4.64, "grad_norm": 15.224745750427246, "learning_rate": 2.7095832464476157e-06, "loss": 0.1241, "step": 471525 }, { "epoch": 4.64, "grad_norm": 0.24096663296222687, "learning_rate": 2.709459123993367e-06, "loss": 0.093, "step": 471550 }, { "epoch": 4.64, "grad_norm": 7.590741157531738, "learning_rate": 2.7093350015391185e-06, "loss": 0.1022, "step": 471575 }, { "epoch": 4.64, "grad_norm": 2.560537099838257, "learning_rate": 2.70921087908487e-06, "loss": 0.0784, "step": 471600 }, { "epoch": 4.64, "grad_norm": 8.988014221191406, "learning_rate": 2.709086756630622e-06, "loss": 0.0873, "step": 471625 }, { "epoch": 4.64, "grad_norm": 4.720664978027344, "learning_rate": 2.708962634176373e-06, "loss": 0.0686, "step": 471650 }, { "epoch": 4.64, "grad_norm": 22.28335189819336, "learning_rate": 2.708838511722125e-06, "loss": 0.1015, "step": 471675 }, { "epoch": 4.64, "grad_norm": 8.031600952148438, "learning_rate": 2.7087143892678763e-06, "loss": 0.0822, "step": 471700 }, { "epoch": 4.64, "grad_norm": 15.045014381408691, "learning_rate": 2.7085902668136275e-06, "loss": 0.0965, "step": 471725 }, { "epoch": 4.64, "grad_norm": 10.300114631652832, "learning_rate": 2.7084661443593796e-06, "loss": 0.0671, "step": 471750 }, { "epoch": 4.64, "grad_norm": 7.276844501495361, "learning_rate": 2.7083420219051308e-06, "loss": 0.1295, "step": 471775 }, { "epoch": 4.64, "grad_norm": 4.118685245513916, "learning_rate": 2.7082178994508824e-06, "loss": 0.0893, "step": 471800 }, { "epoch": 4.64, "grad_norm": 15.459386825561523, "learning_rate": 2.708093776996634e-06, "loss": 0.1094, "step": 471825 }, { "epoch": 4.64, "grad_norm": 3.1392486095428467, "learning_rate": 2.7079696545423857e-06, "loss": 0.0892, "step": 471850 }, { "epoch": 4.64, "grad_norm": 13.37873363494873, "learning_rate": 2.707845532088137e-06, "loss": 0.1067, "step": 471875 }, { "epoch": 4.64, "grad_norm": 1.9280030727386475, "learning_rate": 2.707721409633889e-06, "loss": 0.0855, "step": 471900 }, { "epoch": 4.64, "grad_norm": 18.283693313598633, "learning_rate": 2.70759728717964e-06, "loss": 0.113, "step": 471925 }, { "epoch": 4.64, "grad_norm": 5.0666680335998535, "learning_rate": 2.7074731647253914e-06, "loss": 0.0869, "step": 471950 }, { "epoch": 4.64, "grad_norm": 10.305670738220215, "learning_rate": 2.7073490422711434e-06, "loss": 0.1234, "step": 471975 }, { "epoch": 4.64, "grad_norm": 1.377336025238037, "learning_rate": 2.7072249198168946e-06, "loss": 0.0751, "step": 472000 }, { "epoch": 4.64, "grad_norm": 14.71389102935791, "learning_rate": 2.7071007973626463e-06, "loss": 0.1217, "step": 472025 }, { "epoch": 4.64, "grad_norm": 4.241844654083252, "learning_rate": 2.706976674908398e-06, "loss": 0.0817, "step": 472050 }, { "epoch": 4.64, "grad_norm": 12.165579795837402, "learning_rate": 2.7068525524541495e-06, "loss": 0.092, "step": 472075 }, { "epoch": 4.64, "grad_norm": 8.911226272583008, "learning_rate": 2.7067284299999008e-06, "loss": 0.0943, "step": 472100 }, { "epoch": 4.64, "grad_norm": 12.32266902923584, "learning_rate": 2.706604307545653e-06, "loss": 0.1231, "step": 472125 }, { "epoch": 4.64, "grad_norm": 1.3496748208999634, "learning_rate": 2.706480185091404e-06, "loss": 0.0627, "step": 472150 }, { "epoch": 4.64, "grad_norm": 11.20966911315918, "learning_rate": 2.7063560626371557e-06, "loss": 0.1064, "step": 472175 }, { "epoch": 4.64, "grad_norm": 5.807540416717529, "learning_rate": 2.706231940182907e-06, "loss": 0.052, "step": 472200 }, { "epoch": 4.64, "grad_norm": 15.781214714050293, "learning_rate": 2.7061078177286585e-06, "loss": 0.1092, "step": 472225 }, { "epoch": 4.64, "grad_norm": 2.903118848800659, "learning_rate": 2.70598369527441e-06, "loss": 0.065, "step": 472250 }, { "epoch": 4.64, "grad_norm": 11.067026138305664, "learning_rate": 2.7058595728201614e-06, "loss": 0.0784, "step": 472275 }, { "epoch": 4.64, "grad_norm": 3.7472035884857178, "learning_rate": 2.7057354503659134e-06, "loss": 0.0627, "step": 472300 }, { "epoch": 4.64, "grad_norm": 24.296663284301758, "learning_rate": 2.7056113279116646e-06, "loss": 0.0677, "step": 472325 }, { "epoch": 4.64, "grad_norm": 4.864840984344482, "learning_rate": 2.7054872054574163e-06, "loss": 0.062, "step": 472350 }, { "epoch": 4.64, "grad_norm": 13.919133186340332, "learning_rate": 2.705363083003168e-06, "loss": 0.1142, "step": 472375 }, { "epoch": 4.64, "grad_norm": 1.1969246864318848, "learning_rate": 2.7052389605489195e-06, "loss": 0.0696, "step": 472400 }, { "epoch": 4.64, "grad_norm": 12.703204154968262, "learning_rate": 2.7051148380946707e-06, "loss": 0.1458, "step": 472425 }, { "epoch": 4.65, "grad_norm": 1.2903810739517212, "learning_rate": 2.704990715640423e-06, "loss": 0.1079, "step": 472450 }, { "epoch": 4.65, "grad_norm": 6.707892894744873, "learning_rate": 2.704866593186174e-06, "loss": 0.0901, "step": 472475 }, { "epoch": 4.65, "grad_norm": 2.3355610370635986, "learning_rate": 2.7047424707319252e-06, "loss": 0.0687, "step": 472500 }, { "epoch": 4.65, "grad_norm": 11.002338409423828, "learning_rate": 2.7046183482776773e-06, "loss": 0.1006, "step": 472525 }, { "epoch": 4.65, "grad_norm": 2.950701951980591, "learning_rate": 2.7044942258234285e-06, "loss": 0.0716, "step": 472550 }, { "epoch": 4.65, "grad_norm": 6.150269985198975, "learning_rate": 2.70437010336918e-06, "loss": 0.0936, "step": 472575 }, { "epoch": 4.65, "grad_norm": 5.968074321746826, "learning_rate": 2.7042459809149318e-06, "loss": 0.0621, "step": 472600 }, { "epoch": 4.65, "grad_norm": 7.105666637420654, "learning_rate": 2.7041218584606834e-06, "loss": 0.0865, "step": 472625 }, { "epoch": 4.65, "grad_norm": 1.2004984617233276, "learning_rate": 2.704002700904605e-06, "loss": 0.0731, "step": 472650 }, { "epoch": 4.65, "grad_norm": 33.9060173034668, "learning_rate": 2.7038785784503565e-06, "loss": 0.1219, "step": 472675 }, { "epoch": 4.65, "grad_norm": 1.4834167957305908, "learning_rate": 2.7037544559961077e-06, "loss": 0.0639, "step": 472700 }, { "epoch": 4.65, "grad_norm": 11.604058265686035, "learning_rate": 2.7036303335418597e-06, "loss": 0.1119, "step": 472725 }, { "epoch": 4.65, "grad_norm": 5.352287769317627, "learning_rate": 2.703506211087611e-06, "loss": 0.0815, "step": 472750 }, { "epoch": 4.65, "grad_norm": 8.787230491638184, "learning_rate": 2.703382088633362e-06, "loss": 0.1058, "step": 472775 }, { "epoch": 4.65, "grad_norm": 10.806256294250488, "learning_rate": 2.703257966179114e-06, "loss": 0.0913, "step": 472800 }, { "epoch": 4.65, "grad_norm": 15.547467231750488, "learning_rate": 2.7031338437248654e-06, "loss": 0.1347, "step": 472825 }, { "epoch": 4.65, "grad_norm": 14.817068099975586, "learning_rate": 2.703014686168787e-06, "loss": 0.0621, "step": 472850 }, { "epoch": 4.65, "grad_norm": 3.677493095397949, "learning_rate": 2.7028905637145385e-06, "loss": 0.1053, "step": 472875 }, { "epoch": 4.65, "grad_norm": 2.015432119369507, "learning_rate": 2.70276644126029e-06, "loss": 0.0662, "step": 472900 }, { "epoch": 4.65, "grad_norm": 11.255522727966309, "learning_rate": 2.7026423188060414e-06, "loss": 0.0808, "step": 472925 }, { "epoch": 4.65, "grad_norm": 4.70019006729126, "learning_rate": 2.7025181963517934e-06, "loss": 0.1008, "step": 472950 }, { "epoch": 4.65, "grad_norm": 10.467632293701172, "learning_rate": 2.7023940738975446e-06, "loss": 0.0956, "step": 472975 }, { "epoch": 4.65, "grad_norm": 6.3486409187316895, "learning_rate": 2.702269951443296e-06, "loss": 0.0788, "step": 473000 }, { "epoch": 4.65, "grad_norm": 11.324139595031738, "learning_rate": 2.702145828989048e-06, "loss": 0.0968, "step": 473025 }, { "epoch": 4.65, "grad_norm": 3.825244903564453, "learning_rate": 2.702021706534799e-06, "loss": 0.0512, "step": 473050 }, { "epoch": 4.65, "grad_norm": 14.601482391357422, "learning_rate": 2.7018975840805508e-06, "loss": 0.1003, "step": 473075 }, { "epoch": 4.65, "grad_norm": 0.9933934211730957, "learning_rate": 2.7017734616263024e-06, "loss": 0.0717, "step": 473100 }, { "epoch": 4.65, "grad_norm": 15.712185859680176, "learning_rate": 2.701649339172054e-06, "loss": 0.1108, "step": 473125 }, { "epoch": 4.65, "grad_norm": 4.303161144256592, "learning_rate": 2.7015252167178052e-06, "loss": 0.0818, "step": 473150 }, { "epoch": 4.65, "grad_norm": 23.229354858398438, "learning_rate": 2.7014010942635573e-06, "loss": 0.1311, "step": 473175 }, { "epoch": 4.65, "grad_norm": 6.266232490539551, "learning_rate": 2.7012769718093085e-06, "loss": 0.0878, "step": 473200 }, { "epoch": 4.65, "grad_norm": 12.194971084594727, "learning_rate": 2.7011528493550597e-06, "loss": 0.0995, "step": 473225 }, { "epoch": 4.65, "grad_norm": 5.652313709259033, "learning_rate": 2.7010287269008118e-06, "loss": 0.0797, "step": 473250 }, { "epoch": 4.65, "grad_norm": 18.32110023498535, "learning_rate": 2.700904604446563e-06, "loss": 0.1092, "step": 473275 }, { "epoch": 4.65, "grad_norm": 7.674649715423584, "learning_rate": 2.7007804819923146e-06, "loss": 0.0646, "step": 473300 }, { "epoch": 4.65, "grad_norm": 12.053043365478516, "learning_rate": 2.700656359538066e-06, "loss": 0.1037, "step": 473325 }, { "epoch": 4.65, "grad_norm": 0.38277649879455566, "learning_rate": 2.700532237083818e-06, "loss": 0.0779, "step": 473350 }, { "epoch": 4.65, "grad_norm": 10.3519868850708, "learning_rate": 2.700408114629569e-06, "loss": 0.1279, "step": 473375 }, { "epoch": 4.65, "grad_norm": 8.762249946594238, "learning_rate": 2.7002839921753203e-06, "loss": 0.0701, "step": 473400 }, { "epoch": 4.65, "grad_norm": 15.455989837646484, "learning_rate": 2.7001598697210724e-06, "loss": 0.1037, "step": 473425 }, { "epoch": 4.66, "grad_norm": 4.440061092376709, "learning_rate": 2.7000357472668236e-06, "loss": 0.0796, "step": 473450 }, { "epoch": 4.66, "grad_norm": 14.234903335571289, "learning_rate": 2.6999116248125752e-06, "loss": 0.1033, "step": 473475 }, { "epoch": 4.66, "grad_norm": 7.35899543762207, "learning_rate": 2.699787502358327e-06, "loss": 0.0632, "step": 473500 }, { "epoch": 4.66, "grad_norm": 18.988859176635742, "learning_rate": 2.6996633799040785e-06, "loss": 0.1052, "step": 473525 }, { "epoch": 4.66, "grad_norm": 0.7143548727035522, "learning_rate": 2.6995392574498297e-06, "loss": 0.08, "step": 473550 }, { "epoch": 4.66, "grad_norm": 18.486614227294922, "learning_rate": 2.6994151349955818e-06, "loss": 0.1142, "step": 473575 }, { "epoch": 4.66, "grad_norm": 9.371012687683105, "learning_rate": 2.699291012541333e-06, "loss": 0.093, "step": 473600 }, { "epoch": 4.66, "grad_norm": 9.243622779846191, "learning_rate": 2.699166890087084e-06, "loss": 0.1182, "step": 473625 }, { "epoch": 4.66, "grad_norm": 1.3456757068634033, "learning_rate": 2.6990427676328362e-06, "loss": 0.0742, "step": 473650 }, { "epoch": 4.66, "grad_norm": 12.946379661560059, "learning_rate": 2.6989186451785875e-06, "loss": 0.0957, "step": 473675 }, { "epoch": 4.66, "grad_norm": 8.214027404785156, "learning_rate": 2.698794522724339e-06, "loss": 0.0972, "step": 473700 }, { "epoch": 4.66, "grad_norm": 10.72659969329834, "learning_rate": 2.6986704002700907e-06, "loss": 0.0988, "step": 473725 }, { "epoch": 4.66, "grad_norm": 6.111634731292725, "learning_rate": 2.6985462778158424e-06, "loss": 0.0764, "step": 473750 }, { "epoch": 4.66, "grad_norm": 10.411382675170898, "learning_rate": 2.6984221553615936e-06, "loss": 0.0988, "step": 473775 }, { "epoch": 4.66, "grad_norm": 5.914152145385742, "learning_rate": 2.6982980329073456e-06, "loss": 0.0627, "step": 473800 }, { "epoch": 4.66, "grad_norm": 9.020149230957031, "learning_rate": 2.698173910453097e-06, "loss": 0.0766, "step": 473825 }, { "epoch": 4.66, "grad_norm": 4.706334590911865, "learning_rate": 2.698049787998848e-06, "loss": 0.0876, "step": 473850 }, { "epoch": 4.66, "grad_norm": 18.18819236755371, "learning_rate": 2.6979256655446e-06, "loss": 0.1024, "step": 473875 }, { "epoch": 4.66, "grad_norm": 3.935906410217285, "learning_rate": 2.6978015430903513e-06, "loss": 0.0661, "step": 473900 }, { "epoch": 4.66, "grad_norm": 19.308290481567383, "learning_rate": 2.697677420636103e-06, "loss": 0.1249, "step": 473925 }, { "epoch": 4.66, "grad_norm": 1.7842662334442139, "learning_rate": 2.6975532981818546e-06, "loss": 0.0842, "step": 473950 }, { "epoch": 4.66, "grad_norm": 9.844523429870605, "learning_rate": 2.6974291757276062e-06, "loss": 0.1301, "step": 473975 }, { "epoch": 4.66, "grad_norm": 5.186667442321777, "learning_rate": 2.6973050532733574e-06, "loss": 0.0799, "step": 474000 }, { "epoch": 4.66, "grad_norm": 16.948396682739258, "learning_rate": 2.6971809308191095e-06, "loss": 0.0992, "step": 474025 }, { "epoch": 4.66, "grad_norm": 8.433989524841309, "learning_rate": 2.6970568083648607e-06, "loss": 0.0774, "step": 474050 }, { "epoch": 4.66, "grad_norm": 9.028881072998047, "learning_rate": 2.696932685910612e-06, "loss": 0.091, "step": 474075 }, { "epoch": 4.66, "grad_norm": 5.825493335723877, "learning_rate": 2.696808563456364e-06, "loss": 0.0823, "step": 474100 }, { "epoch": 4.66, "grad_norm": 30.91020393371582, "learning_rate": 2.696684441002115e-06, "loss": 0.1277, "step": 474125 }, { "epoch": 4.66, "grad_norm": 6.853664398193359, "learning_rate": 2.696560318547867e-06, "loss": 0.0657, "step": 474150 }, { "epoch": 4.66, "grad_norm": 13.692469596862793, "learning_rate": 2.696436196093618e-06, "loss": 0.1082, "step": 474175 }, { "epoch": 4.66, "grad_norm": 5.810766220092773, "learning_rate": 2.69631207363937e-06, "loss": 0.0762, "step": 474200 }, { "epoch": 4.66, "grad_norm": 16.125244140625, "learning_rate": 2.6961879511851213e-06, "loss": 0.1066, "step": 474225 }, { "epoch": 4.66, "grad_norm": 8.89023494720459, "learning_rate": 2.6960638287308725e-06, "loss": 0.0949, "step": 474250 }, { "epoch": 4.66, "grad_norm": 3.524472236633301, "learning_rate": 2.6959397062766246e-06, "loss": 0.1052, "step": 474275 }, { "epoch": 4.66, "grad_norm": 3.3792827129364014, "learning_rate": 2.695815583822376e-06, "loss": 0.0778, "step": 474300 }, { "epoch": 4.66, "grad_norm": 17.715776443481445, "learning_rate": 2.6956914613681274e-06, "loss": 0.1077, "step": 474325 }, { "epoch": 4.66, "grad_norm": 0.4744841754436493, "learning_rate": 2.695567338913879e-06, "loss": 0.0721, "step": 474350 }, { "epoch": 4.66, "grad_norm": 16.960098266601562, "learning_rate": 2.6954432164596307e-06, "loss": 0.1443, "step": 474375 }, { "epoch": 4.66, "grad_norm": 0.013244189321994781, "learning_rate": 2.695319094005382e-06, "loss": 0.0793, "step": 474400 }, { "epoch": 4.66, "grad_norm": 17.608360290527344, "learning_rate": 2.695194971551134e-06, "loss": 0.0978, "step": 474425 }, { "epoch": 4.66, "grad_norm": 2.473168134689331, "learning_rate": 2.695070849096885e-06, "loss": 0.0593, "step": 474450 }, { "epoch": 4.67, "grad_norm": 13.614124298095703, "learning_rate": 2.6949467266426364e-06, "loss": 0.0711, "step": 474475 }, { "epoch": 4.67, "grad_norm": 1.6398720741271973, "learning_rate": 2.6948226041883884e-06, "loss": 0.0675, "step": 474500 }, { "epoch": 4.67, "grad_norm": 11.053776741027832, "learning_rate": 2.6946984817341397e-06, "loss": 0.0869, "step": 474525 }, { "epoch": 4.67, "grad_norm": 4.361950397491455, "learning_rate": 2.6945743592798913e-06, "loss": 0.0812, "step": 474550 }, { "epoch": 4.67, "grad_norm": 10.33955192565918, "learning_rate": 2.694450236825643e-06, "loss": 0.1004, "step": 474575 }, { "epoch": 4.67, "grad_norm": 6.128778457641602, "learning_rate": 2.6943261143713946e-06, "loss": 0.0857, "step": 474600 }, { "epoch": 4.67, "grad_norm": 12.825125694274902, "learning_rate": 2.6942019919171458e-06, "loss": 0.1028, "step": 474625 }, { "epoch": 4.67, "grad_norm": 3.3263468742370605, "learning_rate": 2.694077869462898e-06, "loss": 0.0837, "step": 474650 }, { "epoch": 4.67, "grad_norm": 61.96453094482422, "learning_rate": 2.693953747008649e-06, "loss": 0.0994, "step": 474675 }, { "epoch": 4.67, "grad_norm": 9.15953254699707, "learning_rate": 2.6938296245544003e-06, "loss": 0.0661, "step": 474700 }, { "epoch": 4.67, "grad_norm": 8.717427253723145, "learning_rate": 2.6937055021001523e-06, "loss": 0.1157, "step": 474725 }, { "epoch": 4.67, "grad_norm": 11.024185180664062, "learning_rate": 2.6935813796459035e-06, "loss": 0.0788, "step": 474750 }, { "epoch": 4.67, "grad_norm": 13.575932502746582, "learning_rate": 2.693457257191655e-06, "loss": 0.1175, "step": 474775 }, { "epoch": 4.67, "grad_norm": 5.364806652069092, "learning_rate": 2.693333134737407e-06, "loss": 0.0832, "step": 474800 }, { "epoch": 4.67, "grad_norm": 12.674403190612793, "learning_rate": 2.6932090122831584e-06, "loss": 0.1078, "step": 474825 }, { "epoch": 4.67, "grad_norm": 5.213263034820557, "learning_rate": 2.6930848898289096e-06, "loss": 0.0855, "step": 474850 }, { "epoch": 4.67, "grad_norm": 9.075533866882324, "learning_rate": 2.6929607673746617e-06, "loss": 0.0937, "step": 474875 }, { "epoch": 4.67, "grad_norm": 5.57774543762207, "learning_rate": 2.692836644920413e-06, "loss": 0.0573, "step": 474900 }, { "epoch": 4.67, "grad_norm": 10.52556037902832, "learning_rate": 2.692712522466164e-06, "loss": 0.1263, "step": 474925 }, { "epoch": 4.67, "grad_norm": 9.345410346984863, "learning_rate": 2.692588400011916e-06, "loss": 0.076, "step": 474950 }, { "epoch": 4.67, "grad_norm": 12.598138809204102, "learning_rate": 2.6924642775576674e-06, "loss": 0.1253, "step": 474975 }, { "epoch": 4.67, "grad_norm": 11.15251636505127, "learning_rate": 2.692340155103419e-06, "loss": 0.1008, "step": 475000 }, { "epoch": 4.67, "grad_norm": 3.5477683544158936, "learning_rate": 2.6922160326491707e-06, "loss": 0.1286, "step": 475025 }, { "epoch": 4.67, "grad_norm": 1.791719913482666, "learning_rate": 2.6920919101949223e-06, "loss": 0.0631, "step": 475050 }, { "epoch": 4.67, "grad_norm": 17.12889289855957, "learning_rate": 2.6919677877406735e-06, "loss": 0.0898, "step": 475075 }, { "epoch": 4.67, "grad_norm": 3.3922431468963623, "learning_rate": 2.6918436652864247e-06, "loss": 0.0921, "step": 475100 }, { "epoch": 4.67, "grad_norm": 7.340068817138672, "learning_rate": 2.6917195428321768e-06, "loss": 0.1037, "step": 475125 }, { "epoch": 4.67, "grad_norm": 4.839532375335693, "learning_rate": 2.691595420377928e-06, "loss": 0.0773, "step": 475150 }, { "epoch": 4.67, "grad_norm": 14.380091667175293, "learning_rate": 2.6914712979236796e-06, "loss": 0.112, "step": 475175 }, { "epoch": 4.67, "grad_norm": 2.2736716270446777, "learning_rate": 2.691352140367601e-06, "loss": 0.0858, "step": 475200 }, { "epoch": 4.67, "grad_norm": 13.175485610961914, "learning_rate": 2.6912280179133527e-06, "loss": 0.1177, "step": 475225 }, { "epoch": 4.67, "grad_norm": 4.076802730560303, "learning_rate": 2.6911038954591043e-06, "loss": 0.0714, "step": 475250 }, { "epoch": 4.67, "grad_norm": 8.268058776855469, "learning_rate": 2.690979773004856e-06, "loss": 0.0953, "step": 475275 }, { "epoch": 4.67, "grad_norm": 8.141687393188477, "learning_rate": 2.690855650550607e-06, "loss": 0.0848, "step": 475300 }, { "epoch": 4.67, "grad_norm": 11.566391944885254, "learning_rate": 2.6907315280963593e-06, "loss": 0.1181, "step": 475325 }, { "epoch": 4.67, "grad_norm": 0.20301495492458344, "learning_rate": 2.6906074056421105e-06, "loss": 0.0896, "step": 475350 }, { "epoch": 4.67, "grad_norm": 11.996598243713379, "learning_rate": 2.690483283187862e-06, "loss": 0.1059, "step": 475375 }, { "epoch": 4.67, "grad_norm": 15.789253234863281, "learning_rate": 2.6903591607336137e-06, "loss": 0.0513, "step": 475400 }, { "epoch": 4.67, "grad_norm": 16.806753158569336, "learning_rate": 2.6902350382793654e-06, "loss": 0.1169, "step": 475425 }, { "epoch": 4.67, "grad_norm": 2.5260818004608154, "learning_rate": 2.6901109158251166e-06, "loss": 0.0958, "step": 475450 }, { "epoch": 4.67, "grad_norm": 2.789079427719116, "learning_rate": 2.6899867933708686e-06, "loss": 0.0835, "step": 475475 }, { "epoch": 4.68, "grad_norm": 1.7685816287994385, "learning_rate": 2.68986267091662e-06, "loss": 0.0915, "step": 475500 }, { "epoch": 4.68, "grad_norm": 10.10061264038086, "learning_rate": 2.689738548462371e-06, "loss": 0.126, "step": 475525 }, { "epoch": 4.68, "grad_norm": 4.3358988761901855, "learning_rate": 2.689614426008123e-06, "loss": 0.0743, "step": 475550 }, { "epoch": 4.68, "grad_norm": 9.848849296569824, "learning_rate": 2.6894903035538743e-06, "loss": 0.1007, "step": 475575 }, { "epoch": 4.68, "grad_norm": 5.677077770233154, "learning_rate": 2.689366181099626e-06, "loss": 0.0631, "step": 475600 }, { "epoch": 4.68, "grad_norm": 14.072455406188965, "learning_rate": 2.689242058645377e-06, "loss": 0.124, "step": 475625 }, { "epoch": 4.68, "grad_norm": 3.475780963897705, "learning_rate": 2.6891179361911292e-06, "loss": 0.0855, "step": 475650 }, { "epoch": 4.68, "grad_norm": 7.84485387802124, "learning_rate": 2.6889938137368804e-06, "loss": 0.1094, "step": 475675 }, { "epoch": 4.68, "grad_norm": 2.1363751888275146, "learning_rate": 2.6888696912826317e-06, "loss": 0.0772, "step": 475700 }, { "epoch": 4.68, "grad_norm": 27.80305290222168, "learning_rate": 2.6887455688283837e-06, "loss": 0.1362, "step": 475725 }, { "epoch": 4.68, "grad_norm": 7.648523807525635, "learning_rate": 2.688621446374135e-06, "loss": 0.0975, "step": 475750 }, { "epoch": 4.68, "grad_norm": 22.668153762817383, "learning_rate": 2.6884973239198866e-06, "loss": 0.1048, "step": 475775 }, { "epoch": 4.68, "grad_norm": 1.150835633277893, "learning_rate": 2.688373201465638e-06, "loss": 0.0728, "step": 475800 }, { "epoch": 4.68, "grad_norm": 17.619678497314453, "learning_rate": 2.68824907901139e-06, "loss": 0.1103, "step": 475825 }, { "epoch": 4.68, "grad_norm": 7.629115581512451, "learning_rate": 2.688124956557141e-06, "loss": 0.0686, "step": 475850 }, { "epoch": 4.68, "grad_norm": 52.86785888671875, "learning_rate": 2.688000834102893e-06, "loss": 0.0986, "step": 475875 }, { "epoch": 4.68, "grad_norm": 2.019256830215454, "learning_rate": 2.6878767116486443e-06, "loss": 0.0921, "step": 475900 }, { "epoch": 4.68, "grad_norm": 5.433499336242676, "learning_rate": 2.6877525891943955e-06, "loss": 0.1425, "step": 475925 }, { "epoch": 4.68, "grad_norm": 5.88113260269165, "learning_rate": 2.6876284667401476e-06, "loss": 0.0752, "step": 475950 }, { "epoch": 4.68, "grad_norm": 13.324617385864258, "learning_rate": 2.687504344285899e-06, "loss": 0.0982, "step": 475975 }, { "epoch": 4.68, "grad_norm": 3.5330810546875, "learning_rate": 2.6873802218316504e-06, "loss": 0.0779, "step": 476000 }, { "epoch": 4.68, "grad_norm": 16.70151710510254, "learning_rate": 2.687256099377402e-06, "loss": 0.1008, "step": 476025 }, { "epoch": 4.68, "grad_norm": 4.844366073608398, "learning_rate": 2.6871319769231537e-06, "loss": 0.068, "step": 476050 }, { "epoch": 4.68, "grad_norm": 14.704895973205566, "learning_rate": 2.687007854468905e-06, "loss": 0.1266, "step": 476075 }, { "epoch": 4.68, "grad_norm": 2.3266165256500244, "learning_rate": 2.686883732014657e-06, "loss": 0.0671, "step": 476100 }, { "epoch": 4.68, "grad_norm": 9.560423851013184, "learning_rate": 2.686759609560408e-06, "loss": 0.1187, "step": 476125 }, { "epoch": 4.68, "grad_norm": 1.8280932903289795, "learning_rate": 2.6866354871061594e-06, "loss": 0.0467, "step": 476150 }, { "epoch": 4.68, "grad_norm": 12.27151870727539, "learning_rate": 2.6865113646519115e-06, "loss": 0.1163, "step": 476175 }, { "epoch": 4.68, "grad_norm": 4.65716552734375, "learning_rate": 2.6863872421976627e-06, "loss": 0.0834, "step": 476200 }, { "epoch": 4.68, "grad_norm": 13.624289512634277, "learning_rate": 2.6862631197434143e-06, "loss": 0.1117, "step": 476225 }, { "epoch": 4.68, "grad_norm": 2.7608935832977295, "learning_rate": 2.686138997289166e-06, "loss": 0.0788, "step": 476250 }, { "epoch": 4.68, "grad_norm": 12.266965866088867, "learning_rate": 2.6860148748349176e-06, "loss": 0.1163, "step": 476275 }, { "epoch": 4.68, "grad_norm": 3.639458417892456, "learning_rate": 2.6858907523806688e-06, "loss": 0.0791, "step": 476300 }, { "epoch": 4.68, "grad_norm": 13.248106956481934, "learning_rate": 2.685766629926421e-06, "loss": 0.0841, "step": 476325 }, { "epoch": 4.68, "grad_norm": 2.4225661754608154, "learning_rate": 2.685642507472172e-06, "loss": 0.0623, "step": 476350 }, { "epoch": 4.68, "grad_norm": 8.758471488952637, "learning_rate": 2.6855183850179233e-06, "loss": 0.0998, "step": 476375 }, { "epoch": 4.68, "grad_norm": 0.5032954216003418, "learning_rate": 2.6853942625636753e-06, "loss": 0.0747, "step": 476400 }, { "epoch": 4.68, "grad_norm": 12.55912971496582, "learning_rate": 2.6852701401094265e-06, "loss": 0.1052, "step": 476425 }, { "epoch": 4.68, "grad_norm": 4.535421848297119, "learning_rate": 2.685146017655178e-06, "loss": 0.0863, "step": 476450 }, { "epoch": 4.68, "grad_norm": 10.173933982849121, "learning_rate": 2.6850218952009294e-06, "loss": 0.1142, "step": 476475 }, { "epoch": 4.69, "grad_norm": 5.785382270812988, "learning_rate": 2.6848977727466814e-06, "loss": 0.0785, "step": 476500 }, { "epoch": 4.69, "grad_norm": 14.0978364944458, "learning_rate": 2.6847736502924327e-06, "loss": 0.0899, "step": 476525 }, { "epoch": 4.69, "grad_norm": 5.922794818878174, "learning_rate": 2.684649527838184e-06, "loss": 0.0763, "step": 476550 }, { "epoch": 4.69, "grad_norm": 15.318448066711426, "learning_rate": 2.684525405383936e-06, "loss": 0.0964, "step": 476575 }, { "epoch": 4.69, "grad_norm": 9.62470817565918, "learning_rate": 2.684401282929687e-06, "loss": 0.0763, "step": 476600 }, { "epoch": 4.69, "grad_norm": 12.450260162353516, "learning_rate": 2.6842771604754388e-06, "loss": 0.1025, "step": 476625 }, { "epoch": 4.69, "grad_norm": 13.083978652954102, "learning_rate": 2.6841530380211904e-06, "loss": 0.0839, "step": 476650 }, { "epoch": 4.69, "grad_norm": 4.889739990234375, "learning_rate": 2.684028915566942e-06, "loss": 0.1106, "step": 476675 }, { "epoch": 4.69, "grad_norm": 2.4065136909484863, "learning_rate": 2.6839047931126932e-06, "loss": 0.0819, "step": 476700 }, { "epoch": 4.69, "grad_norm": 13.82075023651123, "learning_rate": 2.6837806706584453e-06, "loss": 0.1152, "step": 476725 }, { "epoch": 4.69, "grad_norm": 4.526352882385254, "learning_rate": 2.6836565482041965e-06, "loss": 0.0777, "step": 476750 }, { "epoch": 4.69, "grad_norm": 18.542095184326172, "learning_rate": 2.6835324257499477e-06, "loss": 0.1099, "step": 476775 }, { "epoch": 4.69, "grad_norm": 2.5725491046905518, "learning_rate": 2.6834083032956998e-06, "loss": 0.0899, "step": 476800 }, { "epoch": 4.69, "grad_norm": 16.845720291137695, "learning_rate": 2.683284180841451e-06, "loss": 0.1158, "step": 476825 }, { "epoch": 4.69, "grad_norm": 0.2540605664253235, "learning_rate": 2.6831600583872026e-06, "loss": 0.0522, "step": 476850 }, { "epoch": 4.69, "grad_norm": 69.77037048339844, "learning_rate": 2.6830359359329543e-06, "loss": 0.1193, "step": 476875 }, { "epoch": 4.69, "grad_norm": 2.215684413909912, "learning_rate": 2.682911813478706e-06, "loss": 0.073, "step": 476900 }, { "epoch": 4.69, "grad_norm": 18.673295974731445, "learning_rate": 2.682787691024457e-06, "loss": 0.1073, "step": 476925 }, { "epoch": 4.69, "grad_norm": 0.8742965459823608, "learning_rate": 2.682663568570209e-06, "loss": 0.075, "step": 476950 }, { "epoch": 4.69, "grad_norm": 9.22329330444336, "learning_rate": 2.6825394461159604e-06, "loss": 0.1041, "step": 476975 }, { "epoch": 4.69, "grad_norm": 0.8436461687088013, "learning_rate": 2.6824153236617116e-06, "loss": 0.087, "step": 477000 }, { "epoch": 4.69, "grad_norm": 18.82309341430664, "learning_rate": 2.6822912012074637e-06, "loss": 0.1201, "step": 477025 }, { "epoch": 4.69, "grad_norm": 6.817106246948242, "learning_rate": 2.682167078753215e-06, "loss": 0.095, "step": 477050 }, { "epoch": 4.69, "grad_norm": 11.660496711730957, "learning_rate": 2.6820429562989665e-06, "loss": 0.1011, "step": 477075 }, { "epoch": 4.69, "grad_norm": 0.8965980410575867, "learning_rate": 2.681918833844718e-06, "loss": 0.0724, "step": 477100 }, { "epoch": 4.69, "grad_norm": 5.96552848815918, "learning_rate": 2.6817947113904698e-06, "loss": 0.0779, "step": 477125 }, { "epoch": 4.69, "grad_norm": 10.434918403625488, "learning_rate": 2.681670588936221e-06, "loss": 0.0865, "step": 477150 }, { "epoch": 4.69, "grad_norm": 12.836813926696777, "learning_rate": 2.681546466481973e-06, "loss": 0.1108, "step": 477175 }, { "epoch": 4.69, "grad_norm": 2.0144920349121094, "learning_rate": 2.6814223440277243e-06, "loss": 0.0728, "step": 477200 }, { "epoch": 4.69, "grad_norm": 11.396921157836914, "learning_rate": 2.6812982215734755e-06, "loss": 0.0922, "step": 477225 }, { "epoch": 4.69, "grad_norm": 0.10352374613285065, "learning_rate": 2.6811740991192275e-06, "loss": 0.0562, "step": 477250 }, { "epoch": 4.69, "grad_norm": 11.54139518737793, "learning_rate": 2.6810499766649787e-06, "loss": 0.1082, "step": 477275 }, { "epoch": 4.69, "grad_norm": 9.161909103393555, "learning_rate": 2.6809258542107304e-06, "loss": 0.0886, "step": 477300 }, { "epoch": 4.69, "grad_norm": 17.799034118652344, "learning_rate": 2.6808017317564816e-06, "loss": 0.097, "step": 477325 }, { "epoch": 4.69, "grad_norm": 3.6843695640563965, "learning_rate": 2.6806776093022336e-06, "loss": 0.0741, "step": 477350 }, { "epoch": 4.69, "grad_norm": 12.372404098510742, "learning_rate": 2.680553486847985e-06, "loss": 0.09, "step": 477375 }, { "epoch": 4.69, "grad_norm": 0.7068910598754883, "learning_rate": 2.680429364393736e-06, "loss": 0.123, "step": 477400 }, { "epoch": 4.69, "grad_norm": 17.415109634399414, "learning_rate": 2.680305241939488e-06, "loss": 0.1064, "step": 477425 }, { "epoch": 4.69, "grad_norm": 3.078941822052002, "learning_rate": 2.6801811194852393e-06, "loss": 0.0774, "step": 477450 }, { "epoch": 4.69, "grad_norm": 7.132226467132568, "learning_rate": 2.680056997030991e-06, "loss": 0.1204, "step": 477475 }, { "epoch": 4.69, "grad_norm": 44.55335998535156, "learning_rate": 2.6799328745767426e-06, "loss": 0.0716, "step": 477500 }, { "epoch": 4.7, "grad_norm": 11.455005645751953, "learning_rate": 2.6798087521224942e-06, "loss": 0.0773, "step": 477525 }, { "epoch": 4.7, "grad_norm": 1.060223937034607, "learning_rate": 2.6796846296682454e-06, "loss": 0.0688, "step": 477550 }, { "epoch": 4.7, "grad_norm": 10.285951614379883, "learning_rate": 2.6795605072139975e-06, "loss": 0.1264, "step": 477575 }, { "epoch": 4.7, "grad_norm": 1.8852883577346802, "learning_rate": 2.6794363847597487e-06, "loss": 0.0876, "step": 477600 }, { "epoch": 4.7, "grad_norm": 6.537779331207275, "learning_rate": 2.6793122623055e-06, "loss": 0.0876, "step": 477625 }, { "epoch": 4.7, "grad_norm": 1.9336785078048706, "learning_rate": 2.679193104749422e-06, "loss": 0.0909, "step": 477650 }, { "epoch": 4.7, "grad_norm": 4.191361427307129, "learning_rate": 2.679068982295173e-06, "loss": 0.0888, "step": 477675 }, { "epoch": 4.7, "grad_norm": 6.433690547943115, "learning_rate": 2.678944859840925e-06, "loss": 0.0754, "step": 477700 }, { "epoch": 4.7, "grad_norm": 7.954198837280273, "learning_rate": 2.6788207373866763e-06, "loss": 0.0979, "step": 477725 }, { "epoch": 4.7, "grad_norm": 6.756284236907959, "learning_rate": 2.678696614932428e-06, "loss": 0.0762, "step": 477750 }, { "epoch": 4.7, "grad_norm": 12.679267883300781, "learning_rate": 2.6785724924781796e-06, "loss": 0.1076, "step": 477775 }, { "epoch": 4.7, "grad_norm": 0.7175911664962769, "learning_rate": 2.678448370023931e-06, "loss": 0.082, "step": 477800 }, { "epoch": 4.7, "grad_norm": 9.994357109069824, "learning_rate": 2.6783242475696824e-06, "loss": 0.1017, "step": 477825 }, { "epoch": 4.7, "grad_norm": 6.366729259490967, "learning_rate": 2.6782001251154345e-06, "loss": 0.0587, "step": 477850 }, { "epoch": 4.7, "grad_norm": 13.519132614135742, "learning_rate": 2.6780760026611857e-06, "loss": 0.1476, "step": 477875 }, { "epoch": 4.7, "grad_norm": 2.074309825897217, "learning_rate": 2.677951880206937e-06, "loss": 0.0691, "step": 477900 }, { "epoch": 4.7, "grad_norm": 13.976500511169434, "learning_rate": 2.6778277577526885e-06, "loss": 0.09, "step": 477925 }, { "epoch": 4.7, "grad_norm": 14.861598014831543, "learning_rate": 2.67770363529844e-06, "loss": 0.0589, "step": 477950 }, { "epoch": 4.7, "grad_norm": 13.482359886169434, "learning_rate": 2.677579512844192e-06, "loss": 0.1007, "step": 477975 }, { "epoch": 4.7, "grad_norm": 2.9006004333496094, "learning_rate": 2.677455390389943e-06, "loss": 0.0801, "step": 478000 }, { "epoch": 4.7, "grad_norm": 13.627789497375488, "learning_rate": 2.677331267935695e-06, "loss": 0.1009, "step": 478025 }, { "epoch": 4.7, "grad_norm": 4.86698055267334, "learning_rate": 2.6772071454814463e-06, "loss": 0.0826, "step": 478050 }, { "epoch": 4.7, "grad_norm": 8.723183631896973, "learning_rate": 2.6770830230271975e-06, "loss": 0.1117, "step": 478075 }, { "epoch": 4.7, "grad_norm": 9.56558895111084, "learning_rate": 2.6769589005729495e-06, "loss": 0.0554, "step": 478100 }, { "epoch": 4.7, "grad_norm": 11.435928344726562, "learning_rate": 2.6768347781187008e-06, "loss": 0.1245, "step": 478125 }, { "epoch": 4.7, "grad_norm": 2.0019023418426514, "learning_rate": 2.6767106556644524e-06, "loss": 0.0805, "step": 478150 }, { "epoch": 4.7, "grad_norm": 10.635177612304688, "learning_rate": 2.676586533210204e-06, "loss": 0.1319, "step": 478175 }, { "epoch": 4.7, "grad_norm": 5.957663536071777, "learning_rate": 2.6764624107559557e-06, "loss": 0.0985, "step": 478200 }, { "epoch": 4.7, "grad_norm": 19.69059181213379, "learning_rate": 2.676338288301707e-06, "loss": 0.1324, "step": 478225 }, { "epoch": 4.7, "grad_norm": 8.765231132507324, "learning_rate": 2.676214165847459e-06, "loss": 0.0664, "step": 478250 }, { "epoch": 4.7, "grad_norm": 21.134628295898438, "learning_rate": 2.67609004339321e-06, "loss": 0.1277, "step": 478275 }, { "epoch": 4.7, "grad_norm": 0.33825165033340454, "learning_rate": 2.6759659209389618e-06, "loss": 0.0802, "step": 478300 }, { "epoch": 4.7, "grad_norm": 17.571670532226562, "learning_rate": 2.6758417984847134e-06, "loss": 0.1087, "step": 478325 }, { "epoch": 4.7, "grad_norm": 5.069879531860352, "learning_rate": 2.675717676030465e-06, "loss": 0.1011, "step": 478350 }, { "epoch": 4.7, "grad_norm": 5.126957893371582, "learning_rate": 2.6755935535762163e-06, "loss": 0.1361, "step": 478375 }, { "epoch": 4.7, "grad_norm": 2.983450174331665, "learning_rate": 2.6754694311219683e-06, "loss": 0.0804, "step": 478400 }, { "epoch": 4.7, "grad_norm": 16.668092727661133, "learning_rate": 2.6753453086677195e-06, "loss": 0.0998, "step": 478425 }, { "epoch": 4.7, "grad_norm": 0.47203513979911804, "learning_rate": 2.6752211862134707e-06, "loss": 0.06, "step": 478450 }, { "epoch": 4.7, "grad_norm": 6.2460246086120605, "learning_rate": 2.675097063759223e-06, "loss": 0.1335, "step": 478475 }, { "epoch": 4.7, "grad_norm": 8.45829963684082, "learning_rate": 2.674972941304974e-06, "loss": 0.086, "step": 478500 }, { "epoch": 4.7, "grad_norm": 17.660736083984375, "learning_rate": 2.6748488188507256e-06, "loss": 0.135, "step": 478525 }, { "epoch": 4.71, "grad_norm": 3.289024829864502, "learning_rate": 2.6747246963964773e-06, "loss": 0.078, "step": 478550 }, { "epoch": 4.71, "grad_norm": 10.926634788513184, "learning_rate": 2.674600573942229e-06, "loss": 0.1054, "step": 478575 }, { "epoch": 4.71, "grad_norm": 4.110174179077148, "learning_rate": 2.67447645148798e-06, "loss": 0.0984, "step": 478600 }, { "epoch": 4.71, "grad_norm": 26.972963333129883, "learning_rate": 2.674352329033732e-06, "loss": 0.1343, "step": 478625 }, { "epoch": 4.71, "grad_norm": 6.347907066345215, "learning_rate": 2.6742282065794834e-06, "loss": 0.0664, "step": 478650 }, { "epoch": 4.71, "grad_norm": 10.479639053344727, "learning_rate": 2.6741040841252346e-06, "loss": 0.1061, "step": 478675 }, { "epoch": 4.71, "grad_norm": 0.08713240176439285, "learning_rate": 2.6739799616709867e-06, "loss": 0.0761, "step": 478700 }, { "epoch": 4.71, "grad_norm": 7.430052757263184, "learning_rate": 2.673855839216738e-06, "loss": 0.0905, "step": 478725 }, { "epoch": 4.71, "grad_norm": 4.155251502990723, "learning_rate": 2.6737317167624895e-06, "loss": 0.0798, "step": 478750 }, { "epoch": 4.71, "grad_norm": 12.976927757263184, "learning_rate": 2.6736075943082407e-06, "loss": 0.1317, "step": 478775 }, { "epoch": 4.71, "grad_norm": 4.040083408355713, "learning_rate": 2.6734834718539928e-06, "loss": 0.0691, "step": 478800 }, { "epoch": 4.71, "grad_norm": 8.53876781463623, "learning_rate": 2.673359349399744e-06, "loss": 0.1087, "step": 478825 }, { "epoch": 4.71, "grad_norm": 5.900203227996826, "learning_rate": 2.673240191843666e-06, "loss": 0.0929, "step": 478850 }, { "epoch": 4.71, "grad_norm": 13.506450653076172, "learning_rate": 2.673116069389417e-06, "loss": 0.1222, "step": 478875 }, { "epoch": 4.71, "grad_norm": 7.290081977844238, "learning_rate": 2.6729919469351683e-06, "loss": 0.0677, "step": 478900 }, { "epoch": 4.71, "grad_norm": 14.289203643798828, "learning_rate": 2.6728678244809203e-06, "loss": 0.1268, "step": 478925 }, { "epoch": 4.71, "grad_norm": 7.763796329498291, "learning_rate": 2.6727437020266716e-06, "loss": 0.0866, "step": 478950 }, { "epoch": 4.71, "grad_norm": 20.384357452392578, "learning_rate": 2.672619579572423e-06, "loss": 0.0961, "step": 478975 }, { "epoch": 4.71, "grad_norm": 3.714977979660034, "learning_rate": 2.672495457118175e-06, "loss": 0.078, "step": 479000 }, { "epoch": 4.71, "grad_norm": 13.947840690612793, "learning_rate": 2.6723713346639265e-06, "loss": 0.113, "step": 479025 }, { "epoch": 4.71, "grad_norm": 3.4245247840881348, "learning_rate": 2.6722472122096777e-06, "loss": 0.0932, "step": 479050 }, { "epoch": 4.71, "grad_norm": 15.458610534667969, "learning_rate": 2.6721230897554297e-06, "loss": 0.1145, "step": 479075 }, { "epoch": 4.71, "grad_norm": 5.663729667663574, "learning_rate": 2.671998967301181e-06, "loss": 0.0777, "step": 479100 }, { "epoch": 4.71, "grad_norm": 13.98985481262207, "learning_rate": 2.671874844846932e-06, "loss": 0.1094, "step": 479125 }, { "epoch": 4.71, "grad_norm": 4.641397953033447, "learning_rate": 2.6717507223926842e-06, "loss": 0.0808, "step": 479150 }, { "epoch": 4.71, "grad_norm": 16.71841049194336, "learning_rate": 2.6716265999384354e-06, "loss": 0.0915, "step": 479175 }, { "epoch": 4.71, "grad_norm": 3.916027307510376, "learning_rate": 2.671502477484187e-06, "loss": 0.095, "step": 479200 }, { "epoch": 4.71, "grad_norm": 17.482759475708008, "learning_rate": 2.6713783550299387e-06, "loss": 0.1527, "step": 479225 }, { "epoch": 4.71, "grad_norm": 3.695880889892578, "learning_rate": 2.6712542325756903e-06, "loss": 0.0768, "step": 479250 }, { "epoch": 4.71, "grad_norm": 11.376517295837402, "learning_rate": 2.6711301101214415e-06, "loss": 0.1139, "step": 479275 }, { "epoch": 4.71, "grad_norm": 6.544904708862305, "learning_rate": 2.6710059876671936e-06, "loss": 0.0672, "step": 479300 }, { "epoch": 4.71, "grad_norm": 21.212892532348633, "learning_rate": 2.670881865212945e-06, "loss": 0.1074, "step": 479325 }, { "epoch": 4.71, "grad_norm": 5.377486705780029, "learning_rate": 2.670757742758696e-06, "loss": 0.0683, "step": 479350 }, { "epoch": 4.71, "grad_norm": 14.122859001159668, "learning_rate": 2.6706336203044477e-06, "loss": 0.1303, "step": 479375 }, { "epoch": 4.71, "grad_norm": 7.096579551696777, "learning_rate": 2.6705094978501993e-06, "loss": 0.0721, "step": 479400 }, { "epoch": 4.71, "grad_norm": 13.262829780578613, "learning_rate": 2.670385375395951e-06, "loss": 0.1161, "step": 479425 }, { "epoch": 4.71, "grad_norm": 7.609637260437012, "learning_rate": 2.670261252941702e-06, "loss": 0.0982, "step": 479450 }, { "epoch": 4.71, "grad_norm": 9.104684829711914, "learning_rate": 2.670137130487454e-06, "loss": 0.1131, "step": 479475 }, { "epoch": 4.71, "grad_norm": 0.4515303373336792, "learning_rate": 2.6700130080332054e-06, "loss": 0.0758, "step": 479500 }, { "epoch": 4.71, "grad_norm": 17.02072525024414, "learning_rate": 2.6698888855789566e-06, "loss": 0.1294, "step": 479525 }, { "epoch": 4.72, "grad_norm": 6.053476810455322, "learning_rate": 2.6697647631247087e-06, "loss": 0.0699, "step": 479550 }, { "epoch": 4.72, "grad_norm": 17.0843505859375, "learning_rate": 2.66964064067046e-06, "loss": 0.1041, "step": 479575 }, { "epoch": 4.72, "grad_norm": 10.967707633972168, "learning_rate": 2.6695165182162115e-06, "loss": 0.078, "step": 479600 }, { "epoch": 4.72, "grad_norm": 8.268441200256348, "learning_rate": 2.669392395761963e-06, "loss": 0.1411, "step": 479625 }, { "epoch": 4.72, "grad_norm": 5.556288242340088, "learning_rate": 2.669268273307715e-06, "loss": 0.0878, "step": 479650 }, { "epoch": 4.72, "grad_norm": 10.32983112335205, "learning_rate": 2.669144150853466e-06, "loss": 0.1262, "step": 479675 }, { "epoch": 4.72, "grad_norm": 0.040351174771785736, "learning_rate": 2.669020028399218e-06, "loss": 0.0793, "step": 479700 }, { "epoch": 4.72, "grad_norm": 16.26752471923828, "learning_rate": 2.6688959059449693e-06, "loss": 0.1312, "step": 479725 }, { "epoch": 4.72, "grad_norm": 4.09718656539917, "learning_rate": 2.6687717834907205e-06, "loss": 0.07, "step": 479750 }, { "epoch": 4.72, "grad_norm": 7.890202522277832, "learning_rate": 2.6686476610364725e-06, "loss": 0.1021, "step": 479775 }, { "epoch": 4.72, "grad_norm": 2.600494623184204, "learning_rate": 2.6685235385822238e-06, "loss": 0.0754, "step": 479800 }, { "epoch": 4.72, "grad_norm": 18.125226974487305, "learning_rate": 2.6683994161279754e-06, "loss": 0.1145, "step": 479825 }, { "epoch": 4.72, "grad_norm": 5.8256144523620605, "learning_rate": 2.668275293673727e-06, "loss": 0.0759, "step": 479850 }, { "epoch": 4.72, "grad_norm": 12.67068099975586, "learning_rate": 2.6681511712194787e-06, "loss": 0.1127, "step": 479875 }, { "epoch": 4.72, "grad_norm": 0.4975239932537079, "learning_rate": 2.66802704876523e-06, "loss": 0.0638, "step": 479900 }, { "epoch": 4.72, "grad_norm": 17.44512939453125, "learning_rate": 2.667902926310982e-06, "loss": 0.1265, "step": 479925 }, { "epoch": 4.72, "grad_norm": 2.811756134033203, "learning_rate": 2.667778803856733e-06, "loss": 0.0804, "step": 479950 }, { "epoch": 4.72, "grad_norm": 13.66208553314209, "learning_rate": 2.6676546814024844e-06, "loss": 0.1476, "step": 479975 }, { "epoch": 4.72, "grad_norm": 0.5952547192573547, "learning_rate": 2.6675305589482364e-06, "loss": 0.0695, "step": 480000 }, { "epoch": 4.72, "eval_loss": 0.686904788017273, "eval_runtime": 6120.9356, "eval_samples_per_second": 1.547, "eval_steps_per_second": 0.193, "eval_wer": 0.11689628532801133, "step": 480000 }, { "epoch": 4.72, "grad_norm": 13.063056945800781, "learning_rate": 2.6674064364939876e-06, "loss": 0.0754, "step": 480025 }, { "epoch": 4.72, "grad_norm": 5.036622524261475, "learning_rate": 2.6672823140397393e-06, "loss": 0.1017, "step": 480050 }, { "epoch": 4.72, "grad_norm": 7.762392044067383, "learning_rate": 2.667158191585491e-06, "loss": 0.0889, "step": 480075 }, { "epoch": 4.72, "grad_norm": 4.013542175292969, "learning_rate": 2.6670340691312425e-06, "loss": 0.067, "step": 480100 }, { "epoch": 4.72, "grad_norm": 13.160297393798828, "learning_rate": 2.6669099466769937e-06, "loss": 0.1061, "step": 480125 }, { "epoch": 4.72, "grad_norm": 1.774077296257019, "learning_rate": 2.666785824222746e-06, "loss": 0.0833, "step": 480150 }, { "epoch": 4.72, "grad_norm": 11.113624572753906, "learning_rate": 2.666661701768497e-06, "loss": 0.1238, "step": 480175 }, { "epoch": 4.72, "grad_norm": 4.304416656494141, "learning_rate": 2.6665375793142482e-06, "loss": 0.0634, "step": 480200 }, { "epoch": 4.72, "grad_norm": 18.252643585205078, "learning_rate": 2.66641345686e-06, "loss": 0.1297, "step": 480225 }, { "epoch": 4.72, "grad_norm": 10.853858947753906, "learning_rate": 2.6662893344057515e-06, "loss": 0.0849, "step": 480250 }, { "epoch": 4.72, "grad_norm": 10.860548973083496, "learning_rate": 2.666165211951503e-06, "loss": 0.0863, "step": 480275 }, { "epoch": 4.72, "grad_norm": 1.6578259468078613, "learning_rate": 2.6660410894972543e-06, "loss": 0.0537, "step": 480300 }, { "epoch": 4.72, "grad_norm": 10.956957817077637, "learning_rate": 2.6659169670430064e-06, "loss": 0.169, "step": 480325 }, { "epoch": 4.72, "grad_norm": 7.858048915863037, "learning_rate": 2.6657928445887576e-06, "loss": 0.0603, "step": 480350 }, { "epoch": 4.72, "grad_norm": 11.80666732788086, "learning_rate": 2.665668722134509e-06, "loss": 0.1287, "step": 480375 }, { "epoch": 4.72, "grad_norm": 25.26770782470703, "learning_rate": 2.665544599680261e-06, "loss": 0.0852, "step": 480400 }, { "epoch": 4.72, "grad_norm": 22.200851440429688, "learning_rate": 2.665420477226012e-06, "loss": 0.1203, "step": 480425 }, { "epoch": 4.72, "grad_norm": 14.097066879272461, "learning_rate": 2.6652963547717637e-06, "loss": 0.0609, "step": 480450 }, { "epoch": 4.72, "grad_norm": 19.714311599731445, "learning_rate": 2.6651722323175154e-06, "loss": 0.1289, "step": 480475 }, { "epoch": 4.72, "grad_norm": 6.552694797515869, "learning_rate": 2.665048109863267e-06, "loss": 0.0927, "step": 480500 }, { "epoch": 4.72, "grad_norm": 18.530601501464844, "learning_rate": 2.664923987409018e-06, "loss": 0.0997, "step": 480525 }, { "epoch": 4.72, "grad_norm": 3.6102676391601562, "learning_rate": 2.6647998649547703e-06, "loss": 0.0952, "step": 480550 }, { "epoch": 4.73, "grad_norm": 12.367671966552734, "learning_rate": 2.6646757425005215e-06, "loss": 0.1108, "step": 480575 }, { "epoch": 4.73, "grad_norm": 1.737704873085022, "learning_rate": 2.6645516200462727e-06, "loss": 0.0886, "step": 480600 }, { "epoch": 4.73, "grad_norm": 11.734610557556152, "learning_rate": 2.6644274975920247e-06, "loss": 0.0904, "step": 480625 }, { "epoch": 4.73, "grad_norm": 5.200456619262695, "learning_rate": 2.664303375137776e-06, "loss": 0.0815, "step": 480650 }, { "epoch": 4.73, "grad_norm": 15.534777641296387, "learning_rate": 2.6641792526835276e-06, "loss": 0.1085, "step": 480675 }, { "epoch": 4.73, "grad_norm": 13.271716117858887, "learning_rate": 2.6640551302292792e-06, "loss": 0.074, "step": 480700 }, { "epoch": 4.73, "grad_norm": 5.707950115203857, "learning_rate": 2.663931007775031e-06, "loss": 0.1108, "step": 480725 }, { "epoch": 4.73, "grad_norm": 5.4881157875061035, "learning_rate": 2.663806885320782e-06, "loss": 0.0534, "step": 480750 }, { "epoch": 4.73, "grad_norm": 11.9806547164917, "learning_rate": 2.663682762866534e-06, "loss": 0.1058, "step": 480775 }, { "epoch": 4.73, "grad_norm": 0.5139893293380737, "learning_rate": 2.6635586404122853e-06, "loss": 0.0794, "step": 480800 }, { "epoch": 4.73, "grad_norm": 9.456842422485352, "learning_rate": 2.6634345179580366e-06, "loss": 0.1443, "step": 480825 }, { "epoch": 4.73, "grad_norm": 5.934783458709717, "learning_rate": 2.6633103955037886e-06, "loss": 0.0682, "step": 480850 }, { "epoch": 4.73, "grad_norm": 12.938960075378418, "learning_rate": 2.66318627304954e-06, "loss": 0.1023, "step": 480875 }, { "epoch": 4.73, "grad_norm": 0.747621476650238, "learning_rate": 2.6630621505952915e-06, "loss": 0.0777, "step": 480900 }, { "epoch": 4.73, "grad_norm": 5.813045501708984, "learning_rate": 2.662938028141043e-06, "loss": 0.1248, "step": 480925 }, { "epoch": 4.73, "grad_norm": 4.824777126312256, "learning_rate": 2.6628139056867947e-06, "loss": 0.0796, "step": 480950 }, { "epoch": 4.73, "grad_norm": 9.593060493469238, "learning_rate": 2.662689783232546e-06, "loss": 0.0858, "step": 480975 }, { "epoch": 4.73, "grad_norm": 1.5275102853775024, "learning_rate": 2.662565660778298e-06, "loss": 0.1142, "step": 481000 }, { "epoch": 4.73, "grad_norm": 10.655900001525879, "learning_rate": 2.6624415383240492e-06, "loss": 0.0968, "step": 481025 }, { "epoch": 4.73, "grad_norm": 3.720114231109619, "learning_rate": 2.662317415869801e-06, "loss": 0.075, "step": 481050 }, { "epoch": 4.73, "grad_norm": 15.43164348602295, "learning_rate": 2.662193293415552e-06, "loss": 0.1102, "step": 481075 }, { "epoch": 4.73, "grad_norm": 7.976570129394531, "learning_rate": 2.6620691709613037e-06, "loss": 0.0772, "step": 481100 }, { "epoch": 4.73, "grad_norm": 11.043536186218262, "learning_rate": 2.6619450485070553e-06, "loss": 0.0807, "step": 481125 }, { "epoch": 4.73, "grad_norm": 5.390683174133301, "learning_rate": 2.6618209260528065e-06, "loss": 0.0783, "step": 481150 }, { "epoch": 4.73, "grad_norm": 17.648250579833984, "learning_rate": 2.6616968035985586e-06, "loss": 0.0723, "step": 481175 }, { "epoch": 4.73, "grad_norm": 7.1834893226623535, "learning_rate": 2.66157268114431e-06, "loss": 0.0789, "step": 481200 }, { "epoch": 4.73, "grad_norm": 9.860811233520508, "learning_rate": 2.6614485586900614e-06, "loss": 0.1198, "step": 481225 }, { "epoch": 4.73, "grad_norm": 6.8292765617370605, "learning_rate": 2.661324436235813e-06, "loss": 0.0677, "step": 481250 }, { "epoch": 4.73, "grad_norm": 9.700483322143555, "learning_rate": 2.6612003137815647e-06, "loss": 0.0866, "step": 481275 }, { "epoch": 4.73, "grad_norm": 6.152102470397949, "learning_rate": 2.661076191327316e-06, "loss": 0.0607, "step": 481300 }, { "epoch": 4.73, "grad_norm": 16.052061080932617, "learning_rate": 2.660952068873068e-06, "loss": 0.1043, "step": 481325 }, { "epoch": 4.73, "grad_norm": 0.991225004196167, "learning_rate": 2.660827946418819e-06, "loss": 0.0727, "step": 481350 }, { "epoch": 4.73, "grad_norm": 2.959970235824585, "learning_rate": 2.6607038239645704e-06, "loss": 0.1099, "step": 481375 }, { "epoch": 4.73, "grad_norm": 2.4842512607574463, "learning_rate": 2.6605797015103225e-06, "loss": 0.0777, "step": 481400 }, { "epoch": 4.73, "grad_norm": 10.16227912902832, "learning_rate": 2.6604555790560737e-06, "loss": 0.0872, "step": 481425 }, { "epoch": 4.73, "grad_norm": 0.6662618517875671, "learning_rate": 2.6603314566018253e-06, "loss": 0.089, "step": 481450 }, { "epoch": 4.73, "grad_norm": 11.729066848754883, "learning_rate": 2.660207334147577e-06, "loss": 0.0989, "step": 481475 }, { "epoch": 4.73, "grad_norm": 9.313045501708984, "learning_rate": 2.6600832116933286e-06, "loss": 0.061, "step": 481500 }, { "epoch": 4.73, "grad_norm": 11.484936714172363, "learning_rate": 2.65995908923908e-06, "loss": 0.1199, "step": 481525 }, { "epoch": 4.73, "grad_norm": 17.166240692138672, "learning_rate": 2.659834966784832e-06, "loss": 0.0725, "step": 481550 }, { "epoch": 4.73, "grad_norm": 1.0791184902191162, "learning_rate": 2.659710844330583e-06, "loss": 0.0762, "step": 481575 }, { "epoch": 4.74, "grad_norm": 0.18573272228240967, "learning_rate": 2.6595867218763343e-06, "loss": 0.0785, "step": 481600 }, { "epoch": 4.74, "grad_norm": 13.576618194580078, "learning_rate": 2.6594625994220863e-06, "loss": 0.088, "step": 481625 }, { "epoch": 4.74, "grad_norm": 1.5460258722305298, "learning_rate": 2.6593384769678375e-06, "loss": 0.0548, "step": 481650 }, { "epoch": 4.74, "grad_norm": 16.4778995513916, "learning_rate": 2.659214354513589e-06, "loss": 0.1317, "step": 481675 }, { "epoch": 4.74, "grad_norm": 7.025979518890381, "learning_rate": 2.659090232059341e-06, "loss": 0.1043, "step": 481700 }, { "epoch": 4.74, "grad_norm": 13.568647384643555, "learning_rate": 2.6589661096050925e-06, "loss": 0.1191, "step": 481725 }, { "epoch": 4.74, "grad_norm": 8.529142379760742, "learning_rate": 2.6588419871508437e-06, "loss": 0.068, "step": 481750 }, { "epoch": 4.74, "grad_norm": 13.353005409240723, "learning_rate": 2.6587178646965957e-06, "loss": 0.1222, "step": 481775 }, { "epoch": 4.74, "grad_norm": 7.355633735656738, "learning_rate": 2.658593742242347e-06, "loss": 0.087, "step": 481800 }, { "epoch": 4.74, "grad_norm": 10.47298812866211, "learning_rate": 2.658469619788098e-06, "loss": 0.1048, "step": 481825 }, { "epoch": 4.74, "grad_norm": 0.26322728395462036, "learning_rate": 2.65834549733385e-06, "loss": 0.0673, "step": 481850 }, { "epoch": 4.74, "grad_norm": 5.313503742218018, "learning_rate": 2.6582213748796014e-06, "loss": 0.1133, "step": 481875 }, { "epoch": 4.74, "grad_norm": 3.8272602558135986, "learning_rate": 2.658097252425353e-06, "loss": 0.0838, "step": 481900 }, { "epoch": 4.74, "grad_norm": 19.28863525390625, "learning_rate": 2.6579731299711043e-06, "loss": 0.1156, "step": 481925 }, { "epoch": 4.74, "grad_norm": 6.143051624298096, "learning_rate": 2.6578490075168563e-06, "loss": 0.0963, "step": 481950 }, { "epoch": 4.74, "grad_norm": 17.381519317626953, "learning_rate": 2.6577248850626075e-06, "loss": 0.1147, "step": 481975 }, { "epoch": 4.74, "grad_norm": 7.683910369873047, "learning_rate": 2.6576007626083587e-06, "loss": 0.0781, "step": 482000 }, { "epoch": 4.74, "grad_norm": 14.85884952545166, "learning_rate": 2.657476640154111e-06, "loss": 0.1172, "step": 482025 }, { "epoch": 4.74, "grad_norm": 5.019388198852539, "learning_rate": 2.657352517699862e-06, "loss": 0.0866, "step": 482050 }, { "epoch": 4.74, "grad_norm": 15.449397087097168, "learning_rate": 2.6572283952456136e-06, "loss": 0.134, "step": 482075 }, { "epoch": 4.74, "grad_norm": 0.48142167925834656, "learning_rate": 2.6571042727913653e-06, "loss": 0.0733, "step": 482100 }, { "epoch": 4.74, "grad_norm": 7.820773124694824, "learning_rate": 2.656980150337117e-06, "loss": 0.1, "step": 482125 }, { "epoch": 4.74, "grad_norm": 1.4251039028167725, "learning_rate": 2.656856027882868e-06, "loss": 0.1002, "step": 482150 }, { "epoch": 4.74, "grad_norm": 6.6339802742004395, "learning_rate": 2.65673190542862e-06, "loss": 0.107, "step": 482175 }, { "epoch": 4.74, "grad_norm": 5.471238613128662, "learning_rate": 2.6566077829743714e-06, "loss": 0.0944, "step": 482200 }, { "epoch": 4.74, "grad_norm": 24.882511138916016, "learning_rate": 2.6564836605201226e-06, "loss": 0.1437, "step": 482225 }, { "epoch": 4.74, "grad_norm": 2.9858100414276123, "learning_rate": 2.6563595380658747e-06, "loss": 0.0928, "step": 482250 }, { "epoch": 4.74, "grad_norm": 5.570662498474121, "learning_rate": 2.656235415611626e-06, "loss": 0.0991, "step": 482275 }, { "epoch": 4.74, "grad_norm": 7.681150913238525, "learning_rate": 2.6561112931573775e-06, "loss": 0.0856, "step": 482300 }, { "epoch": 4.74, "grad_norm": 10.587857246398926, "learning_rate": 2.655987170703129e-06, "loss": 0.0928, "step": 482325 }, { "epoch": 4.74, "grad_norm": 6.719884395599365, "learning_rate": 2.6558630482488808e-06, "loss": 0.102, "step": 482350 }, { "epoch": 4.74, "grad_norm": 13.627245903015137, "learning_rate": 2.655738925794632e-06, "loss": 0.0709, "step": 482375 }, { "epoch": 4.74, "grad_norm": 4.099522590637207, "learning_rate": 2.655614803340384e-06, "loss": 0.0853, "step": 482400 }, { "epoch": 4.74, "grad_norm": 9.740266799926758, "learning_rate": 2.6554906808861353e-06, "loss": 0.0998, "step": 482425 }, { "epoch": 4.74, "grad_norm": 3.022305727005005, "learning_rate": 2.6553665584318865e-06, "loss": 0.0677, "step": 482450 }, { "epoch": 4.74, "grad_norm": 10.465907096862793, "learning_rate": 2.6552424359776385e-06, "loss": 0.086, "step": 482475 }, { "epoch": 4.74, "grad_norm": 0.7668177485466003, "learning_rate": 2.6551183135233897e-06, "loss": 0.0705, "step": 482500 }, { "epoch": 4.74, "grad_norm": 9.747635841369629, "learning_rate": 2.6549941910691414e-06, "loss": 0.0942, "step": 482525 }, { "epoch": 4.74, "grad_norm": 7.117602348327637, "learning_rate": 2.654870068614893e-06, "loss": 0.0628, "step": 482550 }, { "epoch": 4.74, "grad_norm": 13.92874526977539, "learning_rate": 2.6547459461606447e-06, "loss": 0.0952, "step": 482575 }, { "epoch": 4.75, "grad_norm": 4.373654365539551, "learning_rate": 2.654621823706396e-06, "loss": 0.0769, "step": 482600 }, { "epoch": 4.75, "grad_norm": 3.0377368927001953, "learning_rate": 2.654497701252148e-06, "loss": 0.1173, "step": 482625 }, { "epoch": 4.75, "grad_norm": 0.8136001825332642, "learning_rate": 2.654373578797899e-06, "loss": 0.0921, "step": 482650 }, { "epoch": 4.75, "grad_norm": 14.896751403808594, "learning_rate": 2.6542494563436503e-06, "loss": 0.0917, "step": 482675 }, { "epoch": 4.75, "grad_norm": 6.079892635345459, "learning_rate": 2.6541253338894024e-06, "loss": 0.0791, "step": 482700 }, { "epoch": 4.75, "grad_norm": 10.664264678955078, "learning_rate": 2.6540012114351536e-06, "loss": 0.1016, "step": 482725 }, { "epoch": 4.75, "grad_norm": 4.994294166564941, "learning_rate": 2.6538770889809053e-06, "loss": 0.0764, "step": 482750 }, { "epoch": 4.75, "grad_norm": 7.702170372009277, "learning_rate": 2.6537529665266565e-06, "loss": 0.1015, "step": 482775 }, { "epoch": 4.75, "grad_norm": 7.628057479858398, "learning_rate": 2.6536288440724085e-06, "loss": 0.0701, "step": 482800 }, { "epoch": 4.75, "grad_norm": 27.780153274536133, "learning_rate": 2.6535047216181597e-06, "loss": 0.0921, "step": 482825 }, { "epoch": 4.75, "grad_norm": 6.210180759429932, "learning_rate": 2.653380599163911e-06, "loss": 0.0749, "step": 482850 }, { "epoch": 4.75, "grad_norm": 16.111820220947266, "learning_rate": 2.653256476709663e-06, "loss": 0.1005, "step": 482875 }, { "epoch": 4.75, "grad_norm": 7.2305145263671875, "learning_rate": 2.6531323542554142e-06, "loss": 0.0614, "step": 482900 }, { "epoch": 4.75, "grad_norm": 19.524303436279297, "learning_rate": 2.653008231801166e-06, "loss": 0.1289, "step": 482925 }, { "epoch": 4.75, "grad_norm": 13.209322929382324, "learning_rate": 2.6528841093469175e-06, "loss": 0.0893, "step": 482950 }, { "epoch": 4.75, "grad_norm": 10.479758262634277, "learning_rate": 2.652759986892669e-06, "loss": 0.1279, "step": 482975 }, { "epoch": 4.75, "grad_norm": 1.9473507404327393, "learning_rate": 2.6526358644384203e-06, "loss": 0.0584, "step": 483000 }, { "epoch": 4.75, "grad_norm": 15.438481330871582, "learning_rate": 2.6525117419841724e-06, "loss": 0.0999, "step": 483025 }, { "epoch": 4.75, "grad_norm": 0.11466123908758163, "learning_rate": 2.6523876195299236e-06, "loss": 0.0566, "step": 483050 }, { "epoch": 4.75, "grad_norm": 10.750046730041504, "learning_rate": 2.652263497075675e-06, "loss": 0.113, "step": 483075 }, { "epoch": 4.75, "grad_norm": 7.6131415367126465, "learning_rate": 2.652139374621427e-06, "loss": 0.0798, "step": 483100 }, { "epoch": 4.75, "grad_norm": 24.50585174560547, "learning_rate": 2.652015252167178e-06, "loss": 0.1746, "step": 483125 }, { "epoch": 4.75, "grad_norm": 7.188660144805908, "learning_rate": 2.6518911297129297e-06, "loss": 0.0676, "step": 483150 }, { "epoch": 4.75, "grad_norm": 10.138315200805664, "learning_rate": 2.6517670072586814e-06, "loss": 0.0933, "step": 483175 }, { "epoch": 4.75, "grad_norm": 2.619067430496216, "learning_rate": 2.651642884804433e-06, "loss": 0.0928, "step": 483200 }, { "epoch": 4.75, "grad_norm": 11.064399719238281, "learning_rate": 2.651518762350184e-06, "loss": 0.0938, "step": 483225 }, { "epoch": 4.75, "grad_norm": 9.920543670654297, "learning_rate": 2.651399604794106e-06, "loss": 0.0899, "step": 483250 }, { "epoch": 4.75, "grad_norm": 13.003085136413574, "learning_rate": 2.6512754823398573e-06, "loss": 0.0924, "step": 483275 }, { "epoch": 4.75, "grad_norm": 7.551977634429932, "learning_rate": 2.6511513598856093e-06, "loss": 0.0607, "step": 483300 }, { "epoch": 4.75, "grad_norm": 11.677491188049316, "learning_rate": 2.6510272374313606e-06, "loss": 0.0942, "step": 483325 }, { "epoch": 4.75, "grad_norm": 8.072288513183594, "learning_rate": 2.6509031149771118e-06, "loss": 0.0967, "step": 483350 }, { "epoch": 4.75, "grad_norm": 19.475116729736328, "learning_rate": 2.6507789925228634e-06, "loss": 0.1077, "step": 483375 }, { "epoch": 4.75, "grad_norm": 3.410831928253174, "learning_rate": 2.650654870068615e-06, "loss": 0.0769, "step": 483400 }, { "epoch": 4.75, "grad_norm": 25.961631774902344, "learning_rate": 2.6505307476143667e-06, "loss": 0.104, "step": 483425 }, { "epoch": 4.75, "grad_norm": 8.71091365814209, "learning_rate": 2.650406625160118e-06, "loss": 0.073, "step": 483450 }, { "epoch": 4.75, "grad_norm": 17.013473510742188, "learning_rate": 2.65028250270587e-06, "loss": 0.1417, "step": 483475 }, { "epoch": 4.75, "grad_norm": 3.940307855606079, "learning_rate": 2.650158380251621e-06, "loss": 0.0717, "step": 483500 }, { "epoch": 4.75, "grad_norm": 16.596643447875977, "learning_rate": 2.6500342577973724e-06, "loss": 0.1075, "step": 483525 }, { "epoch": 4.75, "grad_norm": 8.458046913146973, "learning_rate": 2.6499101353431244e-06, "loss": 0.0635, "step": 483550 }, { "epoch": 4.75, "grad_norm": 12.104341506958008, "learning_rate": 2.6497860128888756e-06, "loss": 0.1008, "step": 483575 }, { "epoch": 4.75, "grad_norm": 2.7663121223449707, "learning_rate": 2.6496618904346273e-06, "loss": 0.0731, "step": 483600 }, { "epoch": 4.76, "grad_norm": 16.449684143066406, "learning_rate": 2.649537767980379e-06, "loss": 0.1132, "step": 483625 }, { "epoch": 4.76, "grad_norm": 0.8524548411369324, "learning_rate": 2.6494136455261305e-06, "loss": 0.083, "step": 483650 }, { "epoch": 4.76, "grad_norm": 14.475418090820312, "learning_rate": 2.6492895230718817e-06, "loss": 0.1111, "step": 483675 }, { "epoch": 4.76, "grad_norm": 5.385391712188721, "learning_rate": 2.649165400617634e-06, "loss": 0.0659, "step": 483700 }, { "epoch": 4.76, "grad_norm": 17.56349754333496, "learning_rate": 2.649041278163385e-06, "loss": 0.1269, "step": 483725 }, { "epoch": 4.76, "grad_norm": 5.3631157875061035, "learning_rate": 2.6489171557091362e-06, "loss": 0.0859, "step": 483750 }, { "epoch": 4.76, "grad_norm": 12.690234184265137, "learning_rate": 2.6487930332548883e-06, "loss": 0.0976, "step": 483775 }, { "epoch": 4.76, "grad_norm": 4.029070854187012, "learning_rate": 2.6486689108006395e-06, "loss": 0.0647, "step": 483800 }, { "epoch": 4.76, "grad_norm": 8.740989685058594, "learning_rate": 2.648544788346391e-06, "loss": 0.1032, "step": 483825 }, { "epoch": 4.76, "grad_norm": 7.986529350280762, "learning_rate": 2.6484206658921428e-06, "loss": 0.0728, "step": 483850 }, { "epoch": 4.76, "grad_norm": 8.53061580657959, "learning_rate": 2.6482965434378944e-06, "loss": 0.0891, "step": 483875 }, { "epoch": 4.76, "grad_norm": 0.4792573153972626, "learning_rate": 2.6481724209836456e-06, "loss": 0.0819, "step": 483900 }, { "epoch": 4.76, "grad_norm": 5.246667385101318, "learning_rate": 2.6480482985293977e-06, "loss": 0.0799, "step": 483925 }, { "epoch": 4.76, "grad_norm": 6.178171157836914, "learning_rate": 2.647924176075149e-06, "loss": 0.094, "step": 483950 }, { "epoch": 4.76, "grad_norm": 17.414093017578125, "learning_rate": 2.6478000536209005e-06, "loss": 0.1159, "step": 483975 }, { "epoch": 4.76, "grad_norm": 12.503373146057129, "learning_rate": 2.647675931166652e-06, "loss": 0.0759, "step": 484000 }, { "epoch": 4.76, "grad_norm": 11.043478965759277, "learning_rate": 2.647551808712404e-06, "loss": 0.1029, "step": 484025 }, { "epoch": 4.76, "grad_norm": 5.093389987945557, "learning_rate": 2.647427686258155e-06, "loss": 0.0882, "step": 484050 }, { "epoch": 4.76, "grad_norm": 19.286449432373047, "learning_rate": 2.647303563803907e-06, "loss": 0.0894, "step": 484075 }, { "epoch": 4.76, "grad_norm": 1.3761961460113525, "learning_rate": 2.6471794413496583e-06, "loss": 0.0648, "step": 484100 }, { "epoch": 4.76, "grad_norm": 13.809890747070312, "learning_rate": 2.6470553188954095e-06, "loss": 0.1198, "step": 484125 }, { "epoch": 4.76, "grad_norm": 4.813207626342773, "learning_rate": 2.6469311964411615e-06, "loss": 0.09, "step": 484150 }, { "epoch": 4.76, "grad_norm": 16.944175720214844, "learning_rate": 2.6468070739869128e-06, "loss": 0.1185, "step": 484175 }, { "epoch": 4.76, "grad_norm": 4.244927406311035, "learning_rate": 2.6466829515326644e-06, "loss": 0.0713, "step": 484200 }, { "epoch": 4.76, "grad_norm": 7.2426300048828125, "learning_rate": 2.6465588290784156e-06, "loss": 0.1114, "step": 484225 }, { "epoch": 4.76, "grad_norm": 10.04469108581543, "learning_rate": 2.6464347066241677e-06, "loss": 0.0632, "step": 484250 }, { "epoch": 4.76, "grad_norm": 12.834964752197266, "learning_rate": 2.646310584169919e-06, "loss": 0.1628, "step": 484275 }, { "epoch": 4.76, "grad_norm": 2.3240413665771484, "learning_rate": 2.64618646171567e-06, "loss": 0.049, "step": 484300 }, { "epoch": 4.76, "grad_norm": 10.726460456848145, "learning_rate": 2.646062339261422e-06, "loss": 0.1316, "step": 484325 }, { "epoch": 4.76, "grad_norm": 3.1176490783691406, "learning_rate": 2.6459382168071734e-06, "loss": 0.0868, "step": 484350 }, { "epoch": 4.76, "grad_norm": 7.690560340881348, "learning_rate": 2.645814094352925e-06, "loss": 0.09, "step": 484375 }, { "epoch": 4.76, "grad_norm": 0.054078616201877594, "learning_rate": 2.6456899718986766e-06, "loss": 0.0632, "step": 484400 }, { "epoch": 4.76, "grad_norm": 12.610539436340332, "learning_rate": 2.6455658494444283e-06, "loss": 0.1189, "step": 484425 }, { "epoch": 4.76, "grad_norm": 4.194905757904053, "learning_rate": 2.6454417269901795e-06, "loss": 0.0679, "step": 484450 }, { "epoch": 4.76, "grad_norm": 10.743916511535645, "learning_rate": 2.6453176045359315e-06, "loss": 0.114, "step": 484475 }, { "epoch": 4.76, "grad_norm": 2.784926414489746, "learning_rate": 2.6451934820816827e-06, "loss": 0.0879, "step": 484500 }, { "epoch": 4.76, "grad_norm": 8.764946937561035, "learning_rate": 2.645069359627434e-06, "loss": 0.0967, "step": 484525 }, { "epoch": 4.76, "grad_norm": 4.695706367492676, "learning_rate": 2.644945237173186e-06, "loss": 0.0962, "step": 484550 }, { "epoch": 4.76, "grad_norm": 13.644510269165039, "learning_rate": 2.6448211147189372e-06, "loss": 0.0966, "step": 484575 }, { "epoch": 4.76, "grad_norm": 2.435875415802002, "learning_rate": 2.644696992264689e-06, "loss": 0.0808, "step": 484600 }, { "epoch": 4.76, "grad_norm": 13.2854585647583, "learning_rate": 2.6445728698104405e-06, "loss": 0.1224, "step": 484625 }, { "epoch": 4.77, "grad_norm": 2.1038577556610107, "learning_rate": 2.644448747356192e-06, "loss": 0.0755, "step": 484650 }, { "epoch": 4.77, "grad_norm": 11.127453804016113, "learning_rate": 2.6443246249019433e-06, "loss": 0.0918, "step": 484675 }, { "epoch": 4.77, "grad_norm": 4.960681438446045, "learning_rate": 2.6442005024476954e-06, "loss": 0.0754, "step": 484700 }, { "epoch": 4.77, "grad_norm": 13.149819374084473, "learning_rate": 2.6440763799934466e-06, "loss": 0.113, "step": 484725 }, { "epoch": 4.77, "grad_norm": 8.073431968688965, "learning_rate": 2.643952257539198e-06, "loss": 0.0954, "step": 484750 }, { "epoch": 4.77, "grad_norm": 15.333642959594727, "learning_rate": 2.64382813508495e-06, "loss": 0.1119, "step": 484775 }, { "epoch": 4.77, "grad_norm": 0.2997889816761017, "learning_rate": 2.643704012630701e-06, "loss": 0.0883, "step": 484800 }, { "epoch": 4.77, "grad_norm": 15.025461196899414, "learning_rate": 2.6435798901764527e-06, "loss": 0.1425, "step": 484825 }, { "epoch": 4.77, "grad_norm": 8.079925537109375, "learning_rate": 2.6434557677222044e-06, "loss": 0.0873, "step": 484850 }, { "epoch": 4.77, "grad_norm": 17.69977378845215, "learning_rate": 2.643331645267956e-06, "loss": 0.1245, "step": 484875 }, { "epoch": 4.77, "grad_norm": 4.297965049743652, "learning_rate": 2.643207522813707e-06, "loss": 0.0919, "step": 484900 }, { "epoch": 4.77, "grad_norm": 13.124120712280273, "learning_rate": 2.6430834003594593e-06, "loss": 0.1116, "step": 484925 }, { "epoch": 4.77, "grad_norm": 0.1367677003145218, "learning_rate": 2.6429592779052105e-06, "loss": 0.0601, "step": 484950 }, { "epoch": 4.77, "grad_norm": 23.050825119018555, "learning_rate": 2.6428351554509617e-06, "loss": 0.1273, "step": 484975 }, { "epoch": 4.77, "grad_norm": 1.453568935394287, "learning_rate": 2.6427110329967137e-06, "loss": 0.0786, "step": 485000 }, { "epoch": 4.77, "grad_norm": 16.55893898010254, "learning_rate": 2.642586910542465e-06, "loss": 0.1293, "step": 485025 }, { "epoch": 4.77, "grad_norm": 1.7664635181427002, "learning_rate": 2.6424627880882166e-06, "loss": 0.092, "step": 485050 }, { "epoch": 4.77, "grad_norm": 8.718053817749023, "learning_rate": 2.642338665633968e-06, "loss": 0.0888, "step": 485075 }, { "epoch": 4.77, "grad_norm": 1.1323132514953613, "learning_rate": 2.64221454317972e-06, "loss": 0.0747, "step": 485100 }, { "epoch": 4.77, "grad_norm": 9.995630264282227, "learning_rate": 2.642090420725471e-06, "loss": 0.1261, "step": 485125 }, { "epoch": 4.77, "grad_norm": 7.359388828277588, "learning_rate": 2.6419662982712223e-06, "loss": 0.0745, "step": 485150 }, { "epoch": 4.77, "grad_norm": 11.896576881408691, "learning_rate": 2.6418421758169743e-06, "loss": 0.0962, "step": 485175 }, { "epoch": 4.77, "grad_norm": 4.15234375, "learning_rate": 2.6417180533627256e-06, "loss": 0.0582, "step": 485200 }, { "epoch": 4.77, "grad_norm": 11.244282722473145, "learning_rate": 2.641593930908477e-06, "loss": 0.1435, "step": 485225 }, { "epoch": 4.77, "grad_norm": 6.857761859893799, "learning_rate": 2.641469808454229e-06, "loss": 0.0674, "step": 485250 }, { "epoch": 4.77, "grad_norm": 12.485013008117676, "learning_rate": 2.6413456859999805e-06, "loss": 0.1319, "step": 485275 }, { "epoch": 4.77, "grad_norm": 3.3038604259490967, "learning_rate": 2.6412215635457317e-06, "loss": 0.0682, "step": 485300 }, { "epoch": 4.77, "grad_norm": 11.729077339172363, "learning_rate": 2.6410974410914837e-06, "loss": 0.117, "step": 485325 }, { "epoch": 4.77, "grad_norm": 0.6480989456176758, "learning_rate": 2.640973318637235e-06, "loss": 0.0759, "step": 485350 }, { "epoch": 4.77, "grad_norm": 15.374236106872559, "learning_rate": 2.640849196182986e-06, "loss": 0.1183, "step": 485375 }, { "epoch": 4.77, "grad_norm": 1.3990105390548706, "learning_rate": 2.640725073728738e-06, "loss": 0.082, "step": 485400 }, { "epoch": 4.77, "grad_norm": 16.50266456604004, "learning_rate": 2.6406009512744894e-06, "loss": 0.1371, "step": 485425 }, { "epoch": 4.77, "grad_norm": 5.383735656738281, "learning_rate": 2.640476828820241e-06, "loss": 0.0979, "step": 485450 }, { "epoch": 4.77, "grad_norm": 15.587899208068848, "learning_rate": 2.6403527063659927e-06, "loss": 0.1357, "step": 485475 }, { "epoch": 4.77, "grad_norm": 5.799475193023682, "learning_rate": 2.6402285839117443e-06, "loss": 0.0659, "step": 485500 }, { "epoch": 4.77, "grad_norm": 12.172022819519043, "learning_rate": 2.6401044614574955e-06, "loss": 0.1056, "step": 485525 }, { "epoch": 4.77, "grad_norm": 3.183568239212036, "learning_rate": 2.6399803390032476e-06, "loss": 0.0568, "step": 485550 }, { "epoch": 4.77, "grad_norm": 5.105783462524414, "learning_rate": 2.6398611814471686e-06, "loss": 0.0969, "step": 485575 }, { "epoch": 4.77, "grad_norm": 0.23652954399585724, "learning_rate": 2.6397370589929207e-06, "loss": 0.083, "step": 485600 }, { "epoch": 4.77, "grad_norm": 18.60116195678711, "learning_rate": 2.639612936538672e-06, "loss": 0.126, "step": 485625 }, { "epoch": 4.77, "grad_norm": 4.272345066070557, "learning_rate": 2.639488814084423e-06, "loss": 0.0716, "step": 485650 }, { "epoch": 4.78, "grad_norm": 5.773649215698242, "learning_rate": 2.6393646916301747e-06, "loss": 0.0858, "step": 485675 }, { "epoch": 4.78, "grad_norm": 4.495072364807129, "learning_rate": 2.6392405691759264e-06, "loss": 0.0796, "step": 485700 }, { "epoch": 4.78, "grad_norm": 15.756354331970215, "learning_rate": 2.639116446721678e-06, "loss": 0.1065, "step": 485725 }, { "epoch": 4.78, "grad_norm": 2.4452033042907715, "learning_rate": 2.6389923242674292e-06, "loss": 0.1023, "step": 485750 }, { "epoch": 4.78, "grad_norm": 14.829547882080078, "learning_rate": 2.6388682018131813e-06, "loss": 0.142, "step": 485775 }, { "epoch": 4.78, "grad_norm": 0.06380099058151245, "learning_rate": 2.6387440793589325e-06, "loss": 0.0716, "step": 485800 }, { "epoch": 4.78, "grad_norm": 9.932948112487793, "learning_rate": 2.6386199569046837e-06, "loss": 0.1066, "step": 485825 }, { "epoch": 4.78, "grad_norm": 2.322960138320923, "learning_rate": 2.6384958344504358e-06, "loss": 0.0806, "step": 485850 }, { "epoch": 4.78, "grad_norm": 12.934516906738281, "learning_rate": 2.638371711996187e-06, "loss": 0.1006, "step": 485875 }, { "epoch": 4.78, "grad_norm": 5.622056484222412, "learning_rate": 2.6382475895419386e-06, "loss": 0.062, "step": 485900 }, { "epoch": 4.78, "grad_norm": 9.763483047485352, "learning_rate": 2.6381234670876902e-06, "loss": 0.0876, "step": 485925 }, { "epoch": 4.78, "grad_norm": 2.2637505531311035, "learning_rate": 2.637999344633442e-06, "loss": 0.0908, "step": 485950 }, { "epoch": 4.78, "grad_norm": 6.842741966247559, "learning_rate": 2.637875222179193e-06, "loss": 0.0718, "step": 485975 }, { "epoch": 4.78, "grad_norm": 12.252246856689453, "learning_rate": 2.637751099724945e-06, "loss": 0.0878, "step": 486000 }, { "epoch": 4.78, "grad_norm": 15.853581428527832, "learning_rate": 2.6376269772706964e-06, "loss": 0.1115, "step": 486025 }, { "epoch": 4.78, "grad_norm": 12.96560287475586, "learning_rate": 2.6375028548164476e-06, "loss": 0.0889, "step": 486050 }, { "epoch": 4.78, "grad_norm": 9.898176193237305, "learning_rate": 2.6373787323621996e-06, "loss": 0.0873, "step": 486075 }, { "epoch": 4.78, "grad_norm": 6.019704341888428, "learning_rate": 2.637254609907951e-06, "loss": 0.0806, "step": 486100 }, { "epoch": 4.78, "grad_norm": 15.293501853942871, "learning_rate": 2.6371304874537025e-06, "loss": 0.1005, "step": 486125 }, { "epoch": 4.78, "grad_norm": 3.832904100418091, "learning_rate": 2.637006364999454e-06, "loss": 0.0742, "step": 486150 }, { "epoch": 4.78, "grad_norm": 6.390000343322754, "learning_rate": 2.6368822425452057e-06, "loss": 0.1245, "step": 486175 }, { "epoch": 4.78, "grad_norm": 2.0017905235290527, "learning_rate": 2.636758120090957e-06, "loss": 0.0579, "step": 486200 }, { "epoch": 4.78, "grad_norm": 5.978865146636963, "learning_rate": 2.636633997636709e-06, "loss": 0.1009, "step": 486225 }, { "epoch": 4.78, "grad_norm": 4.417173862457275, "learning_rate": 2.6365098751824602e-06, "loss": 0.0849, "step": 486250 }, { "epoch": 4.78, "grad_norm": 5.051865100860596, "learning_rate": 2.6363857527282114e-06, "loss": 0.1077, "step": 486275 }, { "epoch": 4.78, "grad_norm": 0.18978247046470642, "learning_rate": 2.6362616302739635e-06, "loss": 0.068, "step": 486300 }, { "epoch": 4.78, "grad_norm": 12.85881519317627, "learning_rate": 2.6361375078197147e-06, "loss": 0.1187, "step": 486325 }, { "epoch": 4.78, "grad_norm": 4.0650410652160645, "learning_rate": 2.6360133853654663e-06, "loss": 0.0865, "step": 486350 }, { "epoch": 4.78, "grad_norm": 13.573837280273438, "learning_rate": 2.635889262911218e-06, "loss": 0.0977, "step": 486375 }, { "epoch": 4.78, "grad_norm": 0.2968183159828186, "learning_rate": 2.6357651404569696e-06, "loss": 0.0696, "step": 486400 }, { "epoch": 4.78, "grad_norm": 17.547914505004883, "learning_rate": 2.635641018002721e-06, "loss": 0.119, "step": 486425 }, { "epoch": 4.78, "grad_norm": 5.571857929229736, "learning_rate": 2.635516895548473e-06, "loss": 0.0756, "step": 486450 }, { "epoch": 4.78, "grad_norm": 19.361785888671875, "learning_rate": 2.635392773094224e-06, "loss": 0.1307, "step": 486475 }, { "epoch": 4.78, "grad_norm": 4.9876790046691895, "learning_rate": 2.6352686506399753e-06, "loss": 0.0674, "step": 486500 }, { "epoch": 4.78, "grad_norm": 14.03460693359375, "learning_rate": 2.635144528185727e-06, "loss": 0.0982, "step": 486525 }, { "epoch": 4.78, "grad_norm": 4.17259407043457, "learning_rate": 2.6350204057314786e-06, "loss": 0.082, "step": 486550 }, { "epoch": 4.78, "grad_norm": 14.57357406616211, "learning_rate": 2.63489628327723e-06, "loss": 0.0799, "step": 486575 }, { "epoch": 4.78, "grad_norm": 0.7463424205780029, "learning_rate": 2.6347721608229814e-06, "loss": 0.105, "step": 486600 }, { "epoch": 4.78, "grad_norm": 8.615859031677246, "learning_rate": 2.6346480383687335e-06, "loss": 0.1023, "step": 486625 }, { "epoch": 4.78, "grad_norm": 0.262637197971344, "learning_rate": 2.6345239159144847e-06, "loss": 0.0771, "step": 486650 }, { "epoch": 4.79, "grad_norm": 13.108562469482422, "learning_rate": 2.634399793460236e-06, "loss": 0.121, "step": 486675 }, { "epoch": 4.79, "grad_norm": 0.060890987515449524, "learning_rate": 2.634275671005988e-06, "loss": 0.0785, "step": 486700 }, { "epoch": 4.79, "grad_norm": 6.910096645355225, "learning_rate": 2.634151548551739e-06, "loss": 0.1344, "step": 486725 }, { "epoch": 4.79, "grad_norm": 7.837105751037598, "learning_rate": 2.634027426097491e-06, "loss": 0.0774, "step": 486750 }, { "epoch": 4.79, "grad_norm": 11.97983169555664, "learning_rate": 2.6339033036432424e-06, "loss": 0.0912, "step": 486775 }, { "epoch": 4.79, "grad_norm": 3.074277400970459, "learning_rate": 2.633779181188994e-06, "loss": 0.0733, "step": 486800 }, { "epoch": 4.79, "grad_norm": 19.006072998046875, "learning_rate": 2.6336550587347453e-06, "loss": 0.1343, "step": 486825 }, { "epoch": 4.79, "grad_norm": 0.7068982720375061, "learning_rate": 2.6335309362804973e-06, "loss": 0.0637, "step": 486850 }, { "epoch": 4.79, "grad_norm": 13.221572875976562, "learning_rate": 2.6334068138262486e-06, "loss": 0.1692, "step": 486875 }, { "epoch": 4.79, "grad_norm": 2.5918092727661133, "learning_rate": 2.633282691372e-06, "loss": 0.0609, "step": 486900 }, { "epoch": 4.79, "grad_norm": 20.022871017456055, "learning_rate": 2.633158568917752e-06, "loss": 0.11, "step": 486925 }, { "epoch": 4.79, "grad_norm": 7.3646159172058105, "learning_rate": 2.6330344464635035e-06, "loss": 0.0701, "step": 486950 }, { "epoch": 4.79, "grad_norm": 16.311857223510742, "learning_rate": 2.6329103240092547e-06, "loss": 0.1164, "step": 486975 }, { "epoch": 4.79, "grad_norm": 3.269348382949829, "learning_rate": 2.6327862015550067e-06, "loss": 0.0874, "step": 487000 }, { "epoch": 4.79, "grad_norm": 15.193488121032715, "learning_rate": 2.632662079100758e-06, "loss": 0.1148, "step": 487025 }, { "epoch": 4.79, "grad_norm": 1.6365954875946045, "learning_rate": 2.632537956646509e-06, "loss": 0.0737, "step": 487050 }, { "epoch": 4.79, "grad_norm": 17.553956985473633, "learning_rate": 2.6324138341922612e-06, "loss": 0.1061, "step": 487075 }, { "epoch": 4.79, "grad_norm": 3.2101268768310547, "learning_rate": 2.6322897117380124e-06, "loss": 0.0743, "step": 487100 }, { "epoch": 4.79, "grad_norm": 9.19019889831543, "learning_rate": 2.632165589283764e-06, "loss": 0.1058, "step": 487125 }, { "epoch": 4.79, "grad_norm": 5.458634376525879, "learning_rate": 2.6320414668295157e-06, "loss": 0.1064, "step": 487150 }, { "epoch": 4.79, "grad_norm": 13.363836288452148, "learning_rate": 2.6319173443752673e-06, "loss": 0.0826, "step": 487175 }, { "epoch": 4.79, "grad_norm": 1.6472808122634888, "learning_rate": 2.6317932219210185e-06, "loss": 0.0608, "step": 487200 }, { "epoch": 4.79, "grad_norm": 10.544957160949707, "learning_rate": 2.6316690994667706e-06, "loss": 0.1008, "step": 487225 }, { "epoch": 4.79, "grad_norm": 4.321023464202881, "learning_rate": 2.631544977012522e-06, "loss": 0.0811, "step": 487250 }, { "epoch": 4.79, "grad_norm": 11.336556434631348, "learning_rate": 2.631420854558273e-06, "loss": 0.1264, "step": 487275 }, { "epoch": 4.79, "grad_norm": 7.311874866485596, "learning_rate": 2.631296732104025e-06, "loss": 0.0831, "step": 487300 }, { "epoch": 4.79, "grad_norm": 9.810210227966309, "learning_rate": 2.6311726096497763e-06, "loss": 0.0827, "step": 487325 }, { "epoch": 4.79, "grad_norm": 0.02945549413561821, "learning_rate": 2.631048487195528e-06, "loss": 0.0603, "step": 487350 }, { "epoch": 4.79, "grad_norm": 15.00711727142334, "learning_rate": 2.630924364741279e-06, "loss": 0.0804, "step": 487375 }, { "epoch": 4.79, "grad_norm": 2.447601795196533, "learning_rate": 2.630800242287031e-06, "loss": 0.0584, "step": 487400 }, { "epoch": 4.79, "grad_norm": 21.158594131469727, "learning_rate": 2.6306761198327824e-06, "loss": 0.0989, "step": 487425 }, { "epoch": 4.79, "grad_norm": 0.31328898668289185, "learning_rate": 2.6305519973785336e-06, "loss": 0.0695, "step": 487450 }, { "epoch": 4.79, "grad_norm": 8.219237327575684, "learning_rate": 2.6304278749242857e-06, "loss": 0.0948, "step": 487475 }, { "epoch": 4.79, "grad_norm": 1.7294093370437622, "learning_rate": 2.630303752470037e-06, "loss": 0.0679, "step": 487500 }, { "epoch": 4.79, "grad_norm": 12.8906831741333, "learning_rate": 2.6301796300157885e-06, "loss": 0.1268, "step": 487525 }, { "epoch": 4.79, "grad_norm": 2.9095420837402344, "learning_rate": 2.63005550756154e-06, "loss": 0.0722, "step": 487550 }, { "epoch": 4.79, "grad_norm": Infinity, "learning_rate": 2.6299363500054616e-06, "loss": 0.1254, "step": 487575 }, { "epoch": 4.79, "grad_norm": 9.654547691345215, "learning_rate": 2.6298122275512132e-06, "loss": 0.0822, "step": 487600 }, { "epoch": 4.79, "grad_norm": 5.271600723266602, "learning_rate": 2.629688105096965e-06, "loss": 0.1055, "step": 487625 }, { "epoch": 4.79, "grad_norm": 1.6327332258224487, "learning_rate": 2.629563982642716e-06, "loss": 0.0811, "step": 487650 }, { "epoch": 4.79, "grad_norm": 28.36138343811035, "learning_rate": 2.629439860188468e-06, "loss": 0.148, "step": 487675 }, { "epoch": 4.8, "grad_norm": 0.7201603651046753, "learning_rate": 2.6293157377342194e-06, "loss": 0.1064, "step": 487700 }, { "epoch": 4.8, "grad_norm": 13.052968978881836, "learning_rate": 2.6291916152799706e-06, "loss": 0.1052, "step": 487725 }, { "epoch": 4.8, "grad_norm": 5.584496021270752, "learning_rate": 2.6290674928257226e-06, "loss": 0.062, "step": 487750 }, { "epoch": 4.8, "grad_norm": 22.674209594726562, "learning_rate": 2.628943370371474e-06, "loss": 0.1193, "step": 487775 }, { "epoch": 4.8, "grad_norm": 2.7233426570892334, "learning_rate": 2.6288192479172255e-06, "loss": 0.0875, "step": 487800 }, { "epoch": 4.8, "grad_norm": 9.969324111938477, "learning_rate": 2.628695125462977e-06, "loss": 0.1211, "step": 487825 }, { "epoch": 4.8, "grad_norm": 12.047734260559082, "learning_rate": 2.6285710030087288e-06, "loss": 0.0772, "step": 487850 }, { "epoch": 4.8, "grad_norm": 17.032398223876953, "learning_rate": 2.62844688055448e-06, "loss": 0.0936, "step": 487875 }, { "epoch": 4.8, "grad_norm": 7.493403434753418, "learning_rate": 2.628322758100232e-06, "loss": 0.0768, "step": 487900 }, { "epoch": 4.8, "grad_norm": 8.911430358886719, "learning_rate": 2.6281986356459832e-06, "loss": 0.1234, "step": 487925 }, { "epoch": 4.8, "grad_norm": 4.600905895233154, "learning_rate": 2.6280745131917344e-06, "loss": 0.0688, "step": 487950 }, { "epoch": 4.8, "grad_norm": 14.07009220123291, "learning_rate": 2.627950390737486e-06, "loss": 0.1269, "step": 487975 }, { "epoch": 4.8, "grad_norm": 9.272533416748047, "learning_rate": 2.6278262682832377e-06, "loss": 0.0647, "step": 488000 }, { "epoch": 4.8, "grad_norm": 13.56459903717041, "learning_rate": 2.6277021458289893e-06, "loss": 0.1097, "step": 488025 }, { "epoch": 4.8, "grad_norm": 6.649669170379639, "learning_rate": 2.6275780233747406e-06, "loss": 0.066, "step": 488050 }, { "epoch": 4.8, "grad_norm": 7.722257614135742, "learning_rate": 2.6274539009204926e-06, "loss": 0.0966, "step": 488075 }, { "epoch": 4.8, "grad_norm": 0.7203507423400879, "learning_rate": 2.627329778466244e-06, "loss": 0.091, "step": 488100 }, { "epoch": 4.8, "grad_norm": 13.018193244934082, "learning_rate": 2.627205656011995e-06, "loss": 0.0955, "step": 488125 }, { "epoch": 4.8, "grad_norm": 1.163569688796997, "learning_rate": 2.627081533557747e-06, "loss": 0.0774, "step": 488150 }, { "epoch": 4.8, "grad_norm": 13.009547233581543, "learning_rate": 2.6269574111034983e-06, "loss": 0.0911, "step": 488175 }, { "epoch": 4.8, "grad_norm": 7.387982368469238, "learning_rate": 2.62683328864925e-06, "loss": 0.0729, "step": 488200 }, { "epoch": 4.8, "grad_norm": 9.491801261901855, "learning_rate": 2.6267091661950016e-06, "loss": 0.0976, "step": 488225 }, { "epoch": 4.8, "grad_norm": 7.695594787597656, "learning_rate": 2.6265850437407532e-06, "loss": 0.0733, "step": 488250 }, { "epoch": 4.8, "grad_norm": 12.145586967468262, "learning_rate": 2.6264609212865044e-06, "loss": 0.104, "step": 488275 }, { "epoch": 4.8, "grad_norm": 12.478610038757324, "learning_rate": 2.6263367988322565e-06, "loss": 0.0704, "step": 488300 }, { "epoch": 4.8, "grad_norm": 15.741987228393555, "learning_rate": 2.6262126763780077e-06, "loss": 0.1141, "step": 488325 }, { "epoch": 4.8, "grad_norm": 7.567936420440674, "learning_rate": 2.626088553923759e-06, "loss": 0.0752, "step": 488350 }, { "epoch": 4.8, "grad_norm": 19.85828971862793, "learning_rate": 2.625964431469511e-06, "loss": 0.1176, "step": 488375 }, { "epoch": 4.8, "grad_norm": 5.450253486633301, "learning_rate": 2.625840309015262e-06, "loss": 0.0869, "step": 488400 }, { "epoch": 4.8, "grad_norm": 14.496232032775879, "learning_rate": 2.625716186561014e-06, "loss": 0.1126, "step": 488425 }, { "epoch": 4.8, "grad_norm": 4.214502811431885, "learning_rate": 2.6255920641067654e-06, "loss": 0.0874, "step": 488450 }, { "epoch": 4.8, "grad_norm": 12.010868072509766, "learning_rate": 2.625467941652517e-06, "loss": 0.0891, "step": 488475 }, { "epoch": 4.8, "grad_norm": 0.43847882747650146, "learning_rate": 2.6253438191982683e-06, "loss": 0.0853, "step": 488500 }, { "epoch": 4.8, "grad_norm": 15.782691955566406, "learning_rate": 2.6252196967440204e-06, "loss": 0.0901, "step": 488525 }, { "epoch": 4.8, "grad_norm": 5.240828037261963, "learning_rate": 2.6250955742897716e-06, "loss": 0.0563, "step": 488550 }, { "epoch": 4.8, "grad_norm": 16.263568878173828, "learning_rate": 2.6249714518355228e-06, "loss": 0.1146, "step": 488575 }, { "epoch": 4.8, "grad_norm": 9.170028686523438, "learning_rate": 2.624847329381275e-06, "loss": 0.0953, "step": 488600 }, { "epoch": 4.8, "grad_norm": 15.2289400100708, "learning_rate": 2.624723206927026e-06, "loss": 0.1049, "step": 488625 }, { "epoch": 4.8, "grad_norm": 4.59166145324707, "learning_rate": 2.6245990844727777e-06, "loss": 0.0687, "step": 488650 }, { "epoch": 4.8, "grad_norm": 15.752370834350586, "learning_rate": 2.6244749620185293e-06, "loss": 0.1113, "step": 488675 }, { "epoch": 4.8, "grad_norm": 5.320990085601807, "learning_rate": 2.624350839564281e-06, "loss": 0.0731, "step": 488700 }, { "epoch": 4.81, "grad_norm": 6.501616477966309, "learning_rate": 2.624226717110032e-06, "loss": 0.1075, "step": 488725 }, { "epoch": 4.81, "grad_norm": 6.952688217163086, "learning_rate": 2.6241025946557842e-06, "loss": 0.0622, "step": 488750 }, { "epoch": 4.81, "grad_norm": 18.067825317382812, "learning_rate": 2.6239784722015354e-06, "loss": 0.1392, "step": 488775 }, { "epoch": 4.81, "grad_norm": 15.385336875915527, "learning_rate": 2.6238543497472866e-06, "loss": 0.0761, "step": 488800 }, { "epoch": 4.81, "grad_norm": 19.748247146606445, "learning_rate": 2.6237302272930383e-06, "loss": 0.1179, "step": 488825 }, { "epoch": 4.81, "grad_norm": 11.156068801879883, "learning_rate": 2.62360610483879e-06, "loss": 0.0803, "step": 488850 }, { "epoch": 4.81, "grad_norm": 10.307476043701172, "learning_rate": 2.6234819823845416e-06, "loss": 0.0936, "step": 488875 }, { "epoch": 4.81, "grad_norm": 2.72609543800354, "learning_rate": 2.6233578599302928e-06, "loss": 0.0566, "step": 488900 }, { "epoch": 4.81, "grad_norm": 4.234861850738525, "learning_rate": 2.623233737476045e-06, "loss": 0.0963, "step": 488925 }, { "epoch": 4.81, "grad_norm": 5.552265167236328, "learning_rate": 2.623109615021796e-06, "loss": 0.0809, "step": 488950 }, { "epoch": 4.81, "grad_norm": 5.402963638305664, "learning_rate": 2.6229854925675472e-06, "loss": 0.1117, "step": 488975 }, { "epoch": 4.81, "grad_norm": 8.246739387512207, "learning_rate": 2.6228613701132993e-06, "loss": 0.0743, "step": 489000 }, { "epoch": 4.81, "grad_norm": 19.785253524780273, "learning_rate": 2.6227372476590505e-06, "loss": 0.1124, "step": 489025 }, { "epoch": 4.81, "grad_norm": 5.151333332061768, "learning_rate": 2.622613125204802e-06, "loss": 0.0687, "step": 489050 }, { "epoch": 4.81, "grad_norm": 17.110979080200195, "learning_rate": 2.6224890027505538e-06, "loss": 0.1221, "step": 489075 }, { "epoch": 4.81, "grad_norm": 2.7444586753845215, "learning_rate": 2.6223648802963054e-06, "loss": 0.0784, "step": 489100 }, { "epoch": 4.81, "grad_norm": 11.293817520141602, "learning_rate": 2.6222407578420566e-06, "loss": 0.1215, "step": 489125 }, { "epoch": 4.81, "grad_norm": 2.0463626384735107, "learning_rate": 2.6221166353878087e-06, "loss": 0.0861, "step": 489150 }, { "epoch": 4.81, "grad_norm": 13.583905220031738, "learning_rate": 2.62199251293356e-06, "loss": 0.1095, "step": 489175 }, { "epoch": 4.81, "grad_norm": 11.888205528259277, "learning_rate": 2.621868390479311e-06, "loss": 0.0908, "step": 489200 }, { "epoch": 4.81, "grad_norm": 9.182477951049805, "learning_rate": 2.621744268025063e-06, "loss": 0.1135, "step": 489225 }, { "epoch": 4.81, "grad_norm": 9.38553237915039, "learning_rate": 2.6216201455708144e-06, "loss": 0.078, "step": 489250 }, { "epoch": 4.81, "grad_norm": 18.586299896240234, "learning_rate": 2.621496023116566e-06, "loss": 0.0949, "step": 489275 }, { "epoch": 4.81, "grad_norm": 6.664228439331055, "learning_rate": 2.6213719006623177e-06, "loss": 0.0679, "step": 489300 }, { "epoch": 4.81, "grad_norm": 15.858222961425781, "learning_rate": 2.6212477782080693e-06, "loss": 0.1105, "step": 489325 }, { "epoch": 4.81, "grad_norm": 6.7695136070251465, "learning_rate": 2.6211236557538205e-06, "loss": 0.0752, "step": 489350 }, { "epoch": 4.81, "grad_norm": 9.307640075683594, "learning_rate": 2.6209995332995726e-06, "loss": 0.0832, "step": 489375 }, { "epoch": 4.81, "grad_norm": 3.4537343978881836, "learning_rate": 2.6208754108453238e-06, "loss": 0.065, "step": 489400 }, { "epoch": 4.81, "grad_norm": 16.836376190185547, "learning_rate": 2.620751288391075e-06, "loss": 0.1215, "step": 489425 }, { "epoch": 4.81, "grad_norm": 1.9559835195541382, "learning_rate": 2.620627165936827e-06, "loss": 0.0638, "step": 489450 }, { "epoch": 4.81, "grad_norm": 13.978940963745117, "learning_rate": 2.6205030434825782e-06, "loss": 0.1061, "step": 489475 }, { "epoch": 4.81, "grad_norm": 0.22136394679546356, "learning_rate": 2.62037892102833e-06, "loss": 0.092, "step": 489500 }, { "epoch": 4.81, "grad_norm": 15.054190635681152, "learning_rate": 2.6202547985740815e-06, "loss": 0.0991, "step": 489525 }, { "epoch": 4.81, "grad_norm": 5.094934463500977, "learning_rate": 2.620130676119833e-06, "loss": 0.0779, "step": 489550 }, { "epoch": 4.81, "grad_norm": 18.00518798828125, "learning_rate": 2.6200065536655844e-06, "loss": 0.0941, "step": 489575 }, { "epoch": 4.81, "grad_norm": 4.511882305145264, "learning_rate": 2.6198824312113364e-06, "loss": 0.0636, "step": 489600 }, { "epoch": 4.81, "grad_norm": 43.271366119384766, "learning_rate": 2.6197583087570876e-06, "loss": 0.1339, "step": 489625 }, { "epoch": 4.81, "grad_norm": 4.000110626220703, "learning_rate": 2.619634186302839e-06, "loss": 0.0677, "step": 489650 }, { "epoch": 4.81, "grad_norm": 24.13425636291504, "learning_rate": 2.6195100638485905e-06, "loss": 0.1243, "step": 489675 }, { "epoch": 4.81, "grad_norm": 3.9680004119873047, "learning_rate": 2.619385941394342e-06, "loss": 0.0786, "step": 489700 }, { "epoch": 4.82, "grad_norm": 12.32233715057373, "learning_rate": 2.6192618189400938e-06, "loss": 0.1287, "step": 489725 }, { "epoch": 4.82, "grad_norm": 15.602558135986328, "learning_rate": 2.619137696485845e-06, "loss": 0.0743, "step": 489750 }, { "epoch": 4.82, "grad_norm": 9.333170890808105, "learning_rate": 2.619013574031597e-06, "loss": 0.0863, "step": 489775 }, { "epoch": 4.82, "grad_norm": 7.114638805389404, "learning_rate": 2.6188894515773482e-06, "loss": 0.0877, "step": 489800 }, { "epoch": 4.82, "grad_norm": 12.77872085571289, "learning_rate": 2.6187653291231e-06, "loss": 0.0999, "step": 489825 }, { "epoch": 4.82, "grad_norm": 8.896139144897461, "learning_rate": 2.6186412066688515e-06, "loss": 0.0859, "step": 489850 }, { "epoch": 4.82, "grad_norm": 8.00505542755127, "learning_rate": 2.618517084214603e-06, "loss": 0.1238, "step": 489875 }, { "epoch": 4.82, "grad_norm": 10.10107135772705, "learning_rate": 2.6183929617603543e-06, "loss": 0.0717, "step": 489900 }, { "epoch": 4.82, "grad_norm": 11.938508987426758, "learning_rate": 2.6182688393061064e-06, "loss": 0.0918, "step": 489925 }, { "epoch": 4.82, "grad_norm": 11.68140983581543, "learning_rate": 2.6181447168518576e-06, "loss": 0.0691, "step": 489950 }, { "epoch": 4.82, "grad_norm": 10.934882164001465, "learning_rate": 2.618020594397609e-06, "loss": 0.1159, "step": 489975 }, { "epoch": 4.82, "grad_norm": 7.487667083740234, "learning_rate": 2.617896471943361e-06, "loss": 0.0742, "step": 490000 }, { "epoch": 4.82, "grad_norm": 4.736860275268555, "learning_rate": 2.617772349489112e-06, "loss": 0.1221, "step": 490025 }, { "epoch": 4.82, "grad_norm": 4.1162919998168945, "learning_rate": 2.6176482270348637e-06, "loss": 0.0623, "step": 490050 }, { "epoch": 4.82, "grad_norm": 16.601858139038086, "learning_rate": 2.6175241045806154e-06, "loss": 0.0946, "step": 490075 }, { "epoch": 4.82, "grad_norm": 7.06284761428833, "learning_rate": 2.617399982126367e-06, "loss": 0.0817, "step": 490100 }, { "epoch": 4.82, "grad_norm": 7.91257381439209, "learning_rate": 2.6172758596721182e-06, "loss": 0.1029, "step": 490125 }, { "epoch": 4.82, "grad_norm": 0.47546130418777466, "learning_rate": 2.6171517372178703e-06, "loss": 0.0629, "step": 490150 }, { "epoch": 4.82, "grad_norm": 18.662351608276367, "learning_rate": 2.6170276147636215e-06, "loss": 0.1237, "step": 490175 }, { "epoch": 4.82, "grad_norm": 4.448851108551025, "learning_rate": 2.6169084572075434e-06, "loss": 0.0798, "step": 490200 }, { "epoch": 4.82, "grad_norm": 4.2348151206970215, "learning_rate": 2.6167843347532946e-06, "loss": 0.1373, "step": 490225 }, { "epoch": 4.82, "grad_norm": 7.622947692871094, "learning_rate": 2.6166602122990458e-06, "loss": 0.1026, "step": 490250 }, { "epoch": 4.82, "grad_norm": 13.528100967407227, "learning_rate": 2.6165360898447974e-06, "loss": 0.1219, "step": 490275 }, { "epoch": 4.82, "grad_norm": 3.524505853652954, "learning_rate": 2.616411967390549e-06, "loss": 0.0868, "step": 490300 }, { "epoch": 4.82, "grad_norm": 6.926988124847412, "learning_rate": 2.6162878449363007e-06, "loss": 0.0881, "step": 490325 }, { "epoch": 4.82, "grad_norm": 3.658295154571533, "learning_rate": 2.616163722482052e-06, "loss": 0.0748, "step": 490350 }, { "epoch": 4.82, "grad_norm": 14.632261276245117, "learning_rate": 2.616039600027804e-06, "loss": 0.1297, "step": 490375 }, { "epoch": 4.82, "grad_norm": 2.361013412475586, "learning_rate": 2.615915477573555e-06, "loss": 0.0818, "step": 490400 }, { "epoch": 4.82, "grad_norm": 12.032144546508789, "learning_rate": 2.6157913551193064e-06, "loss": 0.085, "step": 490425 }, { "epoch": 4.82, "grad_norm": 6.400413513183594, "learning_rate": 2.6156672326650584e-06, "loss": 0.0618, "step": 490450 }, { "epoch": 4.82, "grad_norm": 14.635899543762207, "learning_rate": 2.6155431102108097e-06, "loss": 0.1035, "step": 490475 }, { "epoch": 4.82, "grad_norm": 2.917990207672119, "learning_rate": 2.6154189877565613e-06, "loss": 0.0959, "step": 490500 }, { "epoch": 4.82, "grad_norm": 23.154621124267578, "learning_rate": 2.615294865302313e-06, "loss": 0.1189, "step": 490525 }, { "epoch": 4.82, "grad_norm": 0.17136870324611664, "learning_rate": 2.6151707428480646e-06, "loss": 0.0785, "step": 490550 }, { "epoch": 4.82, "grad_norm": 13.110177040100098, "learning_rate": 2.6150466203938158e-06, "loss": 0.0874, "step": 490575 }, { "epoch": 4.82, "grad_norm": 11.185181617736816, "learning_rate": 2.614922497939568e-06, "loss": 0.0852, "step": 490600 }, { "epoch": 4.82, "grad_norm": 13.57405948638916, "learning_rate": 2.614798375485319e-06, "loss": 0.1112, "step": 490625 }, { "epoch": 4.82, "grad_norm": 5.574165344238281, "learning_rate": 2.6146742530310702e-06, "loss": 0.1024, "step": 490650 }, { "epoch": 4.82, "grad_norm": 18.813669204711914, "learning_rate": 2.6145501305768223e-06, "loss": 0.1252, "step": 490675 }, { "epoch": 4.82, "grad_norm": 2.8223214149475098, "learning_rate": 2.6144260081225735e-06, "loss": 0.0769, "step": 490700 }, { "epoch": 4.82, "grad_norm": 9.937446594238281, "learning_rate": 2.614301885668325e-06, "loss": 0.0765, "step": 490725 }, { "epoch": 4.83, "grad_norm": 7.154203414916992, "learning_rate": 2.614177763214077e-06, "loss": 0.0815, "step": 490750 }, { "epoch": 4.83, "grad_norm": 21.42584228515625, "learning_rate": 2.6140536407598284e-06, "loss": 0.0992, "step": 490775 }, { "epoch": 4.83, "grad_norm": 5.183230400085449, "learning_rate": 2.6139295183055796e-06, "loss": 0.0951, "step": 490800 }, { "epoch": 4.83, "grad_norm": 25.109619140625, "learning_rate": 2.6138053958513317e-06, "loss": 0.1021, "step": 490825 }, { "epoch": 4.83, "grad_norm": 3.1846909523010254, "learning_rate": 2.613681273397083e-06, "loss": 0.0683, "step": 490850 }, { "epoch": 4.83, "grad_norm": 21.430023193359375, "learning_rate": 2.613557150942834e-06, "loss": 0.0977, "step": 490875 }, { "epoch": 4.83, "grad_norm": 0.16048258543014526, "learning_rate": 2.613433028488586e-06, "loss": 0.0614, "step": 490900 }, { "epoch": 4.83, "grad_norm": 14.856066703796387, "learning_rate": 2.6133089060343374e-06, "loss": 0.1122, "step": 490925 }, { "epoch": 4.83, "grad_norm": 6.216101169586182, "learning_rate": 2.613184783580089e-06, "loss": 0.0785, "step": 490950 }, { "epoch": 4.83, "grad_norm": 8.507107734680176, "learning_rate": 2.6130606611258407e-06, "loss": 0.069, "step": 490975 }, { "epoch": 4.83, "grad_norm": 0.09551307559013367, "learning_rate": 2.6129365386715923e-06, "loss": 0.0623, "step": 491000 }, { "epoch": 4.83, "grad_norm": 20.330936431884766, "learning_rate": 2.6128124162173435e-06, "loss": 0.1129, "step": 491025 }, { "epoch": 4.83, "grad_norm": 2.6083929538726807, "learning_rate": 2.6126882937630956e-06, "loss": 0.0937, "step": 491050 }, { "epoch": 4.83, "grad_norm": 13.291094779968262, "learning_rate": 2.6125641713088468e-06, "loss": 0.122, "step": 491075 }, { "epoch": 4.83, "grad_norm": 1.3402315378189087, "learning_rate": 2.612440048854598e-06, "loss": 0.0915, "step": 491100 }, { "epoch": 4.83, "grad_norm": 9.554115295410156, "learning_rate": 2.6123159264003496e-06, "loss": 0.1305, "step": 491125 }, { "epoch": 4.83, "grad_norm": 12.300808906555176, "learning_rate": 2.6121918039461013e-06, "loss": 0.0786, "step": 491150 }, { "epoch": 4.83, "grad_norm": 13.413928985595703, "learning_rate": 2.612067681491853e-06, "loss": 0.1139, "step": 491175 }, { "epoch": 4.83, "grad_norm": 0.17408660054206848, "learning_rate": 2.611943559037604e-06, "loss": 0.0711, "step": 491200 }, { "epoch": 4.83, "grad_norm": 13.059850692749023, "learning_rate": 2.611819436583356e-06, "loss": 0.1018, "step": 491225 }, { "epoch": 4.83, "grad_norm": 7.218940258026123, "learning_rate": 2.6116953141291074e-06, "loss": 0.0624, "step": 491250 }, { "epoch": 4.83, "grad_norm": 12.804052352905273, "learning_rate": 2.6115711916748586e-06, "loss": 0.1034, "step": 491275 }, { "epoch": 4.83, "grad_norm": 8.320191383361816, "learning_rate": 2.6114470692206106e-06, "loss": 0.0545, "step": 491300 }, { "epoch": 4.83, "grad_norm": 4.082000732421875, "learning_rate": 2.611322946766362e-06, "loss": 0.1038, "step": 491325 }, { "epoch": 4.83, "grad_norm": 5.216336250305176, "learning_rate": 2.6111988243121135e-06, "loss": 0.0891, "step": 491350 }, { "epoch": 4.83, "grad_norm": 13.942416191101074, "learning_rate": 2.611074701857865e-06, "loss": 0.1193, "step": 491375 }, { "epoch": 4.83, "grad_norm": 13.50179672241211, "learning_rate": 2.6109505794036168e-06, "loss": 0.0954, "step": 491400 }, { "epoch": 4.83, "grad_norm": 6.5565714836120605, "learning_rate": 2.610826456949368e-06, "loss": 0.088, "step": 491425 }, { "epoch": 4.83, "grad_norm": 2.930168628692627, "learning_rate": 2.61070233449512e-06, "loss": 0.0642, "step": 491450 }, { "epoch": 4.83, "grad_norm": 16.783714294433594, "learning_rate": 2.6105782120408712e-06, "loss": 0.1022, "step": 491475 }, { "epoch": 4.83, "grad_norm": 8.138213157653809, "learning_rate": 2.6104540895866225e-06, "loss": 0.0901, "step": 491500 }, { "epoch": 4.83, "grad_norm": 9.777311325073242, "learning_rate": 2.6103299671323745e-06, "loss": 0.1064, "step": 491525 }, { "epoch": 4.83, "grad_norm": 3.2735488414764404, "learning_rate": 2.6102058446781257e-06, "loss": 0.0661, "step": 491550 }, { "epoch": 4.83, "grad_norm": 12.20580768585205, "learning_rate": 2.6100817222238774e-06, "loss": 0.1336, "step": 491575 }, { "epoch": 4.83, "grad_norm": 0.36053553223609924, "learning_rate": 2.609957599769629e-06, "loss": 0.0718, "step": 491600 }, { "epoch": 4.83, "grad_norm": 10.765291213989258, "learning_rate": 2.6098334773153806e-06, "loss": 0.0701, "step": 491625 }, { "epoch": 4.83, "grad_norm": 1.0974361896514893, "learning_rate": 2.609709354861132e-06, "loss": 0.0702, "step": 491650 }, { "epoch": 4.83, "grad_norm": 12.83979320526123, "learning_rate": 2.609585232406884e-06, "loss": 0.103, "step": 491675 }, { "epoch": 4.83, "grad_norm": 8.411150932312012, "learning_rate": 2.609461109952635e-06, "loss": 0.1015, "step": 491700 }, { "epoch": 4.83, "grad_norm": 11.944860458374023, "learning_rate": 2.6093369874983863e-06, "loss": 0.0831, "step": 491725 }, { "epoch": 4.83, "grad_norm": 2.8950212001800537, "learning_rate": 2.6092128650441384e-06, "loss": 0.0918, "step": 491750 }, { "epoch": 4.84, "grad_norm": 20.446929931640625, "learning_rate": 2.6090887425898896e-06, "loss": 0.0853, "step": 491775 }, { "epoch": 4.84, "grad_norm": 12.46088981628418, "learning_rate": 2.6089646201356412e-06, "loss": 0.079, "step": 491800 }, { "epoch": 4.84, "grad_norm": 14.01620864868164, "learning_rate": 2.608840497681393e-06, "loss": 0.0893, "step": 491825 }, { "epoch": 4.84, "grad_norm": 3.6422359943389893, "learning_rate": 2.6087163752271445e-06, "loss": 0.0797, "step": 491850 }, { "epoch": 4.84, "grad_norm": 8.92725944519043, "learning_rate": 2.6085922527728957e-06, "loss": 0.0605, "step": 491875 }, { "epoch": 4.84, "grad_norm": 10.037351608276367, "learning_rate": 2.6084681303186478e-06, "loss": 0.0787, "step": 491900 }, { "epoch": 4.84, "grad_norm": 18.416027069091797, "learning_rate": 2.608344007864399e-06, "loss": 0.1037, "step": 491925 }, { "epoch": 4.84, "grad_norm": 3.4490461349487305, "learning_rate": 2.60821988541015e-06, "loss": 0.1076, "step": 491950 }, { "epoch": 4.84, "grad_norm": 13.863518714904785, "learning_rate": 2.608095762955902e-06, "loss": 0.0975, "step": 491975 }, { "epoch": 4.84, "grad_norm": 2.9512939453125, "learning_rate": 2.6079716405016535e-06, "loss": 0.0494, "step": 492000 }, { "epoch": 4.84, "grad_norm": 9.908269882202148, "learning_rate": 2.607847518047405e-06, "loss": 0.0993, "step": 492025 }, { "epoch": 4.84, "grad_norm": 3.0914769172668457, "learning_rate": 2.6077233955931563e-06, "loss": 0.0881, "step": 492050 }, { "epoch": 4.84, "grad_norm": 28.843997955322266, "learning_rate": 2.6075992731389084e-06, "loss": 0.0988, "step": 492075 }, { "epoch": 4.84, "grad_norm": 9.342037200927734, "learning_rate": 2.6074751506846596e-06, "loss": 0.084, "step": 492100 }, { "epoch": 4.84, "grad_norm": 12.914016723632812, "learning_rate": 2.6073510282304108e-06, "loss": 0.1137, "step": 492125 }, { "epoch": 4.84, "grad_norm": 0.5495375394821167, "learning_rate": 2.607226905776163e-06, "loss": 0.0773, "step": 492150 }, { "epoch": 4.84, "grad_norm": 17.208118438720703, "learning_rate": 2.607102783321914e-06, "loss": 0.1302, "step": 492175 }, { "epoch": 4.84, "grad_norm": 20.777345657348633, "learning_rate": 2.6069786608676657e-06, "loss": 0.0644, "step": 492200 }, { "epoch": 4.84, "grad_norm": 22.034440994262695, "learning_rate": 2.6068545384134173e-06, "loss": 0.1288, "step": 492225 }, { "epoch": 4.84, "grad_norm": 0.34478750824928284, "learning_rate": 2.606730415959169e-06, "loss": 0.052, "step": 492250 }, { "epoch": 4.84, "grad_norm": 9.992753028869629, "learning_rate": 2.60660629350492e-06, "loss": 0.085, "step": 492275 }, { "epoch": 4.84, "grad_norm": 3.1622445583343506, "learning_rate": 2.6064821710506722e-06, "loss": 0.0608, "step": 492300 }, { "epoch": 4.84, "grad_norm": 11.947453498840332, "learning_rate": 2.6063580485964234e-06, "loss": 0.1224, "step": 492325 }, { "epoch": 4.84, "grad_norm": 0.3869630694389343, "learning_rate": 2.6062388910403453e-06, "loss": 0.0749, "step": 492350 }, { "epoch": 4.84, "grad_norm": 4.492712020874023, "learning_rate": 2.6061147685860965e-06, "loss": 0.1143, "step": 492375 }, { "epoch": 4.84, "grad_norm": 1.4385671615600586, "learning_rate": 2.6059906461318477e-06, "loss": 0.0822, "step": 492400 }, { "epoch": 4.84, "grad_norm": 7.167002201080322, "learning_rate": 2.6058665236776e-06, "loss": 0.0916, "step": 492425 }, { "epoch": 4.84, "grad_norm": 6.887282848358154, "learning_rate": 2.605742401223351e-06, "loss": 0.0664, "step": 492450 }, { "epoch": 4.84, "grad_norm": 27.098121643066406, "learning_rate": 2.6056182787691026e-06, "loss": 0.1289, "step": 492475 }, { "epoch": 4.84, "grad_norm": 0.8801811337471008, "learning_rate": 2.6054941563148543e-06, "loss": 0.0731, "step": 492500 }, { "epoch": 4.84, "grad_norm": 27.865354537963867, "learning_rate": 2.605370033860606e-06, "loss": 0.1047, "step": 492525 }, { "epoch": 4.84, "grad_norm": 1.0901384353637695, "learning_rate": 2.605245911406357e-06, "loss": 0.068, "step": 492550 }, { "epoch": 4.84, "grad_norm": 11.802752494812012, "learning_rate": 2.6051217889521083e-06, "loss": 0.1039, "step": 492575 }, { "epoch": 4.84, "grad_norm": 0.01525636576116085, "learning_rate": 2.6049976664978604e-06, "loss": 0.0727, "step": 492600 }, { "epoch": 4.84, "grad_norm": 6.3406267166137695, "learning_rate": 2.6048735440436116e-06, "loss": 0.0717, "step": 492625 }, { "epoch": 4.84, "grad_norm": 6.500328540802002, "learning_rate": 2.6047494215893632e-06, "loss": 0.0731, "step": 492650 }, { "epoch": 4.84, "grad_norm": 15.179463386535645, "learning_rate": 2.604625299135115e-06, "loss": 0.1364, "step": 492675 }, { "epoch": 4.84, "grad_norm": 2.4558351039886475, "learning_rate": 2.6045011766808665e-06, "loss": 0.0773, "step": 492700 }, { "epoch": 4.84, "grad_norm": 15.403059005737305, "learning_rate": 2.6043770542266177e-06, "loss": 0.1237, "step": 492725 }, { "epoch": 4.84, "grad_norm": 2.728415012359619, "learning_rate": 2.6042529317723698e-06, "loss": 0.079, "step": 492750 }, { "epoch": 4.85, "grad_norm": 14.555229187011719, "learning_rate": 2.604128809318121e-06, "loss": 0.091, "step": 492775 }, { "epoch": 4.85, "grad_norm": 8.702500343322754, "learning_rate": 2.6040046868638726e-06, "loss": 0.0578, "step": 492800 }, { "epoch": 4.85, "grad_norm": 14.000969886779785, "learning_rate": 2.6038805644096243e-06, "loss": 0.0963, "step": 492825 }, { "epoch": 4.85, "grad_norm": 4.04077672958374, "learning_rate": 2.603756441955376e-06, "loss": 0.0802, "step": 492850 }, { "epoch": 4.85, "grad_norm": 14.926508903503418, "learning_rate": 2.603632319501127e-06, "loss": 0.1036, "step": 492875 }, { "epoch": 4.85, "grad_norm": 0.06842739135026932, "learning_rate": 2.603508197046879e-06, "loss": 0.0799, "step": 492900 }, { "epoch": 4.85, "grad_norm": 11.794663429260254, "learning_rate": 2.6033840745926304e-06, "loss": 0.0806, "step": 492925 }, { "epoch": 4.85, "grad_norm": 0.7936230897903442, "learning_rate": 2.6032599521383816e-06, "loss": 0.0817, "step": 492950 }, { "epoch": 4.85, "grad_norm": 10.213186264038086, "learning_rate": 2.6031358296841336e-06, "loss": 0.1158, "step": 492975 }, { "epoch": 4.85, "grad_norm": 3.0263895988464355, "learning_rate": 2.603011707229885e-06, "loss": 0.0794, "step": 493000 }, { "epoch": 4.85, "grad_norm": 6.5432353019714355, "learning_rate": 2.6028875847756365e-06, "loss": 0.098, "step": 493025 }, { "epoch": 4.85, "grad_norm": 5.733537197113037, "learning_rate": 2.602763462321388e-06, "loss": 0.0783, "step": 493050 }, { "epoch": 4.85, "grad_norm": 17.245830535888672, "learning_rate": 2.6026393398671398e-06, "loss": 0.1201, "step": 493075 }, { "epoch": 4.85, "grad_norm": 9.524784088134766, "learning_rate": 2.602515217412891e-06, "loss": 0.0813, "step": 493100 }, { "epoch": 4.85, "grad_norm": 11.715239524841309, "learning_rate": 2.602391094958643e-06, "loss": 0.1278, "step": 493125 }, { "epoch": 4.85, "grad_norm": 4.03532075881958, "learning_rate": 2.6022669725043942e-06, "loss": 0.0674, "step": 493150 }, { "epoch": 4.85, "grad_norm": 18.279529571533203, "learning_rate": 2.6021428500501455e-06, "loss": 0.0927, "step": 493175 }, { "epoch": 4.85, "grad_norm": 4.370944499969482, "learning_rate": 2.6020187275958975e-06, "loss": 0.0856, "step": 493200 }, { "epoch": 4.85, "grad_norm": 16.03314971923828, "learning_rate": 2.6018946051416487e-06, "loss": 0.1459, "step": 493225 }, { "epoch": 4.85, "grad_norm": 2.2569758892059326, "learning_rate": 2.6017704826874004e-06, "loss": 0.0715, "step": 493250 }, { "epoch": 4.85, "grad_norm": 15.083255767822266, "learning_rate": 2.601646360233152e-06, "loss": 0.1154, "step": 493275 }, { "epoch": 4.85, "grad_norm": 4.5549235343933105, "learning_rate": 2.6015222377789036e-06, "loss": 0.0721, "step": 493300 }, { "epoch": 4.85, "grad_norm": 27.766698837280273, "learning_rate": 2.601398115324655e-06, "loss": 0.1275, "step": 493325 }, { "epoch": 4.85, "grad_norm": 0.500356137752533, "learning_rate": 2.601273992870407e-06, "loss": 0.0785, "step": 493350 }, { "epoch": 4.85, "grad_norm": 11.386171340942383, "learning_rate": 2.601149870416158e-06, "loss": 0.0949, "step": 493375 }, { "epoch": 4.85, "grad_norm": 18.339521408081055, "learning_rate": 2.6010257479619093e-06, "loss": 0.0687, "step": 493400 }, { "epoch": 4.85, "grad_norm": 11.533121109008789, "learning_rate": 2.600901625507661e-06, "loss": 0.1038, "step": 493425 }, { "epoch": 4.85, "grad_norm": 1.8835127353668213, "learning_rate": 2.6007775030534126e-06, "loss": 0.0938, "step": 493450 }, { "epoch": 4.85, "grad_norm": 16.448699951171875, "learning_rate": 2.6006533805991642e-06, "loss": 0.0975, "step": 493475 }, { "epoch": 4.85, "grad_norm": 8.408082962036133, "learning_rate": 2.6005292581449154e-06, "loss": 0.0777, "step": 493500 }, { "epoch": 4.85, "grad_norm": 10.423763275146484, "learning_rate": 2.6004051356906675e-06, "loss": 0.1026, "step": 493525 }, { "epoch": 4.85, "grad_norm": 7.268336772918701, "learning_rate": 2.6002810132364187e-06, "loss": 0.0737, "step": 493550 }, { "epoch": 4.85, "grad_norm": 14.703543663024902, "learning_rate": 2.60015689078217e-06, "loss": 0.0971, "step": 493575 }, { "epoch": 4.85, "grad_norm": 4.8546271324157715, "learning_rate": 2.600032768327922e-06, "loss": 0.0715, "step": 493600 }, { "epoch": 4.85, "grad_norm": 24.235017776489258, "learning_rate": 2.599908645873673e-06, "loss": 0.1108, "step": 493625 }, { "epoch": 4.85, "grad_norm": 0.8720346093177795, "learning_rate": 2.599784523419425e-06, "loss": 0.0863, "step": 493650 }, { "epoch": 4.85, "grad_norm": 11.582626342773438, "learning_rate": 2.5996604009651765e-06, "loss": 0.0923, "step": 493675 }, { "epoch": 4.85, "grad_norm": 5.553333282470703, "learning_rate": 2.599536278510928e-06, "loss": 0.0606, "step": 493700 }, { "epoch": 4.85, "grad_norm": 11.23153018951416, "learning_rate": 2.5994121560566793e-06, "loss": 0.1303, "step": 493725 }, { "epoch": 4.85, "grad_norm": 0.3849255442619324, "learning_rate": 2.5992880336024314e-06, "loss": 0.0892, "step": 493750 }, { "epoch": 4.85, "grad_norm": 12.062322616577148, "learning_rate": 2.5991639111481826e-06, "loss": 0.1012, "step": 493775 }, { "epoch": 4.86, "grad_norm": 5.479954719543457, "learning_rate": 2.599039788693934e-06, "loss": 0.0791, "step": 493800 }, { "epoch": 4.86, "grad_norm": 9.553112983703613, "learning_rate": 2.598915666239686e-06, "loss": 0.1324, "step": 493825 }, { "epoch": 4.86, "grad_norm": 0.4516777992248535, "learning_rate": 2.598791543785437e-06, "loss": 0.0601, "step": 493850 }, { "epoch": 4.86, "grad_norm": 18.36497688293457, "learning_rate": 2.5986674213311887e-06, "loss": 0.1184, "step": 493875 }, { "epoch": 4.86, "grad_norm": 7.8991522789001465, "learning_rate": 2.5985432988769403e-06, "loss": 0.0848, "step": 493900 }, { "epoch": 4.86, "grad_norm": 9.059159278869629, "learning_rate": 2.598419176422692e-06, "loss": 0.1008, "step": 493925 }, { "epoch": 4.86, "grad_norm": 6.8910417556762695, "learning_rate": 2.598295053968443e-06, "loss": 0.0834, "step": 493950 }, { "epoch": 4.86, "grad_norm": 18.406558990478516, "learning_rate": 2.5981709315141952e-06, "loss": 0.1159, "step": 493975 }, { "epoch": 4.86, "grad_norm": 3.9948112964630127, "learning_rate": 2.5980468090599464e-06, "loss": 0.077, "step": 494000 }, { "epoch": 4.86, "grad_norm": 16.863666534423828, "learning_rate": 2.5979226866056977e-06, "loss": 0.0876, "step": 494025 }, { "epoch": 4.86, "grad_norm": 2.823072910308838, "learning_rate": 2.5977985641514497e-06, "loss": 0.0906, "step": 494050 }, { "epoch": 4.86, "grad_norm": 23.43779945373535, "learning_rate": 2.597674441697201e-06, "loss": 0.111, "step": 494075 }, { "epoch": 4.86, "grad_norm": 7.146966934204102, "learning_rate": 2.5975503192429526e-06, "loss": 0.0536, "step": 494100 }, { "epoch": 4.86, "grad_norm": 15.215749740600586, "learning_rate": 2.597426196788704e-06, "loss": 0.1073, "step": 494125 }, { "epoch": 4.86, "grad_norm": 9.735849380493164, "learning_rate": 2.597302074334456e-06, "loss": 0.089, "step": 494150 }, { "epoch": 4.86, "grad_norm": 17.89492416381836, "learning_rate": 2.597177951880207e-06, "loss": 0.0718, "step": 494175 }, { "epoch": 4.86, "grad_norm": 8.655608177185059, "learning_rate": 2.597053829425959e-06, "loss": 0.0726, "step": 494200 }, { "epoch": 4.86, "grad_norm": 13.668319702148438, "learning_rate": 2.5969297069717103e-06, "loss": 0.1137, "step": 494225 }, { "epoch": 4.86, "grad_norm": 9.444177627563477, "learning_rate": 2.5968055845174615e-06, "loss": 0.0775, "step": 494250 }, { "epoch": 4.86, "grad_norm": 14.347201347351074, "learning_rate": 2.596681462063213e-06, "loss": 0.1188, "step": 494275 }, { "epoch": 4.86, "grad_norm": 0.6255360841751099, "learning_rate": 2.596557339608965e-06, "loss": 0.0774, "step": 494300 }, { "epoch": 4.86, "grad_norm": 16.807300567626953, "learning_rate": 2.5964332171547164e-06, "loss": 0.0915, "step": 494325 }, { "epoch": 4.86, "grad_norm": 1.9677609205245972, "learning_rate": 2.5963090947004676e-06, "loss": 0.0785, "step": 494350 }, { "epoch": 4.86, "grad_norm": 24.082387924194336, "learning_rate": 2.5961849722462197e-06, "loss": 0.1038, "step": 494375 }, { "epoch": 4.86, "grad_norm": 0.5902963280677795, "learning_rate": 2.596060849791971e-06, "loss": 0.0819, "step": 494400 }, { "epoch": 4.86, "grad_norm": 5.571336269378662, "learning_rate": 2.595936727337722e-06, "loss": 0.0886, "step": 494425 }, { "epoch": 4.86, "grad_norm": 3.5945067405700684, "learning_rate": 2.595812604883474e-06, "loss": 0.0576, "step": 494450 }, { "epoch": 4.86, "grad_norm": 11.902385711669922, "learning_rate": 2.5956884824292254e-06, "loss": 0.1169, "step": 494475 }, { "epoch": 4.86, "grad_norm": 8.358928680419922, "learning_rate": 2.595564359974977e-06, "loss": 0.0715, "step": 494500 }, { "epoch": 4.86, "grad_norm": 17.42400550842285, "learning_rate": 2.5954402375207287e-06, "loss": 0.0939, "step": 494525 }, { "epoch": 4.86, "grad_norm": 5.042177200317383, "learning_rate": 2.5953161150664803e-06, "loss": 0.0815, "step": 494550 }, { "epoch": 4.86, "grad_norm": 17.85661506652832, "learning_rate": 2.5951969575104017e-06, "loss": 0.1346, "step": 494575 }, { "epoch": 4.86, "grad_norm": 1.7880046367645264, "learning_rate": 2.5950728350561534e-06, "loss": 0.063, "step": 494600 }, { "epoch": 4.86, "grad_norm": 13.869356155395508, "learning_rate": 2.5949487126019046e-06, "loss": 0.1081, "step": 494625 }, { "epoch": 4.86, "grad_norm": 4.757468223571777, "learning_rate": 2.5948245901476567e-06, "loss": 0.0741, "step": 494650 }, { "epoch": 4.86, "grad_norm": 8.116920471191406, "learning_rate": 2.594700467693408e-06, "loss": 0.0947, "step": 494675 }, { "epoch": 4.86, "grad_norm": 3.8877017498016357, "learning_rate": 2.594576345239159e-06, "loss": 0.0947, "step": 494700 }, { "epoch": 4.86, "grad_norm": 12.301840782165527, "learning_rate": 2.594452222784911e-06, "loss": 0.1226, "step": 494725 }, { "epoch": 4.86, "grad_norm": 6.785299777984619, "learning_rate": 2.5943281003306623e-06, "loss": 0.0989, "step": 494750 }, { "epoch": 4.86, "grad_norm": 15.156216621398926, "learning_rate": 2.594203977876414e-06, "loss": 0.0996, "step": 494775 }, { "epoch": 4.86, "grad_norm": 8.712793350219727, "learning_rate": 2.594079855422165e-06, "loss": 0.0936, "step": 494800 }, { "epoch": 4.87, "grad_norm": 9.329657554626465, "learning_rate": 2.5939557329679173e-06, "loss": 0.1127, "step": 494825 }, { "epoch": 4.87, "grad_norm": 2.5088634490966797, "learning_rate": 2.5938316105136685e-06, "loss": 0.0768, "step": 494850 }, { "epoch": 4.87, "grad_norm": 8.617937088012695, "learning_rate": 2.5937074880594197e-06, "loss": 0.1069, "step": 494875 }, { "epoch": 4.87, "grad_norm": 7.4841508865356445, "learning_rate": 2.5935833656051717e-06, "loss": 0.1137, "step": 494900 }, { "epoch": 4.87, "grad_norm": 10.885709762573242, "learning_rate": 2.593459243150923e-06, "loss": 0.1398, "step": 494925 }, { "epoch": 4.87, "grad_norm": 4.092050552368164, "learning_rate": 2.5933351206966746e-06, "loss": 0.0983, "step": 494950 }, { "epoch": 4.87, "grad_norm": 15.600005149841309, "learning_rate": 2.5932109982424262e-06, "loss": 0.1, "step": 494975 }, { "epoch": 4.87, "grad_norm": 1.3888859748840332, "learning_rate": 2.593086875788178e-06, "loss": 0.0775, "step": 495000 }, { "epoch": 4.87, "grad_norm": 17.310747146606445, "learning_rate": 2.592962753333929e-06, "loss": 0.0959, "step": 495025 }, { "epoch": 4.87, "grad_norm": 4.617021083831787, "learning_rate": 2.592838630879681e-06, "loss": 0.0946, "step": 495050 }, { "epoch": 4.87, "grad_norm": 34.994930267333984, "learning_rate": 2.5927145084254323e-06, "loss": 0.1048, "step": 495075 }, { "epoch": 4.87, "grad_norm": 4.137772083282471, "learning_rate": 2.5925903859711835e-06, "loss": 0.0787, "step": 495100 }, { "epoch": 4.87, "grad_norm": 22.9512939453125, "learning_rate": 2.5924662635169356e-06, "loss": 0.1357, "step": 495125 }, { "epoch": 4.87, "grad_norm": 4.117682456970215, "learning_rate": 2.592342141062687e-06, "loss": 0.0812, "step": 495150 }, { "epoch": 4.87, "grad_norm": 16.14056968688965, "learning_rate": 2.5922180186084384e-06, "loss": 0.1147, "step": 495175 }, { "epoch": 4.87, "grad_norm": 0.20330150425434113, "learning_rate": 2.59209389615419e-06, "loss": 0.0813, "step": 495200 }, { "epoch": 4.87, "grad_norm": 11.187410354614258, "learning_rate": 2.5919697736999417e-06, "loss": 0.0972, "step": 495225 }, { "epoch": 4.87, "grad_norm": 0.14233870804309845, "learning_rate": 2.591845651245693e-06, "loss": 0.0954, "step": 495250 }, { "epoch": 4.87, "grad_norm": 15.203686714172363, "learning_rate": 2.591721528791445e-06, "loss": 0.0987, "step": 495275 }, { "epoch": 4.87, "grad_norm": 3.7674360275268555, "learning_rate": 2.591597406337196e-06, "loss": 0.093, "step": 495300 }, { "epoch": 4.87, "grad_norm": 10.200838088989258, "learning_rate": 2.5914732838829474e-06, "loss": 0.1234, "step": 495325 }, { "epoch": 4.87, "grad_norm": 4.93850564956665, "learning_rate": 2.5913491614286995e-06, "loss": 0.0667, "step": 495350 }, { "epoch": 4.87, "grad_norm": 11.270395278930664, "learning_rate": 2.5912250389744507e-06, "loss": 0.0916, "step": 495375 }, { "epoch": 4.87, "grad_norm": 2.718576192855835, "learning_rate": 2.5911009165202023e-06, "loss": 0.0766, "step": 495400 }, { "epoch": 4.87, "grad_norm": 9.965771675109863, "learning_rate": 2.590976794065954e-06, "loss": 0.0961, "step": 495425 }, { "epoch": 4.87, "grad_norm": 1.7246501445770264, "learning_rate": 2.5908526716117056e-06, "loss": 0.0708, "step": 495450 }, { "epoch": 4.87, "grad_norm": 24.11124610900879, "learning_rate": 2.590728549157457e-06, "loss": 0.1119, "step": 495475 }, { "epoch": 4.87, "grad_norm": 4.407934665679932, "learning_rate": 2.590604426703209e-06, "loss": 0.1013, "step": 495500 }, { "epoch": 4.87, "grad_norm": 13.481596946716309, "learning_rate": 2.59048030424896e-06, "loss": 0.1129, "step": 495525 }, { "epoch": 4.87, "grad_norm": 0.6240629553794861, "learning_rate": 2.5903561817947113e-06, "loss": 0.0887, "step": 495550 }, { "epoch": 4.87, "grad_norm": 11.775444984436035, "learning_rate": 2.5902320593404633e-06, "loss": 0.1361, "step": 495575 }, { "epoch": 4.87, "grad_norm": 3.594144582748413, "learning_rate": 2.5901079368862145e-06, "loss": 0.0936, "step": 495600 }, { "epoch": 4.87, "grad_norm": 8.816679000854492, "learning_rate": 2.589983814431966e-06, "loss": 0.0972, "step": 495625 }, { "epoch": 4.87, "grad_norm": 1.9256657361984253, "learning_rate": 2.5898596919777174e-06, "loss": 0.0956, "step": 495650 }, { "epoch": 4.87, "grad_norm": 15.688878059387207, "learning_rate": 2.5897355695234695e-06, "loss": 0.1234, "step": 495675 }, { "epoch": 4.87, "grad_norm": 3.6968748569488525, "learning_rate": 2.5896114470692207e-06, "loss": 0.0777, "step": 495700 }, { "epoch": 4.87, "grad_norm": 12.43842601776123, "learning_rate": 2.5894873246149723e-06, "loss": 0.119, "step": 495725 }, { "epoch": 4.87, "grad_norm": 1.3248450756072998, "learning_rate": 2.589363202160724e-06, "loss": 0.0733, "step": 495750 }, { "epoch": 4.87, "grad_norm": 10.66917896270752, "learning_rate": 2.5892390797064756e-06, "loss": 0.1074, "step": 495775 }, { "epoch": 4.87, "grad_norm": 1.3836333751678467, "learning_rate": 2.5891149572522268e-06, "loss": 0.0895, "step": 495800 }, { "epoch": 4.88, "grad_norm": 15.55738639831543, "learning_rate": 2.588990834797979e-06, "loss": 0.0986, "step": 495825 }, { "epoch": 4.88, "grad_norm": 4.712611198425293, "learning_rate": 2.58886671234373e-06, "loss": 0.0887, "step": 495850 }, { "epoch": 4.88, "grad_norm": 17.18126106262207, "learning_rate": 2.5887425898894813e-06, "loss": 0.1268, "step": 495875 }, { "epoch": 4.88, "grad_norm": 3.483895778656006, "learning_rate": 2.5886184674352333e-06, "loss": 0.0856, "step": 495900 }, { "epoch": 4.88, "grad_norm": 15.185722351074219, "learning_rate": 2.5884943449809845e-06, "loss": 0.1233, "step": 495925 }, { "epoch": 4.88, "grad_norm": 8.643704414367676, "learning_rate": 2.588370222526736e-06, "loss": 0.0777, "step": 495950 }, { "epoch": 4.88, "grad_norm": 15.815512657165527, "learning_rate": 2.588246100072488e-06, "loss": 0.0982, "step": 495975 }, { "epoch": 4.88, "grad_norm": 0.3495536148548126, "learning_rate": 2.5881219776182394e-06, "loss": 0.0733, "step": 496000 }, { "epoch": 4.88, "grad_norm": 17.450345993041992, "learning_rate": 2.5879978551639906e-06, "loss": 0.1467, "step": 496025 }, { "epoch": 4.88, "grad_norm": 4.694895267486572, "learning_rate": 2.5878737327097427e-06, "loss": 0.0789, "step": 496050 }, { "epoch": 4.88, "grad_norm": 7.15519380569458, "learning_rate": 2.587749610255494e-06, "loss": 0.1232, "step": 496075 }, { "epoch": 4.88, "grad_norm": 3.474733352661133, "learning_rate": 2.587625487801245e-06, "loss": 0.0975, "step": 496100 }, { "epoch": 4.88, "grad_norm": 12.423628807067871, "learning_rate": 2.587501365346997e-06, "loss": 0.1198, "step": 496125 }, { "epoch": 4.88, "grad_norm": 1.7079710960388184, "learning_rate": 2.5873772428927484e-06, "loss": 0.0772, "step": 496150 }, { "epoch": 4.88, "grad_norm": 14.130637168884277, "learning_rate": 2.5872531204385e-06, "loss": 0.1253, "step": 496175 }, { "epoch": 4.88, "grad_norm": 2.981381893157959, "learning_rate": 2.5871289979842517e-06, "loss": 0.0717, "step": 496200 }, { "epoch": 4.88, "grad_norm": 18.480609893798828, "learning_rate": 2.5870048755300033e-06, "loss": 0.1103, "step": 496225 }, { "epoch": 4.88, "grad_norm": 1.7742440700531006, "learning_rate": 2.5868807530757545e-06, "loss": 0.0913, "step": 496250 }, { "epoch": 4.88, "grad_norm": 22.227174758911133, "learning_rate": 2.5867566306215066e-06, "loss": 0.1029, "step": 496275 }, { "epoch": 4.88, "grad_norm": 0.2753453850746155, "learning_rate": 2.5866325081672578e-06, "loss": 0.0648, "step": 496300 }, { "epoch": 4.88, "grad_norm": 8.60796070098877, "learning_rate": 2.586508385713009e-06, "loss": 0.1148, "step": 496325 }, { "epoch": 4.88, "grad_norm": 3.9011788368225098, "learning_rate": 2.586384263258761e-06, "loss": 0.0791, "step": 496350 }, { "epoch": 4.88, "grad_norm": 16.530698776245117, "learning_rate": 2.5862601408045123e-06, "loss": 0.1022, "step": 496375 }, { "epoch": 4.88, "grad_norm": 2.764572858810425, "learning_rate": 2.586136018350264e-06, "loss": 0.0641, "step": 496400 }, { "epoch": 4.88, "grad_norm": 15.449670791625977, "learning_rate": 2.5860118958960155e-06, "loss": 0.1092, "step": 496425 }, { "epoch": 4.88, "grad_norm": 8.381928443908691, "learning_rate": 2.585887773441767e-06, "loss": 0.0649, "step": 496450 }, { "epoch": 4.88, "grad_norm": 6.769093990325928, "learning_rate": 2.5857636509875184e-06, "loss": 0.0943, "step": 496475 }, { "epoch": 4.88, "grad_norm": 9.245407104492188, "learning_rate": 2.5856395285332704e-06, "loss": 0.0883, "step": 496500 }, { "epoch": 4.88, "grad_norm": 8.92396068572998, "learning_rate": 2.5855154060790217e-06, "loss": 0.1144, "step": 496525 }, { "epoch": 4.88, "grad_norm": 7.416862487792969, "learning_rate": 2.585391283624773e-06, "loss": 0.0828, "step": 496550 }, { "epoch": 4.88, "grad_norm": 10.224132537841797, "learning_rate": 2.5852671611705245e-06, "loss": 0.1341, "step": 496575 }, { "epoch": 4.88, "grad_norm": 0.16166691482067108, "learning_rate": 2.585143038716276e-06, "loss": 0.0938, "step": 496600 }, { "epoch": 4.88, "grad_norm": 12.93961238861084, "learning_rate": 2.5850189162620278e-06, "loss": 0.095, "step": 496625 }, { "epoch": 4.88, "grad_norm": 0.8697803020477295, "learning_rate": 2.584894793807779e-06, "loss": 0.0905, "step": 496650 }, { "epoch": 4.88, "grad_norm": 9.133623123168945, "learning_rate": 2.584770671353531e-06, "loss": 0.0889, "step": 496675 }, { "epoch": 4.88, "grad_norm": 7.594071388244629, "learning_rate": 2.5846465488992823e-06, "loss": 0.0776, "step": 496700 }, { "epoch": 4.88, "grad_norm": 12.826054573059082, "learning_rate": 2.5845224264450335e-06, "loss": 0.108, "step": 496725 }, { "epoch": 4.88, "grad_norm": 0.16070298850536346, "learning_rate": 2.5843983039907855e-06, "loss": 0.0749, "step": 496750 }, { "epoch": 4.88, "grad_norm": Infinity, "learning_rate": 2.5842791464347065e-06, "loss": 0.1109, "step": 496775 }, { "epoch": 4.88, "grad_norm": 3.4630982875823975, "learning_rate": 2.5841550239804586e-06, "loss": 0.0753, "step": 496800 }, { "epoch": 4.88, "grad_norm": 17.942068099975586, "learning_rate": 2.58403090152621e-06, "loss": 0.1032, "step": 496825 }, { "epoch": 4.89, "grad_norm": 4.441296100616455, "learning_rate": 2.5839067790719615e-06, "loss": 0.0665, "step": 496850 }, { "epoch": 4.89, "grad_norm": 14.984953880310059, "learning_rate": 2.583782656617713e-06, "loss": 0.1122, "step": 496875 }, { "epoch": 4.89, "grad_norm": 10.391251564025879, "learning_rate": 2.5836585341634647e-06, "loss": 0.0792, "step": 496900 }, { "epoch": 4.89, "grad_norm": 13.032259941101074, "learning_rate": 2.583534411709216e-06, "loss": 0.089, "step": 496925 }, { "epoch": 4.89, "grad_norm": 0.4384513795375824, "learning_rate": 2.583410289254968e-06, "loss": 0.0788, "step": 496950 }, { "epoch": 4.89, "grad_norm": 16.89545249938965, "learning_rate": 2.583286166800719e-06, "loss": 0.101, "step": 496975 }, { "epoch": 4.89, "grad_norm": 3.8415005207061768, "learning_rate": 2.5831620443464704e-06, "loss": 0.1028, "step": 497000 }, { "epoch": 4.89, "grad_norm": 11.497701644897461, "learning_rate": 2.5830379218922225e-06, "loss": 0.1238, "step": 497025 }, { "epoch": 4.89, "grad_norm": 9.762040138244629, "learning_rate": 2.5829137994379737e-06, "loss": 0.0879, "step": 497050 }, { "epoch": 4.89, "grad_norm": 12.639756202697754, "learning_rate": 2.5827896769837253e-06, "loss": 0.0919, "step": 497075 }, { "epoch": 4.89, "grad_norm": 3.167325973510742, "learning_rate": 2.5826655545294765e-06, "loss": 0.0795, "step": 497100 }, { "epoch": 4.89, "grad_norm": 6.404575824737549, "learning_rate": 2.5825414320752286e-06, "loss": 0.1017, "step": 497125 }, { "epoch": 4.89, "grad_norm": 3.578977346420288, "learning_rate": 2.58241730962098e-06, "loss": 0.1049, "step": 497150 }, { "epoch": 4.89, "grad_norm": 9.242355346679688, "learning_rate": 2.582293187166731e-06, "loss": 0.088, "step": 497175 }, { "epoch": 4.89, "grad_norm": 3.456984519958496, "learning_rate": 2.582169064712483e-06, "loss": 0.0722, "step": 497200 }, { "epoch": 4.89, "grad_norm": 12.8619966506958, "learning_rate": 2.5820449422582343e-06, "loss": 0.1499, "step": 497225 }, { "epoch": 4.89, "grad_norm": 2.6960031986236572, "learning_rate": 2.581920819803986e-06, "loss": 0.054, "step": 497250 }, { "epoch": 4.89, "grad_norm": 20.010602951049805, "learning_rate": 2.5817966973497376e-06, "loss": 0.1079, "step": 497275 }, { "epoch": 4.89, "grad_norm": 5.972963333129883, "learning_rate": 2.581672574895489e-06, "loss": 0.0808, "step": 497300 }, { "epoch": 4.89, "grad_norm": 14.549456596374512, "learning_rate": 2.5815484524412404e-06, "loss": 0.0944, "step": 497325 }, { "epoch": 4.89, "grad_norm": 6.213233947753906, "learning_rate": 2.5814243299869925e-06, "loss": 0.0773, "step": 497350 }, { "epoch": 4.89, "grad_norm": 12.933042526245117, "learning_rate": 2.5813002075327437e-06, "loss": 0.1195, "step": 497375 }, { "epoch": 4.89, "grad_norm": 6.226362705230713, "learning_rate": 2.581176085078495e-06, "loss": 0.0709, "step": 497400 }, { "epoch": 4.89, "grad_norm": 14.558158874511719, "learning_rate": 2.581051962624247e-06, "loss": 0.1248, "step": 497425 }, { "epoch": 4.89, "grad_norm": 4.832123756408691, "learning_rate": 2.580927840169998e-06, "loss": 0.0731, "step": 497450 }, { "epoch": 4.89, "grad_norm": 16.381643295288086, "learning_rate": 2.5808037177157498e-06, "loss": 0.1137, "step": 497475 }, { "epoch": 4.89, "grad_norm": 1.5827232599258423, "learning_rate": 2.5806795952615014e-06, "loss": 0.0822, "step": 497500 }, { "epoch": 4.89, "grad_norm": 5.592233180999756, "learning_rate": 2.580555472807253e-06, "loss": 0.0767, "step": 497525 }, { "epoch": 4.89, "grad_norm": 1.727211356163025, "learning_rate": 2.5804313503530043e-06, "loss": 0.0762, "step": 497550 }, { "epoch": 4.89, "grad_norm": 16.59841537475586, "learning_rate": 2.5803072278987563e-06, "loss": 0.0935, "step": 497575 }, { "epoch": 4.89, "grad_norm": 3.904456377029419, "learning_rate": 2.5801831054445075e-06, "loss": 0.0659, "step": 497600 }, { "epoch": 4.89, "grad_norm": 9.46236515045166, "learning_rate": 2.5800589829902588e-06, "loss": 0.1275, "step": 497625 }, { "epoch": 4.89, "grad_norm": 3.493161678314209, "learning_rate": 2.579934860536011e-06, "loss": 0.065, "step": 497650 }, { "epoch": 4.89, "grad_norm": 11.787558555603027, "learning_rate": 2.579810738081762e-06, "loss": 0.1173, "step": 497675 }, { "epoch": 4.89, "grad_norm": 4.35769510269165, "learning_rate": 2.5796866156275137e-06, "loss": 0.0785, "step": 497700 }, { "epoch": 4.89, "grad_norm": 9.20605754852295, "learning_rate": 2.5795624931732653e-06, "loss": 0.1334, "step": 497725 }, { "epoch": 4.89, "grad_norm": 6.1249895095825195, "learning_rate": 2.579438370719017e-06, "loss": 0.0742, "step": 497750 }, { "epoch": 4.89, "grad_norm": 20.640724182128906, "learning_rate": 2.579314248264768e-06, "loss": 0.1087, "step": 497775 }, { "epoch": 4.89, "grad_norm": 6.552541732788086, "learning_rate": 2.57919012581052e-06, "loss": 0.0759, "step": 497800 }, { "epoch": 4.89, "grad_norm": 12.502934455871582, "learning_rate": 2.5790660033562714e-06, "loss": 0.0824, "step": 497825 }, { "epoch": 4.89, "grad_norm": 5.263265132904053, "learning_rate": 2.5789418809020226e-06, "loss": 0.0633, "step": 497850 }, { "epoch": 4.9, "grad_norm": 8.520339012145996, "learning_rate": 2.5788177584477747e-06, "loss": 0.116, "step": 497875 }, { "epoch": 4.9, "grad_norm": 10.30544376373291, "learning_rate": 2.578693635993526e-06, "loss": 0.093, "step": 497900 }, { "epoch": 4.9, "grad_norm": 5.131813049316406, "learning_rate": 2.5785695135392775e-06, "loss": 0.1183, "step": 497925 }, { "epoch": 4.9, "grad_norm": 4.715885162353516, "learning_rate": 2.5784453910850287e-06, "loss": 0.0886, "step": 497950 }, { "epoch": 4.9, "grad_norm": 16.0768985748291, "learning_rate": 2.578321268630781e-06, "loss": 0.1027, "step": 497975 }, { "epoch": 4.9, "grad_norm": 10.903060913085938, "learning_rate": 2.578197146176532e-06, "loss": 0.07, "step": 498000 }, { "epoch": 4.9, "grad_norm": 10.639378547668457, "learning_rate": 2.5780730237222832e-06, "loss": 0.1075, "step": 498025 }, { "epoch": 4.9, "grad_norm": 5.702491760253906, "learning_rate": 2.5779489012680353e-06, "loss": 0.0779, "step": 498050 }, { "epoch": 4.9, "grad_norm": 15.546380996704102, "learning_rate": 2.5778247788137865e-06, "loss": 0.0918, "step": 498075 }, { "epoch": 4.9, "grad_norm": 7.253015518188477, "learning_rate": 2.577700656359538e-06, "loss": 0.0426, "step": 498100 }, { "epoch": 4.9, "grad_norm": 10.323698043823242, "learning_rate": 2.5775765339052898e-06, "loss": 0.104, "step": 498125 }, { "epoch": 4.9, "grad_norm": 0.909375011920929, "learning_rate": 2.5774524114510414e-06, "loss": 0.0702, "step": 498150 }, { "epoch": 4.9, "grad_norm": 12.283987045288086, "learning_rate": 2.5773282889967926e-06, "loss": 0.1186, "step": 498175 }, { "epoch": 4.9, "grad_norm": 0.9846435785293579, "learning_rate": 2.5772041665425447e-06, "loss": 0.0891, "step": 498200 }, { "epoch": 4.9, "grad_norm": 8.382851600646973, "learning_rate": 2.577080044088296e-06, "loss": 0.1345, "step": 498225 }, { "epoch": 4.9, "grad_norm": 6.730684757232666, "learning_rate": 2.576955921634047e-06, "loss": 0.0812, "step": 498250 }, { "epoch": 4.9, "grad_norm": 16.738950729370117, "learning_rate": 2.576831799179799e-06, "loss": 0.0959, "step": 498275 }, { "epoch": 4.9, "grad_norm": 1.6825358867645264, "learning_rate": 2.5767076767255504e-06, "loss": 0.0725, "step": 498300 }, { "epoch": 4.9, "grad_norm": 7.9722771644592285, "learning_rate": 2.576583554271302e-06, "loss": 0.1081, "step": 498325 }, { "epoch": 4.9, "grad_norm": 1.0528219938278198, "learning_rate": 2.5764594318170536e-06, "loss": 0.0805, "step": 498350 }, { "epoch": 4.9, "grad_norm": 6.415780544281006, "learning_rate": 2.5763353093628053e-06, "loss": 0.1327, "step": 498375 }, { "epoch": 4.9, "grad_norm": 9.262669563293457, "learning_rate": 2.5762111869085565e-06, "loss": 0.0656, "step": 498400 }, { "epoch": 4.9, "grad_norm": 5.969720840454102, "learning_rate": 2.5760870644543085e-06, "loss": 0.1043, "step": 498425 }, { "epoch": 4.9, "grad_norm": 2.8976361751556396, "learning_rate": 2.5759629420000597e-06, "loss": 0.0913, "step": 498450 }, { "epoch": 4.9, "grad_norm": 11.899158477783203, "learning_rate": 2.5758388195458114e-06, "loss": 0.1365, "step": 498475 }, { "epoch": 4.9, "grad_norm": 0.291053831577301, "learning_rate": 2.575714697091563e-06, "loss": 0.0773, "step": 498500 }, { "epoch": 4.9, "grad_norm": 12.482786178588867, "learning_rate": 2.5755905746373146e-06, "loss": 0.086, "step": 498525 }, { "epoch": 4.9, "grad_norm": 8.404814720153809, "learning_rate": 2.575466452183066e-06, "loss": 0.0741, "step": 498550 }, { "epoch": 4.9, "grad_norm": 7.454885482788086, "learning_rate": 2.5753423297288175e-06, "loss": 0.1076, "step": 498575 }, { "epoch": 4.9, "grad_norm": 5.4201130867004395, "learning_rate": 2.575218207274569e-06, "loss": 0.0708, "step": 498600 }, { "epoch": 4.9, "grad_norm": 23.6652889251709, "learning_rate": 2.5750940848203203e-06, "loss": 0.0997, "step": 498625 }, { "epoch": 4.9, "grad_norm": 7.084901332855225, "learning_rate": 2.5749699623660724e-06, "loss": 0.0935, "step": 498650 }, { "epoch": 4.9, "grad_norm": 16.064096450805664, "learning_rate": 2.5748458399118236e-06, "loss": 0.1265, "step": 498675 }, { "epoch": 4.9, "grad_norm": 5.859637260437012, "learning_rate": 2.5747217174575752e-06, "loss": 0.0872, "step": 498700 }, { "epoch": 4.9, "grad_norm": 11.055614471435547, "learning_rate": 2.574597595003327e-06, "loss": 0.1324, "step": 498725 }, { "epoch": 4.9, "grad_norm": 5.133054256439209, "learning_rate": 2.5744734725490785e-06, "loss": 0.0795, "step": 498750 }, { "epoch": 4.9, "grad_norm": 14.682321548461914, "learning_rate": 2.5743493500948297e-06, "loss": 0.1036, "step": 498775 }, { "epoch": 4.9, "grad_norm": 13.250938415527344, "learning_rate": 2.574225227640581e-06, "loss": 0.0802, "step": 498800 }, { "epoch": 4.9, "grad_norm": 11.256823539733887, "learning_rate": 2.574101105186333e-06, "loss": 0.1015, "step": 498825 }, { "epoch": 4.9, "grad_norm": 1.1829197406768799, "learning_rate": 2.573976982732084e-06, "loss": 0.0919, "step": 498850 }, { "epoch": 4.91, "grad_norm": 17.92029571533203, "learning_rate": 2.573852860277836e-06, "loss": 0.0877, "step": 498875 }, { "epoch": 4.91, "grad_norm": 6.1915082931518555, "learning_rate": 2.5737287378235875e-06, "loss": 0.0882, "step": 498900 }, { "epoch": 4.91, "grad_norm": 12.19522762298584, "learning_rate": 2.573604615369339e-06, "loss": 0.1011, "step": 498925 }, { "epoch": 4.91, "grad_norm": 7.765982627868652, "learning_rate": 2.5734854578132606e-06, "loss": 0.0707, "step": 498950 }, { "epoch": 4.91, "grad_norm": 15.654764175415039, "learning_rate": 2.573361335359012e-06, "loss": 0.1029, "step": 498975 }, { "epoch": 4.91, "grad_norm": 0.4811989665031433, "learning_rate": 2.5732372129047634e-06, "loss": 0.0951, "step": 499000 }, { "epoch": 4.91, "grad_norm": 14.362791061401367, "learning_rate": 2.5731130904505155e-06, "loss": 0.093, "step": 499025 }, { "epoch": 4.91, "grad_norm": 13.397397994995117, "learning_rate": 2.5729889679962667e-06, "loss": 0.0894, "step": 499050 }, { "epoch": 4.91, "grad_norm": 9.828377723693848, "learning_rate": 2.572864845542018e-06, "loss": 0.114, "step": 499075 }, { "epoch": 4.91, "grad_norm": 4.428963661193848, "learning_rate": 2.57274072308777e-06, "loss": 0.0749, "step": 499100 }, { "epoch": 4.91, "grad_norm": 14.092964172363281, "learning_rate": 2.572616600633521e-06, "loss": 0.1142, "step": 499125 }, { "epoch": 4.91, "grad_norm": 4.097193241119385, "learning_rate": 2.572492478179273e-06, "loss": 0.0552, "step": 499150 }, { "epoch": 4.91, "grad_norm": 13.318451881408691, "learning_rate": 2.5723683557250244e-06, "loss": 0.0943, "step": 499175 }, { "epoch": 4.91, "grad_norm": 4.689572811126709, "learning_rate": 2.572244233270776e-06, "loss": 0.0851, "step": 499200 }, { "epoch": 4.91, "grad_norm": 14.982301712036133, "learning_rate": 2.5721201108165273e-06, "loss": 0.1263, "step": 499225 }, { "epoch": 4.91, "grad_norm": 5.985453128814697, "learning_rate": 2.5719959883622793e-06, "loss": 0.0672, "step": 499250 }, { "epoch": 4.91, "grad_norm": 16.6020565032959, "learning_rate": 2.5718718659080305e-06, "loss": 0.1109, "step": 499275 }, { "epoch": 4.91, "grad_norm": 8.313044548034668, "learning_rate": 2.5717477434537818e-06, "loss": 0.0889, "step": 499300 }, { "epoch": 4.91, "grad_norm": 8.96241283416748, "learning_rate": 2.571623620999534e-06, "loss": 0.091, "step": 499325 }, { "epoch": 4.91, "grad_norm": 0.08117540180683136, "learning_rate": 2.571499498545285e-06, "loss": 0.0825, "step": 499350 }, { "epoch": 4.91, "grad_norm": 4.196479797363281, "learning_rate": 2.5713753760910367e-06, "loss": 0.0922, "step": 499375 }, { "epoch": 4.91, "grad_norm": 5.45128059387207, "learning_rate": 2.571251253636788e-06, "loss": 0.0689, "step": 499400 }, { "epoch": 4.91, "grad_norm": 9.205389976501465, "learning_rate": 2.57112713118254e-06, "loss": 0.0978, "step": 499425 }, { "epoch": 4.91, "grad_norm": 7.127246856689453, "learning_rate": 2.571003008728291e-06, "loss": 0.0632, "step": 499450 }, { "epoch": 4.91, "grad_norm": 16.463916778564453, "learning_rate": 2.5708788862740424e-06, "loss": 0.1152, "step": 499475 }, { "epoch": 4.91, "grad_norm": 11.418049812316895, "learning_rate": 2.5707547638197944e-06, "loss": 0.0745, "step": 499500 }, { "epoch": 4.91, "grad_norm": 21.332796096801758, "learning_rate": 2.5706306413655456e-06, "loss": 0.1139, "step": 499525 }, { "epoch": 4.91, "grad_norm": 5.1650261878967285, "learning_rate": 2.5705065189112973e-06, "loss": 0.0776, "step": 499550 }, { "epoch": 4.91, "grad_norm": 10.593050956726074, "learning_rate": 2.570382396457049e-06, "loss": 0.1287, "step": 499575 }, { "epoch": 4.91, "grad_norm": 0.7969070672988892, "learning_rate": 2.5702582740028005e-06, "loss": 0.0596, "step": 499600 }, { "epoch": 4.91, "grad_norm": 8.923689842224121, "learning_rate": 2.5701341515485517e-06, "loss": 0.0709, "step": 499625 }, { "epoch": 4.91, "grad_norm": 5.134480953216553, "learning_rate": 2.570010029094304e-06, "loss": 0.084, "step": 499650 }, { "epoch": 4.91, "grad_norm": 4.631814956665039, "learning_rate": 2.569885906640055e-06, "loss": 0.0895, "step": 499675 }, { "epoch": 4.91, "grad_norm": 10.219529151916504, "learning_rate": 2.5697617841858062e-06, "loss": 0.0741, "step": 499700 }, { "epoch": 4.91, "grad_norm": 17.996061325073242, "learning_rate": 2.5696376617315583e-06, "loss": 0.1256, "step": 499725 }, { "epoch": 4.91, "grad_norm": 7.7513298988342285, "learning_rate": 2.5695135392773095e-06, "loss": 0.0962, "step": 499750 }, { "epoch": 4.91, "grad_norm": 7.681100845336914, "learning_rate": 2.569389416823061e-06, "loss": 0.109, "step": 499775 }, { "epoch": 4.91, "grad_norm": 6.64680814743042, "learning_rate": 2.5692652943688128e-06, "loss": 0.0968, "step": 499800 }, { "epoch": 4.91, "grad_norm": 9.203245162963867, "learning_rate": 2.5691411719145644e-06, "loss": 0.1095, "step": 499825 }, { "epoch": 4.91, "grad_norm": 11.24288558959961, "learning_rate": 2.5690170494603156e-06, "loss": 0.0824, "step": 499850 }, { "epoch": 4.91, "grad_norm": 10.750167846679688, "learning_rate": 2.5688929270060677e-06, "loss": 0.1056, "step": 499875 }, { "epoch": 4.92, "grad_norm": 2.699314594268799, "learning_rate": 2.568768804551819e-06, "loss": 0.0672, "step": 499900 }, { "epoch": 4.92, "grad_norm": 17.5266170501709, "learning_rate": 2.56864468209757e-06, "loss": 0.104, "step": 499925 }, { "epoch": 4.92, "grad_norm": 3.265422821044922, "learning_rate": 2.568520559643322e-06, "loss": 0.0798, "step": 499950 }, { "epoch": 4.92, "grad_norm": 18.570144653320312, "learning_rate": 2.5683964371890734e-06, "loss": 0.1155, "step": 499975 }, { "epoch": 4.92, "grad_norm": 5.532454013824463, "learning_rate": 2.568272314734825e-06, "loss": 0.0758, "step": 500000 }, { "epoch": 4.92, "eval_loss": 0.7359766364097595, "eval_runtime": 6142.8671, "eval_samples_per_second": 1.541, "eval_steps_per_second": 0.193, "eval_wer": 0.11530290349578318, "step": 500000 }, { "epoch": 4.92, "grad_norm": 20.91490936279297, "learning_rate": 2.5681481922805766e-06, "loss": 0.1027, "step": 500025 }, { "epoch": 4.92, "grad_norm": 5.972024917602539, "learning_rate": 2.5680240698263283e-06, "loss": 0.0927, "step": 500050 }, { "epoch": 4.92, "grad_norm": 11.078657150268555, "learning_rate": 2.5678999473720795e-06, "loss": 0.1082, "step": 500075 }, { "epoch": 4.92, "grad_norm": 0.018568312749266624, "learning_rate": 2.5677758249178315e-06, "loss": 0.0802, "step": 500100 }, { "epoch": 4.92, "grad_norm": 14.934954643249512, "learning_rate": 2.5676517024635827e-06, "loss": 0.1128, "step": 500125 }, { "epoch": 4.92, "grad_norm": 7.536742210388184, "learning_rate": 2.567527580009334e-06, "loss": 0.0772, "step": 500150 }, { "epoch": 4.92, "grad_norm": 13.038228034973145, "learning_rate": 2.567403457555086e-06, "loss": 0.1017, "step": 500175 }, { "epoch": 4.92, "grad_norm": 1.9117374420166016, "learning_rate": 2.5672793351008372e-06, "loss": 0.0538, "step": 500200 }, { "epoch": 4.92, "grad_norm": 16.588451385498047, "learning_rate": 2.567155212646589e-06, "loss": 0.1107, "step": 500225 }, { "epoch": 4.92, "grad_norm": 7.355580806732178, "learning_rate": 2.56703109019234e-06, "loss": 0.0825, "step": 500250 }, { "epoch": 4.92, "grad_norm": 15.59213638305664, "learning_rate": 2.566906967738092e-06, "loss": 0.1092, "step": 500275 }, { "epoch": 4.92, "grad_norm": 3.547325849533081, "learning_rate": 2.5667828452838433e-06, "loss": 0.0784, "step": 500300 }, { "epoch": 4.92, "grad_norm": 16.843416213989258, "learning_rate": 2.5666587228295946e-06, "loss": 0.1087, "step": 500325 }, { "epoch": 4.92, "grad_norm": 5.430677890777588, "learning_rate": 2.5665346003753466e-06, "loss": 0.0515, "step": 500350 }, { "epoch": 4.92, "grad_norm": 11.724499702453613, "learning_rate": 2.566410477921098e-06, "loss": 0.1133, "step": 500375 }, { "epoch": 4.92, "grad_norm": 0.4651775658130646, "learning_rate": 2.5662863554668495e-06, "loss": 0.0788, "step": 500400 }, { "epoch": 4.92, "grad_norm": 13.987895965576172, "learning_rate": 2.566162233012601e-06, "loss": 0.1102, "step": 500425 }, { "epoch": 4.92, "grad_norm": 0.806754469871521, "learning_rate": 2.5660381105583527e-06, "loss": 0.07, "step": 500450 }, { "epoch": 4.92, "grad_norm": 9.25757122039795, "learning_rate": 2.565913988104104e-06, "loss": 0.107, "step": 500475 }, { "epoch": 4.92, "grad_norm": 19.468935012817383, "learning_rate": 2.565789865649856e-06, "loss": 0.0723, "step": 500500 }, { "epoch": 4.92, "grad_norm": 6.075979232788086, "learning_rate": 2.5656657431956072e-06, "loss": 0.0944, "step": 500525 }, { "epoch": 4.92, "grad_norm": 3.7837631702423096, "learning_rate": 2.5655416207413584e-06, "loss": 0.0804, "step": 500550 }, { "epoch": 4.92, "grad_norm": 15.119010925292969, "learning_rate": 2.5654174982871105e-06, "loss": 0.1008, "step": 500575 }, { "epoch": 4.92, "grad_norm": 6.714234352111816, "learning_rate": 2.5652933758328617e-06, "loss": 0.0692, "step": 500600 }, { "epoch": 4.92, "grad_norm": 10.705604553222656, "learning_rate": 2.5651692533786133e-06, "loss": 0.1226, "step": 500625 }, { "epoch": 4.92, "grad_norm": 8.412518501281738, "learning_rate": 2.565045130924365e-06, "loss": 0.083, "step": 500650 }, { "epoch": 4.92, "grad_norm": 10.723499298095703, "learning_rate": 2.5649210084701166e-06, "loss": 0.0904, "step": 500675 }, { "epoch": 4.92, "grad_norm": 8.463471412658691, "learning_rate": 2.564796886015868e-06, "loss": 0.0726, "step": 500700 }, { "epoch": 4.92, "grad_norm": 13.617188453674316, "learning_rate": 2.56467276356162e-06, "loss": 0.1176, "step": 500725 }, { "epoch": 4.92, "grad_norm": 2.635298728942871, "learning_rate": 2.564548641107371e-06, "loss": 0.0679, "step": 500750 }, { "epoch": 4.92, "grad_norm": 14.960779190063477, "learning_rate": 2.5644245186531223e-06, "loss": 0.1185, "step": 500775 }, { "epoch": 4.92, "grad_norm": 0.25072363018989563, "learning_rate": 2.5643003961988743e-06, "loss": 0.0584, "step": 500800 }, { "epoch": 4.92, "grad_norm": 10.198226928710938, "learning_rate": 2.5641762737446256e-06, "loss": 0.0903, "step": 500825 }, { "epoch": 4.92, "grad_norm": 1.1589144468307495, "learning_rate": 2.564052151290377e-06, "loss": 0.086, "step": 500850 }, { "epoch": 4.92, "grad_norm": 19.938310623168945, "learning_rate": 2.563928028836129e-06, "loss": 0.1155, "step": 500875 }, { "epoch": 4.92, "grad_norm": 10.29901123046875, "learning_rate": 2.5638039063818805e-06, "loss": 0.1186, "step": 500900 }, { "epoch": 4.93, "grad_norm": 12.922367095947266, "learning_rate": 2.5636797839276317e-06, "loss": 0.1054, "step": 500925 }, { "epoch": 4.93, "grad_norm": 8.916671752929688, "learning_rate": 2.5635556614733837e-06, "loss": 0.0695, "step": 500950 }, { "epoch": 4.93, "grad_norm": 9.72850227355957, "learning_rate": 2.563431539019135e-06, "loss": 0.0966, "step": 500975 }, { "epoch": 4.93, "grad_norm": 2.6503124237060547, "learning_rate": 2.563307416564886e-06, "loss": 0.08, "step": 501000 }, { "epoch": 4.93, "grad_norm": 19.51009750366211, "learning_rate": 2.5631832941106382e-06, "loss": 0.1342, "step": 501025 }, { "epoch": 4.93, "grad_norm": 6.769705295562744, "learning_rate": 2.5630591716563894e-06, "loss": 0.0649, "step": 501050 }, { "epoch": 4.93, "grad_norm": 10.039315223693848, "learning_rate": 2.562935049202141e-06, "loss": 0.0999, "step": 501075 }, { "epoch": 4.93, "grad_norm": 0.8807147741317749, "learning_rate": 2.5628109267478923e-06, "loss": 0.0754, "step": 501100 }, { "epoch": 4.93, "grad_norm": 8.641931533813477, "learning_rate": 2.5626868042936443e-06, "loss": 0.1025, "step": 501125 }, { "epoch": 4.93, "grad_norm": 7.443461894989014, "learning_rate": 2.5625626818393955e-06, "loss": 0.0989, "step": 501150 }, { "epoch": 4.93, "grad_norm": 8.299935340881348, "learning_rate": 2.5624385593851468e-06, "loss": 0.0822, "step": 501175 }, { "epoch": 4.93, "grad_norm": 8.795333862304688, "learning_rate": 2.562314436930899e-06, "loss": 0.0847, "step": 501200 }, { "epoch": 4.93, "grad_norm": 11.528221130371094, "learning_rate": 2.56219031447665e-06, "loss": 0.0942, "step": 501225 }, { "epoch": 4.93, "grad_norm": 5.1661906242370605, "learning_rate": 2.562071156920572e-06, "loss": 0.0882, "step": 501250 }, { "epoch": 4.93, "grad_norm": 17.206281661987305, "learning_rate": 2.561947034466323e-06, "loss": 0.1135, "step": 501275 }, { "epoch": 4.93, "grad_norm": 11.030579566955566, "learning_rate": 2.5618229120120747e-06, "loss": 0.0833, "step": 501300 }, { "epoch": 4.93, "grad_norm": 10.079895973205566, "learning_rate": 2.5616987895578264e-06, "loss": 0.1463, "step": 501325 }, { "epoch": 4.93, "grad_norm": 7.098401069641113, "learning_rate": 2.561574667103578e-06, "loss": 0.0778, "step": 501350 }, { "epoch": 4.93, "grad_norm": 17.356592178344727, "learning_rate": 2.5614505446493292e-06, "loss": 0.0765, "step": 501375 }, { "epoch": 4.93, "grad_norm": 8.002222061157227, "learning_rate": 2.5613264221950813e-06, "loss": 0.0748, "step": 501400 }, { "epoch": 4.93, "grad_norm": 13.10833740234375, "learning_rate": 2.5612072646390023e-06, "loss": 0.0931, "step": 501425 }, { "epoch": 4.93, "grad_norm": 16.834081649780273, "learning_rate": 2.5610831421847535e-06, "loss": 0.102, "step": 501450 }, { "epoch": 4.93, "grad_norm": 17.149974822998047, "learning_rate": 2.5609590197305056e-06, "loss": 0.0793, "step": 501475 }, { "epoch": 4.93, "grad_norm": 7.455080986022949, "learning_rate": 2.560834897276257e-06, "loss": 0.0754, "step": 501500 }, { "epoch": 4.93, "grad_norm": 16.85595703125, "learning_rate": 2.5607107748220084e-06, "loss": 0.1078, "step": 501525 }, { "epoch": 4.93, "grad_norm": 0.17744608223438263, "learning_rate": 2.56058665236776e-06, "loss": 0.0974, "step": 501550 }, { "epoch": 4.93, "grad_norm": 14.76049518585205, "learning_rate": 2.5604625299135117e-06, "loss": 0.1095, "step": 501575 }, { "epoch": 4.93, "grad_norm": 2.3904290199279785, "learning_rate": 2.560338407459263e-06, "loss": 0.0795, "step": 501600 }, { "epoch": 4.93, "grad_norm": 15.5628080368042, "learning_rate": 2.560214285005015e-06, "loss": 0.1293, "step": 501625 }, { "epoch": 4.93, "grad_norm": 3.5962960720062256, "learning_rate": 2.560090162550766e-06, "loss": 0.0879, "step": 501650 }, { "epoch": 4.93, "grad_norm": 15.598039627075195, "learning_rate": 2.559966040096518e-06, "loss": 0.1056, "step": 501675 }, { "epoch": 4.93, "grad_norm": 7.371152400970459, "learning_rate": 2.5598419176422695e-06, "loss": 0.075, "step": 501700 }, { "epoch": 4.93, "grad_norm": 12.969439506530762, "learning_rate": 2.559717795188021e-06, "loss": 0.0924, "step": 501725 }, { "epoch": 4.93, "grad_norm": 1.5305458307266235, "learning_rate": 2.5595936727337723e-06, "loss": 0.0834, "step": 501750 }, { "epoch": 4.93, "grad_norm": 15.558770179748535, "learning_rate": 2.5594695502795244e-06, "loss": 0.1206, "step": 501775 }, { "epoch": 4.93, "grad_norm": 4.775977611541748, "learning_rate": 2.5593454278252756e-06, "loss": 0.0804, "step": 501800 }, { "epoch": 4.93, "grad_norm": 12.435940742492676, "learning_rate": 2.5592213053710268e-06, "loss": 0.0984, "step": 501825 }, { "epoch": 4.93, "grad_norm": 0.21698309481143951, "learning_rate": 2.559097182916779e-06, "loss": 0.0722, "step": 501850 }, { "epoch": 4.93, "grad_norm": 14.393207550048828, "learning_rate": 2.55897306046253e-06, "loss": 0.1035, "step": 501875 }, { "epoch": 4.93, "grad_norm": 6.088675498962402, "learning_rate": 2.5588489380082817e-06, "loss": 0.0932, "step": 501900 }, { "epoch": 4.94, "grad_norm": 14.898679733276367, "learning_rate": 2.5587248155540333e-06, "loss": 0.1121, "step": 501925 }, { "epoch": 4.94, "grad_norm": 2.6713240146636963, "learning_rate": 2.558600693099785e-06, "loss": 0.0609, "step": 501950 }, { "epoch": 4.94, "grad_norm": 14.131621360778809, "learning_rate": 2.558476570645536e-06, "loss": 0.101, "step": 501975 }, { "epoch": 4.94, "grad_norm": 8.72896957397461, "learning_rate": 2.5583524481912882e-06, "loss": 0.0768, "step": 502000 }, { "epoch": 4.94, "grad_norm": 16.548755645751953, "learning_rate": 2.5582283257370394e-06, "loss": 0.1019, "step": 502025 }, { "epoch": 4.94, "grad_norm": 9.42222785949707, "learning_rate": 2.5581042032827906e-06, "loss": 0.0667, "step": 502050 }, { "epoch": 4.94, "grad_norm": 15.471997261047363, "learning_rate": 2.5579800808285427e-06, "loss": 0.1034, "step": 502075 }, { "epoch": 4.94, "grad_norm": 2.383708953857422, "learning_rate": 2.557855958374294e-06, "loss": 0.0761, "step": 502100 }, { "epoch": 4.94, "grad_norm": 7.045681476593018, "learning_rate": 2.5577318359200456e-06, "loss": 0.1049, "step": 502125 }, { "epoch": 4.94, "grad_norm": 0.7912421822547913, "learning_rate": 2.557607713465797e-06, "loss": 0.0609, "step": 502150 }, { "epoch": 4.94, "grad_norm": 22.57560157775879, "learning_rate": 2.557483591011549e-06, "loss": 0.1198, "step": 502175 }, { "epoch": 4.94, "grad_norm": 11.120296478271484, "learning_rate": 2.5573594685573e-06, "loss": 0.0747, "step": 502200 }, { "epoch": 4.94, "grad_norm": 13.227070808410645, "learning_rate": 2.557235346103052e-06, "loss": 0.1164, "step": 502225 }, { "epoch": 4.94, "grad_norm": 5.457668304443359, "learning_rate": 2.5571112236488033e-06, "loss": 0.0944, "step": 502250 }, { "epoch": 4.94, "grad_norm": 16.09123992919922, "learning_rate": 2.5569871011945545e-06, "loss": 0.1488, "step": 502275 }, { "epoch": 4.94, "grad_norm": 3.636164426803589, "learning_rate": 2.556862978740306e-06, "loss": 0.0737, "step": 502300 }, { "epoch": 4.94, "grad_norm": 11.863370895385742, "learning_rate": 2.5567388562860578e-06, "loss": 0.128, "step": 502325 }, { "epoch": 4.94, "grad_norm": 6.046020030975342, "learning_rate": 2.5566147338318094e-06, "loss": 0.0619, "step": 502350 }, { "epoch": 4.94, "grad_norm": 15.893251419067383, "learning_rate": 2.5564906113775606e-06, "loss": 0.1052, "step": 502375 }, { "epoch": 4.94, "grad_norm": 5.250802993774414, "learning_rate": 2.5563664889233127e-06, "loss": 0.081, "step": 502400 }, { "epoch": 4.94, "grad_norm": 10.1185884475708, "learning_rate": 2.556242366469064e-06, "loss": 0.0997, "step": 502425 }, { "epoch": 4.94, "grad_norm": 5.611918926239014, "learning_rate": 2.556118244014815e-06, "loss": 0.0717, "step": 502450 }, { "epoch": 4.94, "grad_norm": 16.935657501220703, "learning_rate": 2.555994121560567e-06, "loss": 0.0966, "step": 502475 }, { "epoch": 4.94, "grad_norm": 0.9015265107154846, "learning_rate": 2.5558699991063184e-06, "loss": 0.0713, "step": 502500 }, { "epoch": 4.94, "grad_norm": 10.837794303894043, "learning_rate": 2.55574587665207e-06, "loss": 0.0753, "step": 502525 }, { "epoch": 4.94, "grad_norm": 0.8711047172546387, "learning_rate": 2.5556217541978217e-06, "loss": 0.0883, "step": 502550 }, { "epoch": 4.94, "grad_norm": 19.547954559326172, "learning_rate": 2.5554976317435733e-06, "loss": 0.0934, "step": 502575 }, { "epoch": 4.94, "grad_norm": 11.876042366027832, "learning_rate": 2.5553735092893245e-06, "loss": 0.0707, "step": 502600 }, { "epoch": 4.94, "grad_norm": 12.449907302856445, "learning_rate": 2.5552493868350766e-06, "loss": 0.1064, "step": 502625 }, { "epoch": 4.94, "grad_norm": 7.114109992980957, "learning_rate": 2.5551252643808278e-06, "loss": 0.0705, "step": 502650 }, { "epoch": 4.94, "grad_norm": 9.26966667175293, "learning_rate": 2.555001141926579e-06, "loss": 0.1254, "step": 502675 }, { "epoch": 4.94, "grad_norm": 4.070488452911377, "learning_rate": 2.554877019472331e-06, "loss": 0.0917, "step": 502700 }, { "epoch": 4.94, "grad_norm": 13.788753509521484, "learning_rate": 2.5547528970180823e-06, "loss": 0.0953, "step": 502725 }, { "epoch": 4.94, "grad_norm": 9.628828048706055, "learning_rate": 2.554628774563834e-06, "loss": 0.0697, "step": 502750 }, { "epoch": 4.94, "grad_norm": 16.579853057861328, "learning_rate": 2.5545046521095855e-06, "loss": 0.106, "step": 502775 }, { "epoch": 4.94, "grad_norm": 8.33928394317627, "learning_rate": 2.554380529655337e-06, "loss": 0.0651, "step": 502800 }, { "epoch": 4.94, "grad_norm": 16.52341079711914, "learning_rate": 2.5542564072010884e-06, "loss": 0.1113, "step": 502825 }, { "epoch": 4.94, "grad_norm": 0.8288333415985107, "learning_rate": 2.5541322847468404e-06, "loss": 0.0749, "step": 502850 }, { "epoch": 4.94, "grad_norm": 16.462783813476562, "learning_rate": 2.5540081622925916e-06, "loss": 0.1696, "step": 502875 }, { "epoch": 4.94, "grad_norm": 3.908740997314453, "learning_rate": 2.553884039838343e-06, "loss": 0.0755, "step": 502900 }, { "epoch": 4.94, "grad_norm": 8.53416919708252, "learning_rate": 2.553759917384095e-06, "loss": 0.094, "step": 502925 }, { "epoch": 4.95, "grad_norm": 4.277796745300293, "learning_rate": 2.553635794929846e-06, "loss": 0.0862, "step": 502950 }, { "epoch": 4.95, "grad_norm": 25.179380416870117, "learning_rate": 2.5535116724755978e-06, "loss": 0.1226, "step": 502975 }, { "epoch": 4.95, "grad_norm": 8.068902969360352, "learning_rate": 2.5533875500213494e-06, "loss": 0.0912, "step": 503000 }, { "epoch": 4.95, "grad_norm": 12.174849510192871, "learning_rate": 2.553263427567101e-06, "loss": 0.1334, "step": 503025 }, { "epoch": 4.95, "grad_norm": 9.167900085449219, "learning_rate": 2.5531393051128522e-06, "loss": 0.1087, "step": 503050 }, { "epoch": 4.95, "grad_norm": 3.8082611560821533, "learning_rate": 2.5530151826586043e-06, "loss": 0.0935, "step": 503075 }, { "epoch": 4.95, "grad_norm": 3.9866983890533447, "learning_rate": 2.5528910602043555e-06, "loss": 0.082, "step": 503100 }, { "epoch": 4.95, "grad_norm": 14.37166690826416, "learning_rate": 2.5527669377501067e-06, "loss": 0.1217, "step": 503125 }, { "epoch": 4.95, "grad_norm": 0.7375143766403198, "learning_rate": 2.5526428152958584e-06, "loss": 0.0599, "step": 503150 }, { "epoch": 4.95, "grad_norm": 12.205242156982422, "learning_rate": 2.55251869284161e-06, "loss": 0.0928, "step": 503175 }, { "epoch": 4.95, "grad_norm": 0.2951927185058594, "learning_rate": 2.5523945703873616e-06, "loss": 0.0662, "step": 503200 }, { "epoch": 4.95, "grad_norm": 20.298933029174805, "learning_rate": 2.552270447933113e-06, "loss": 0.1219, "step": 503225 }, { "epoch": 4.95, "grad_norm": 11.603089332580566, "learning_rate": 2.552146325478865e-06, "loss": 0.0882, "step": 503250 }, { "epoch": 4.95, "grad_norm": 8.049714088439941, "learning_rate": 2.552022203024616e-06, "loss": 0.0976, "step": 503275 }, { "epoch": 4.95, "grad_norm": 0.46827059984207153, "learning_rate": 2.5518980805703673e-06, "loss": 0.06, "step": 503300 }, { "epoch": 4.95, "grad_norm": 11.932405471801758, "learning_rate": 2.5517739581161194e-06, "loss": 0.1186, "step": 503325 }, { "epoch": 4.95, "grad_norm": 4.650352954864502, "learning_rate": 2.5516498356618706e-06, "loss": 0.0641, "step": 503350 }, { "epoch": 4.95, "grad_norm": 21.73917007446289, "learning_rate": 2.5515257132076222e-06, "loss": 0.1014, "step": 503375 }, { "epoch": 4.95, "grad_norm": 2.5566866397857666, "learning_rate": 2.551401590753374e-06, "loss": 0.0598, "step": 503400 }, { "epoch": 4.95, "grad_norm": 13.057016372680664, "learning_rate": 2.5512774682991255e-06, "loss": 0.1019, "step": 503425 }, { "epoch": 4.95, "grad_norm": 5.481081485748291, "learning_rate": 2.5511533458448767e-06, "loss": 0.0797, "step": 503450 }, { "epoch": 4.95, "grad_norm": 10.470135688781738, "learning_rate": 2.5510292233906288e-06, "loss": 0.0884, "step": 503475 }, { "epoch": 4.95, "grad_norm": 6.652272701263428, "learning_rate": 2.55090510093638e-06, "loss": 0.0648, "step": 503500 }, { "epoch": 4.95, "grad_norm": 5.085610866546631, "learning_rate": 2.550780978482131e-06, "loss": 0.085, "step": 503525 }, { "epoch": 4.95, "grad_norm": 3.6106204986572266, "learning_rate": 2.5506568560278832e-06, "loss": 0.0867, "step": 503550 }, { "epoch": 4.95, "grad_norm": 16.629661560058594, "learning_rate": 2.5505327335736345e-06, "loss": 0.1222, "step": 503575 }, { "epoch": 4.95, "grad_norm": 11.641329765319824, "learning_rate": 2.550408611119386e-06, "loss": 0.1048, "step": 503600 }, { "epoch": 4.95, "grad_norm": 23.224119186401367, "learning_rate": 2.5502844886651377e-06, "loss": 0.0966, "step": 503625 }, { "epoch": 4.95, "grad_norm": 0.15340253710746765, "learning_rate": 2.5501603662108894e-06, "loss": 0.0652, "step": 503650 }, { "epoch": 4.95, "grad_norm": 14.483274459838867, "learning_rate": 2.5500362437566406e-06, "loss": 0.1061, "step": 503675 }, { "epoch": 4.95, "grad_norm": 6.449910640716553, "learning_rate": 2.5499121213023926e-06, "loss": 0.1113, "step": 503700 }, { "epoch": 4.95, "grad_norm": 9.406927108764648, "learning_rate": 2.549787998848144e-06, "loss": 0.1097, "step": 503725 }, { "epoch": 4.95, "grad_norm": 6.929140567779541, "learning_rate": 2.549663876393895e-06, "loss": 0.0879, "step": 503750 }, { "epoch": 4.95, "grad_norm": 29.271190643310547, "learning_rate": 2.549539753939647e-06, "loss": 0.1223, "step": 503775 }, { "epoch": 4.95, "grad_norm": 2.8695342540740967, "learning_rate": 2.5494156314853983e-06, "loss": 0.086, "step": 503800 }, { "epoch": 4.95, "grad_norm": 17.547435760498047, "learning_rate": 2.54929150903115e-06, "loss": 0.0962, "step": 503825 }, { "epoch": 4.95, "grad_norm": 7.9503679275512695, "learning_rate": 2.5491673865769016e-06, "loss": 0.0829, "step": 503850 }, { "epoch": 4.95, "grad_norm": 15.757586479187012, "learning_rate": 2.5490432641226532e-06, "loss": 0.1145, "step": 503875 }, { "epoch": 4.95, "grad_norm": 0.3155111074447632, "learning_rate": 2.5489191416684044e-06, "loss": 0.0797, "step": 503900 }, { "epoch": 4.95, "grad_norm": 12.874090194702148, "learning_rate": 2.5487950192141565e-06, "loss": 0.0784, "step": 503925 }, { "epoch": 4.95, "grad_norm": 3.7281649112701416, "learning_rate": 2.5486708967599077e-06, "loss": 0.098, "step": 503950 }, { "epoch": 4.96, "grad_norm": 13.451494216918945, "learning_rate": 2.548546774305659e-06, "loss": 0.1114, "step": 503975 }, { "epoch": 4.96, "grad_norm": 6.775666236877441, "learning_rate": 2.5484226518514106e-06, "loss": 0.1022, "step": 504000 }, { "epoch": 4.96, "grad_norm": 9.529200553894043, "learning_rate": 2.548298529397162e-06, "loss": 0.1144, "step": 504025 }, { "epoch": 4.96, "grad_norm": 10.112116813659668, "learning_rate": 2.548174406942914e-06, "loss": 0.0674, "step": 504050 }, { "epoch": 4.96, "grad_norm": 9.090315818786621, "learning_rate": 2.548050284488665e-06, "loss": 0.1227, "step": 504075 }, { "epoch": 4.96, "grad_norm": 5.570369243621826, "learning_rate": 2.547926162034417e-06, "loss": 0.0636, "step": 504100 }, { "epoch": 4.96, "grad_norm": 10.30397891998291, "learning_rate": 2.5478020395801683e-06, "loss": 0.1094, "step": 504125 }, { "epoch": 4.96, "grad_norm": 1.96220064163208, "learning_rate": 2.5476779171259195e-06, "loss": 0.0836, "step": 504150 }, { "epoch": 4.96, "grad_norm": 12.640068054199219, "learning_rate": 2.5475537946716716e-06, "loss": 0.1207, "step": 504175 }, { "epoch": 4.96, "grad_norm": 3.381969928741455, "learning_rate": 2.5474296722174228e-06, "loss": 0.0697, "step": 504200 }, { "epoch": 4.96, "grad_norm": 16.444799423217773, "learning_rate": 2.5473055497631744e-06, "loss": 0.0805, "step": 504225 }, { "epoch": 4.96, "grad_norm": 1.2118159532546997, "learning_rate": 2.547181427308926e-06, "loss": 0.0834, "step": 504250 }, { "epoch": 4.96, "grad_norm": 12.101166725158691, "learning_rate": 2.5470573048546777e-06, "loss": 0.1069, "step": 504275 }, { "epoch": 4.96, "grad_norm": 8.22015380859375, "learning_rate": 2.546933182400429e-06, "loss": 0.086, "step": 504300 }, { "epoch": 4.96, "grad_norm": 14.224800109863281, "learning_rate": 2.546809059946181e-06, "loss": 0.1063, "step": 504325 }, { "epoch": 4.96, "grad_norm": 3.500034809112549, "learning_rate": 2.546684937491932e-06, "loss": 0.0583, "step": 504350 }, { "epoch": 4.96, "grad_norm": 10.370345115661621, "learning_rate": 2.546560815037684e-06, "loss": 0.1115, "step": 504375 }, { "epoch": 4.96, "grad_norm": 5.858828067779541, "learning_rate": 2.5464366925834354e-06, "loss": 0.0842, "step": 504400 }, { "epoch": 4.96, "grad_norm": 14.273818969726562, "learning_rate": 2.546312570129187e-06, "loss": 0.1578, "step": 504425 }, { "epoch": 4.96, "grad_norm": 4.85318660736084, "learning_rate": 2.5461884476749383e-06, "loss": 0.065, "step": 504450 }, { "epoch": 4.96, "grad_norm": 11.907971382141113, "learning_rate": 2.5460643252206903e-06, "loss": 0.1154, "step": 504475 }, { "epoch": 4.96, "grad_norm": 9.073554039001465, "learning_rate": 2.5459402027664416e-06, "loss": 0.078, "step": 504500 }, { "epoch": 4.96, "grad_norm": 17.019853591918945, "learning_rate": 2.5458160803121928e-06, "loss": 0.1055, "step": 504525 }, { "epoch": 4.96, "grad_norm": 2.9378912448883057, "learning_rate": 2.545691957857945e-06, "loss": 0.093, "step": 504550 }, { "epoch": 4.96, "grad_norm": 10.392600059509277, "learning_rate": 2.545567835403696e-06, "loss": 0.0853, "step": 504575 }, { "epoch": 4.96, "grad_norm": 5.191160678863525, "learning_rate": 2.5454437129494477e-06, "loss": 0.0732, "step": 504600 }, { "epoch": 4.96, "grad_norm": 14.025772094726562, "learning_rate": 2.5453195904951993e-06, "loss": 0.1029, "step": 504625 }, { "epoch": 4.96, "grad_norm": 2.766417980194092, "learning_rate": 2.545195468040951e-06, "loss": 0.0657, "step": 504650 }, { "epoch": 4.96, "grad_norm": 12.991345405578613, "learning_rate": 2.545071345586702e-06, "loss": 0.1119, "step": 504675 }, { "epoch": 4.96, "grad_norm": 2.309993267059326, "learning_rate": 2.5449472231324542e-06, "loss": 0.061, "step": 504700 }, { "epoch": 4.96, "grad_norm": 10.085092544555664, "learning_rate": 2.5448231006782054e-06, "loss": 0.1129, "step": 504725 }, { "epoch": 4.96, "grad_norm": 2.7343289852142334, "learning_rate": 2.5446989782239566e-06, "loss": 0.1005, "step": 504750 }, { "epoch": 4.96, "grad_norm": 4.618655204772949, "learning_rate": 2.5445748557697087e-06, "loss": 0.1332, "step": 504775 }, { "epoch": 4.96, "grad_norm": 4.04594087600708, "learning_rate": 2.54445073331546e-06, "loss": 0.0756, "step": 504800 }, { "epoch": 4.96, "grad_norm": 15.906050682067871, "learning_rate": 2.5443266108612115e-06, "loss": 0.1328, "step": 504825 }, { "epoch": 4.96, "grad_norm": 8.831079483032227, "learning_rate": 2.5442024884069628e-06, "loss": 0.0628, "step": 504850 }, { "epoch": 4.96, "grad_norm": 21.976375579833984, "learning_rate": 2.544078365952715e-06, "loss": 0.1141, "step": 504875 }, { "epoch": 4.96, "grad_norm": 1.2551662921905518, "learning_rate": 2.543954243498466e-06, "loss": 0.0787, "step": 504900 }, { "epoch": 4.96, "grad_norm": 8.905317306518555, "learning_rate": 2.5438301210442172e-06, "loss": 0.1075, "step": 504925 }, { "epoch": 4.96, "grad_norm": 11.632533073425293, "learning_rate": 2.5437059985899693e-06, "loss": 0.0825, "step": 504950 }, { "epoch": 4.96, "grad_norm": 10.628735542297363, "learning_rate": 2.5435818761357205e-06, "loss": 0.1065, "step": 504975 }, { "epoch": 4.97, "grad_norm": 3.882924795150757, "learning_rate": 2.543457753681472e-06, "loss": 0.0638, "step": 505000 }, { "epoch": 4.97, "grad_norm": 9.30850887298584, "learning_rate": 2.5433336312272238e-06, "loss": 0.0872, "step": 505025 }, { "epoch": 4.97, "grad_norm": 5.740863800048828, "learning_rate": 2.5432095087729754e-06, "loss": 0.0838, "step": 505050 }, { "epoch": 4.97, "grad_norm": 10.800726890563965, "learning_rate": 2.5430853863187266e-06, "loss": 0.1318, "step": 505075 }, { "epoch": 4.97, "grad_norm": 3.755669116973877, "learning_rate": 2.5429612638644787e-06, "loss": 0.0887, "step": 505100 }, { "epoch": 4.97, "grad_norm": 4.995760440826416, "learning_rate": 2.54283714141023e-06, "loss": 0.121, "step": 505125 }, { "epoch": 4.97, "grad_norm": 3.9497807025909424, "learning_rate": 2.542713018955981e-06, "loss": 0.0632, "step": 505150 }, { "epoch": 4.97, "grad_norm": 8.469690322875977, "learning_rate": 2.542588896501733e-06, "loss": 0.1191, "step": 505175 }, { "epoch": 4.97, "grad_norm": 4.6947407722473145, "learning_rate": 2.5424647740474844e-06, "loss": 0.0805, "step": 505200 }, { "epoch": 4.97, "grad_norm": 10.39555549621582, "learning_rate": 2.542340651593236e-06, "loss": 0.108, "step": 505225 }, { "epoch": 4.97, "grad_norm": 7.979452133178711, "learning_rate": 2.5422165291389876e-06, "loss": 0.0962, "step": 505250 }, { "epoch": 4.97, "grad_norm": 17.071557998657227, "learning_rate": 2.5420924066847393e-06, "loss": 0.1087, "step": 505275 }, { "epoch": 4.97, "grad_norm": 2.5774152278900146, "learning_rate": 2.5419682842304905e-06, "loss": 0.0647, "step": 505300 }, { "epoch": 4.97, "grad_norm": 16.347820281982422, "learning_rate": 2.5418441617762425e-06, "loss": 0.1192, "step": 505325 }, { "epoch": 4.97, "grad_norm": 3.2904927730560303, "learning_rate": 2.5417200393219938e-06, "loss": 0.0706, "step": 505350 }, { "epoch": 4.97, "grad_norm": 16.619707107543945, "learning_rate": 2.541595916867745e-06, "loss": 0.111, "step": 505375 }, { "epoch": 4.97, "grad_norm": 3.9725842475891113, "learning_rate": 2.541471794413497e-06, "loss": 0.0812, "step": 505400 }, { "epoch": 4.97, "grad_norm": 10.757534980773926, "learning_rate": 2.5413476719592482e-06, "loss": 0.1017, "step": 505425 }, { "epoch": 4.97, "grad_norm": 8.313796043395996, "learning_rate": 2.541223549505e-06, "loss": 0.0902, "step": 505450 }, { "epoch": 4.97, "grad_norm": 17.032155990600586, "learning_rate": 2.5410994270507515e-06, "loss": 0.1272, "step": 505475 }, { "epoch": 4.97, "grad_norm": 4.441214561462402, "learning_rate": 2.540975304596503e-06, "loss": 0.0696, "step": 505500 }, { "epoch": 4.97, "grad_norm": 12.907886505126953, "learning_rate": 2.5408511821422544e-06, "loss": 0.1042, "step": 505525 }, { "epoch": 4.97, "grad_norm": 6.019027233123779, "learning_rate": 2.5407270596880064e-06, "loss": 0.0589, "step": 505550 }, { "epoch": 4.97, "grad_norm": 5.3167595863342285, "learning_rate": 2.5406029372337576e-06, "loss": 0.0802, "step": 505575 }, { "epoch": 4.97, "grad_norm": 2.9540812969207764, "learning_rate": 2.540478814779509e-06, "loss": 0.0678, "step": 505600 }, { "epoch": 4.97, "grad_norm": 8.473379135131836, "learning_rate": 2.540354692325261e-06, "loss": 0.1403, "step": 505625 }, { "epoch": 4.97, "grad_norm": 1.0915309190750122, "learning_rate": 2.540230569871012e-06, "loss": 0.0591, "step": 505650 }, { "epoch": 4.97, "grad_norm": 14.887496948242188, "learning_rate": 2.5401064474167637e-06, "loss": 0.0952, "step": 505675 }, { "epoch": 4.97, "grad_norm": 1.0535379648208618, "learning_rate": 2.539982324962515e-06, "loss": 0.0671, "step": 505700 }, { "epoch": 4.97, "grad_norm": 11.067549705505371, "learning_rate": 2.539858202508267e-06, "loss": 0.0955, "step": 505725 }, { "epoch": 4.97, "grad_norm": 3.050093412399292, "learning_rate": 2.5397340800540182e-06, "loss": 0.0683, "step": 505750 }, { "epoch": 4.97, "grad_norm": 22.67632484436035, "learning_rate": 2.5396099575997694e-06, "loss": 0.113, "step": 505775 }, { "epoch": 4.97, "grad_norm": 2.3938817977905273, "learning_rate": 2.5394858351455215e-06, "loss": 0.09, "step": 505800 }, { "epoch": 4.97, "grad_norm": 9.208266258239746, "learning_rate": 2.5393617126912727e-06, "loss": 0.0923, "step": 505825 }, { "epoch": 4.97, "grad_norm": 6.061522960662842, "learning_rate": 2.5392375902370243e-06, "loss": 0.0797, "step": 505850 }, { "epoch": 4.97, "grad_norm": 5.28408670425415, "learning_rate": 2.539113467782776e-06, "loss": 0.1227, "step": 505875 }, { "epoch": 4.97, "grad_norm": 8.292572975158691, "learning_rate": 2.5389893453285276e-06, "loss": 0.0819, "step": 505900 }, { "epoch": 4.97, "grad_norm": 7.68065881729126, "learning_rate": 2.538865222874279e-06, "loss": 0.1166, "step": 505925 }, { "epoch": 4.97, "grad_norm": 10.4385404586792, "learning_rate": 2.538741100420031e-06, "loss": 0.0688, "step": 505950 }, { "epoch": 4.97, "grad_norm": 9.069600105285645, "learning_rate": 2.538616977965782e-06, "loss": 0.0916, "step": 505975 }, { "epoch": 4.98, "grad_norm": 3.1351146697998047, "learning_rate": 2.5384928555115333e-06, "loss": 0.0869, "step": 506000 }, { "epoch": 4.98, "grad_norm": 9.301258087158203, "learning_rate": 2.5383687330572854e-06, "loss": 0.1012, "step": 506025 }, { "epoch": 4.98, "grad_norm": 1.7582124471664429, "learning_rate": 2.5382446106030366e-06, "loss": 0.0692, "step": 506050 }, { "epoch": 4.98, "grad_norm": 10.5264253616333, "learning_rate": 2.5381254530469584e-06, "loss": 0.1396, "step": 506075 }, { "epoch": 4.98, "grad_norm": 5.333520889282227, "learning_rate": 2.5380013305927097e-06, "loss": 0.0652, "step": 506100 }, { "epoch": 4.98, "grad_norm": 14.429850578308105, "learning_rate": 2.5378772081384613e-06, "loss": 0.1286, "step": 506125 }, { "epoch": 4.98, "grad_norm": 3.2623283863067627, "learning_rate": 2.537753085684213e-06, "loss": 0.0629, "step": 506150 }, { "epoch": 4.98, "grad_norm": 15.828767776489258, "learning_rate": 2.5376289632299646e-06, "loss": 0.1115, "step": 506175 }, { "epoch": 4.98, "grad_norm": 0.3808314800262451, "learning_rate": 2.5375048407757158e-06, "loss": 0.0705, "step": 506200 }, { "epoch": 4.98, "grad_norm": 11.81423568725586, "learning_rate": 2.537380718321468e-06, "loss": 0.1124, "step": 506225 }, { "epoch": 4.98, "grad_norm": 7.900761604309082, "learning_rate": 2.537256595867219e-06, "loss": 0.0606, "step": 506250 }, { "epoch": 4.98, "grad_norm": 25.281707763671875, "learning_rate": 2.5371324734129703e-06, "loss": 0.1072, "step": 506275 }, { "epoch": 4.98, "grad_norm": 6.993321418762207, "learning_rate": 2.537008350958722e-06, "loss": 0.0975, "step": 506300 }, { "epoch": 4.98, "grad_norm": 9.556936264038086, "learning_rate": 2.5368842285044735e-06, "loss": 0.1282, "step": 506325 }, { "epoch": 4.98, "grad_norm": 5.128305435180664, "learning_rate": 2.536760106050225e-06, "loss": 0.0991, "step": 506350 }, { "epoch": 4.98, "grad_norm": 17.666337966918945, "learning_rate": 2.5366359835959764e-06, "loss": 0.1042, "step": 506375 }, { "epoch": 4.98, "grad_norm": 3.5597915649414062, "learning_rate": 2.5365118611417284e-06, "loss": 0.0794, "step": 506400 }, { "epoch": 4.98, "grad_norm": 12.90438461303711, "learning_rate": 2.5363877386874796e-06, "loss": 0.1484, "step": 506425 }, { "epoch": 4.98, "grad_norm": 1.086478352546692, "learning_rate": 2.536263616233231e-06, "loss": 0.0535, "step": 506450 }, { "epoch": 4.98, "grad_norm": 28.909652709960938, "learning_rate": 2.536139493778983e-06, "loss": 0.0965, "step": 506475 }, { "epoch": 4.98, "grad_norm": 0.3120771646499634, "learning_rate": 2.536015371324734e-06, "loss": 0.0692, "step": 506500 }, { "epoch": 4.98, "grad_norm": 23.422443389892578, "learning_rate": 2.5358912488704858e-06, "loss": 0.1099, "step": 506525 }, { "epoch": 4.98, "grad_norm": 5.824596881866455, "learning_rate": 2.5357671264162374e-06, "loss": 0.0736, "step": 506550 }, { "epoch": 4.98, "grad_norm": 16.29058837890625, "learning_rate": 2.535643003961989e-06, "loss": 0.1226, "step": 506575 }, { "epoch": 4.98, "grad_norm": 1.0917881727218628, "learning_rate": 2.5355188815077402e-06, "loss": 0.079, "step": 506600 }, { "epoch": 4.98, "grad_norm": 8.729015350341797, "learning_rate": 2.5353947590534923e-06, "loss": 0.1129, "step": 506625 }, { "epoch": 4.98, "grad_norm": 4.245570182800293, "learning_rate": 2.5352706365992435e-06, "loss": 0.0756, "step": 506650 }, { "epoch": 4.98, "grad_norm": 13.30628776550293, "learning_rate": 2.5351465141449947e-06, "loss": 0.0959, "step": 506675 }, { "epoch": 4.98, "grad_norm": 6.013849258422852, "learning_rate": 2.5350223916907468e-06, "loss": 0.0901, "step": 506700 }, { "epoch": 4.98, "grad_norm": 6.057243347167969, "learning_rate": 2.534898269236498e-06, "loss": 0.1071, "step": 506725 }, { "epoch": 4.98, "grad_norm": 6.181284427642822, "learning_rate": 2.5347741467822496e-06, "loss": 0.0741, "step": 506750 }, { "epoch": 4.98, "grad_norm": 4.496691703796387, "learning_rate": 2.5346500243280013e-06, "loss": 0.1, "step": 506775 }, { "epoch": 4.98, "grad_norm": 0.6771295070648193, "learning_rate": 2.534525901873753e-06, "loss": 0.0943, "step": 506800 }, { "epoch": 4.98, "grad_norm": 7.180703639984131, "learning_rate": 2.534401779419504e-06, "loss": 0.1283, "step": 506825 }, { "epoch": 4.98, "grad_norm": 6.826681137084961, "learning_rate": 2.534277656965256e-06, "loss": 0.076, "step": 506850 }, { "epoch": 4.98, "grad_norm": 12.669336318969727, "learning_rate": 2.5341535345110074e-06, "loss": 0.0983, "step": 506875 }, { "epoch": 4.98, "grad_norm": 3.6322684288024902, "learning_rate": 2.5340294120567586e-06, "loss": 0.0869, "step": 506900 }, { "epoch": 4.98, "grad_norm": 9.93554401397705, "learning_rate": 2.5339052896025106e-06, "loss": 0.1255, "step": 506925 }, { "epoch": 4.98, "grad_norm": 5.38405179977417, "learning_rate": 2.533781167148262e-06, "loss": 0.0962, "step": 506950 }, { "epoch": 4.98, "grad_norm": 21.065555572509766, "learning_rate": 2.5336570446940135e-06, "loss": 0.0948, "step": 506975 }, { "epoch": 4.98, "grad_norm": 11.250155448913574, "learning_rate": 2.533532922239765e-06, "loss": 0.0876, "step": 507000 }, { "epoch": 4.99, "grad_norm": 11.223285675048828, "learning_rate": 2.5334087997855168e-06, "loss": 0.1414, "step": 507025 }, { "epoch": 4.99, "grad_norm": 3.39105486869812, "learning_rate": 2.533284677331268e-06, "loss": 0.061, "step": 507050 }, { "epoch": 4.99, "grad_norm": 14.506369590759277, "learning_rate": 2.53316055487702e-06, "loss": 0.1053, "step": 507075 }, { "epoch": 4.99, "grad_norm": 0.2513860762119293, "learning_rate": 2.5330364324227712e-06, "loss": 0.0822, "step": 507100 }, { "epoch": 4.99, "grad_norm": 27.55927276611328, "learning_rate": 2.5329123099685225e-06, "loss": 0.1206, "step": 507125 }, { "epoch": 4.99, "grad_norm": 3.8070480823516846, "learning_rate": 2.532788187514274e-06, "loss": 0.0783, "step": 507150 }, { "epoch": 4.99, "grad_norm": 13.48499870300293, "learning_rate": 2.5326640650600257e-06, "loss": 0.1026, "step": 507175 }, { "epoch": 4.99, "grad_norm": 6.774417877197266, "learning_rate": 2.5325399426057774e-06, "loss": 0.0751, "step": 507200 }, { "epoch": 4.99, "grad_norm": 11.150508880615234, "learning_rate": 2.5324158201515286e-06, "loss": 0.1102, "step": 507225 }, { "epoch": 4.99, "grad_norm": 7.1011152267456055, "learning_rate": 2.5322916976972806e-06, "loss": 0.0988, "step": 507250 }, { "epoch": 4.99, "grad_norm": 8.5447359085083, "learning_rate": 2.532167575243032e-06, "loss": 0.1192, "step": 507275 }, { "epoch": 4.99, "grad_norm": 0.19928383827209473, "learning_rate": 2.5320434527887835e-06, "loss": 0.0521, "step": 507300 }, { "epoch": 4.99, "grad_norm": 10.965597152709961, "learning_rate": 2.531919330334535e-06, "loss": 0.1006, "step": 507325 }, { "epoch": 4.99, "grad_norm": 8.245965003967285, "learning_rate": 2.5317952078802867e-06, "loss": 0.079, "step": 507350 }, { "epoch": 4.99, "grad_norm": 14.217975616455078, "learning_rate": 2.531671085426038e-06, "loss": 0.1129, "step": 507375 }, { "epoch": 4.99, "grad_norm": 2.7826485633850098, "learning_rate": 2.53154696297179e-06, "loss": 0.0929, "step": 507400 }, { "epoch": 4.99, "grad_norm": 14.743372917175293, "learning_rate": 2.5314228405175412e-06, "loss": 0.1117, "step": 507425 }, { "epoch": 4.99, "grad_norm": 2.2032294273376465, "learning_rate": 2.5312987180632924e-06, "loss": 0.0986, "step": 507450 }, { "epoch": 4.99, "grad_norm": 12.086423873901367, "learning_rate": 2.5311745956090445e-06, "loss": 0.0945, "step": 507475 }, { "epoch": 4.99, "grad_norm": 1.9127800464630127, "learning_rate": 2.5310504731547957e-06, "loss": 0.083, "step": 507500 }, { "epoch": 4.99, "grad_norm": 10.64537525177002, "learning_rate": 2.5309263507005473e-06, "loss": 0.098, "step": 507525 }, { "epoch": 4.99, "grad_norm": 9.674029350280762, "learning_rate": 2.530802228246299e-06, "loss": 0.0782, "step": 507550 }, { "epoch": 4.99, "grad_norm": 12.6636381149292, "learning_rate": 2.5306781057920506e-06, "loss": 0.112, "step": 507575 }, { "epoch": 4.99, "grad_norm": 16.050844192504883, "learning_rate": 2.530553983337802e-06, "loss": 0.0679, "step": 507600 }, { "epoch": 4.99, "grad_norm": 22.651628494262695, "learning_rate": 2.530429860883554e-06, "loss": 0.1358, "step": 507625 }, { "epoch": 4.99, "grad_norm": 0.6283589005470276, "learning_rate": 2.530305738429305e-06, "loss": 0.0808, "step": 507650 }, { "epoch": 4.99, "grad_norm": 9.74919605255127, "learning_rate": 2.5301816159750563e-06, "loss": 0.1219, "step": 507675 }, { "epoch": 4.99, "grad_norm": 1.8715142011642456, "learning_rate": 2.5300574935208084e-06, "loss": 0.0771, "step": 507700 }, { "epoch": 4.99, "grad_norm": 15.52814769744873, "learning_rate": 2.5299333710665596e-06, "loss": 0.08, "step": 507725 }, { "epoch": 4.99, "grad_norm": 6.5954060554504395, "learning_rate": 2.5298092486123112e-06, "loss": 0.0689, "step": 507750 }, { "epoch": 4.99, "grad_norm": 14.410663604736328, "learning_rate": 2.529685126158063e-06, "loss": 0.1051, "step": 507775 }, { "epoch": 4.99, "grad_norm": 5.192532539367676, "learning_rate": 2.5295610037038145e-06, "loss": 0.0756, "step": 507800 }, { "epoch": 4.99, "grad_norm": 10.144194602966309, "learning_rate": 2.5294368812495657e-06, "loss": 0.1077, "step": 507825 }, { "epoch": 4.99, "grad_norm": 4.2482733726501465, "learning_rate": 2.5293127587953178e-06, "loss": 0.0892, "step": 507850 }, { "epoch": 4.99, "grad_norm": 14.90321159362793, "learning_rate": 2.529188636341069e-06, "loss": 0.1402, "step": 507875 }, { "epoch": 4.99, "grad_norm": 5.960229873657227, "learning_rate": 2.52906451388682e-06, "loss": 0.0817, "step": 507900 }, { "epoch": 4.99, "grad_norm": 8.789935111999512, "learning_rate": 2.5289403914325722e-06, "loss": 0.1043, "step": 507925 }, { "epoch": 4.99, "grad_norm": 1.4918757677078247, "learning_rate": 2.5288162689783234e-06, "loss": 0.0712, "step": 507950 }, { "epoch": 4.99, "grad_norm": 13.042783737182617, "learning_rate": 2.528692146524075e-06, "loss": 0.1228, "step": 507975 }, { "epoch": 4.99, "grad_norm": 9.613635063171387, "learning_rate": 2.5285680240698263e-06, "loss": 0.0686, "step": 508000 }, { "epoch": 4.99, "grad_norm": 7.93799352645874, "learning_rate": 2.5284439016155784e-06, "loss": 0.1203, "step": 508025 }, { "epoch": 5.0, "grad_norm": 2.8545501232147217, "learning_rate": 2.5283197791613296e-06, "loss": 0.0593, "step": 508050 }, { "epoch": 5.0, "grad_norm": 16.543954849243164, "learning_rate": 2.5281956567070808e-06, "loss": 0.0938, "step": 508075 }, { "epoch": 5.0, "grad_norm": 1.1938800811767578, "learning_rate": 2.528071534252833e-06, "loss": 0.0751, "step": 508100 }, { "epoch": 5.0, "grad_norm": 19.60616111755371, "learning_rate": 2.527947411798584e-06, "loss": 0.1136, "step": 508125 }, { "epoch": 5.0, "grad_norm": 11.535478591918945, "learning_rate": 2.5278232893443357e-06, "loss": 0.1007, "step": 508150 }, { "epoch": 5.0, "grad_norm": 15.167548179626465, "learning_rate": 2.5276991668900873e-06, "loss": 0.0941, "step": 508175 }, { "epoch": 5.0, "grad_norm": 6.311356544494629, "learning_rate": 2.527575044435839e-06, "loss": 0.087, "step": 508200 }, { "epoch": 5.0, "grad_norm": 12.634756088256836, "learning_rate": 2.52745092198159e-06, "loss": 0.1141, "step": 508225 }, { "epoch": 5.0, "grad_norm": 0.10857699811458588, "learning_rate": 2.5273267995273422e-06, "loss": 0.0808, "step": 508250 }, { "epoch": 5.0, "grad_norm": 14.366003036499023, "learning_rate": 2.5272026770730934e-06, "loss": 0.1063, "step": 508275 }, { "epoch": 5.0, "grad_norm": 9.187129020690918, "learning_rate": 2.5270785546188446e-06, "loss": 0.0748, "step": 508300 }, { "epoch": 5.0, "grad_norm": Infinity, "learning_rate": 2.5269593970627665e-06, "loss": 0.1577, "step": 508325 }, { "epoch": 5.0, "grad_norm": 2.1624364852905273, "learning_rate": 2.5268352746085177e-06, "loss": 0.0657, "step": 508350 }, { "epoch": 5.0, "grad_norm": 13.838635444641113, "learning_rate": 2.5267111521542698e-06, "loss": 0.1147, "step": 508375 }, { "epoch": 5.0, "grad_norm": 15.764519691467285, "learning_rate": 2.526587029700021e-06, "loss": 0.0975, "step": 508400 }, { "epoch": 5.0, "grad_norm": 13.391133308410645, "learning_rate": 2.5264629072457726e-06, "loss": 0.0973, "step": 508425 }, { "epoch": 5.0, "grad_norm": 8.242147445678711, "learning_rate": 2.5263387847915243e-06, "loss": 0.0815, "step": 508450 }, { "epoch": 5.0, "grad_norm": 8.802597999572754, "learning_rate": 2.526214662337276e-06, "loss": 0.1125, "step": 508475 }, { "epoch": 5.0, "grad_norm": 8.0169677734375, "learning_rate": 2.526090539883027e-06, "loss": 0.08, "step": 508500 }, { "epoch": 5.0, "grad_norm": 21.668973922729492, "learning_rate": 2.525966417428779e-06, "loss": 0.1214, "step": 508525 }, { "epoch": 5.0, "grad_norm": 1.2659612894058228, "learning_rate": 2.5258422949745304e-06, "loss": 0.0837, "step": 508550 }, { "epoch": 5.0, "grad_norm": 13.102057456970215, "learning_rate": 2.5257181725202816e-06, "loss": 0.0425, "step": 508575 }, { "epoch": 5.0, "grad_norm": 6.071026802062988, "learning_rate": 2.5255940500660332e-06, "loss": 0.0745, "step": 508600 }, { "epoch": 5.0, "grad_norm": 0.5759941339492798, "learning_rate": 2.525469927611785e-06, "loss": 0.0374, "step": 508625 }, { "epoch": 5.0, "grad_norm": 3.6755478382110596, "learning_rate": 2.5253458051575365e-06, "loss": 0.0864, "step": 508650 }, { "epoch": 5.0, "grad_norm": 9.200544357299805, "learning_rate": 2.5252216827032877e-06, "loss": 0.0466, "step": 508675 }, { "epoch": 5.0, "grad_norm": 4.908565998077393, "learning_rate": 2.5250975602490398e-06, "loss": 0.0575, "step": 508700 }, { "epoch": 5.0, "grad_norm": 12.013866424560547, "learning_rate": 2.524973437794791e-06, "loss": 0.0405, "step": 508725 }, { "epoch": 5.0, "grad_norm": 3.8077306747436523, "learning_rate": 2.524849315340542e-06, "loss": 0.0729, "step": 508750 }, { "epoch": 5.0, "grad_norm": 9.212148666381836, "learning_rate": 2.5247251928862943e-06, "loss": 0.0552, "step": 508775 }, { "epoch": 5.0, "grad_norm": 0.37770184874534607, "learning_rate": 2.5246010704320455e-06, "loss": 0.0921, "step": 508800 }, { "epoch": 5.0, "grad_norm": 10.121431350708008, "learning_rate": 2.524476947977797e-06, "loss": 0.0493, "step": 508825 }, { "epoch": 5.0, "grad_norm": 2.0539050102233887, "learning_rate": 2.5243528255235487e-06, "loss": 0.0995, "step": 508850 }, { "epoch": 5.0, "grad_norm": 2.530120611190796, "learning_rate": 2.5242287030693004e-06, "loss": 0.0402, "step": 508875 }, { "epoch": 5.0, "grad_norm": 9.578958511352539, "learning_rate": 2.5241045806150516e-06, "loss": 0.0824, "step": 508900 }, { "epoch": 5.0, "grad_norm": 8.454696655273438, "learning_rate": 2.5239804581608036e-06, "loss": 0.048, "step": 508925 }, { "epoch": 5.0, "grad_norm": 0.8082860112190247, "learning_rate": 2.523856335706555e-06, "loss": 0.0689, "step": 508950 }, { "epoch": 5.0, "grad_norm": 8.100051879882812, "learning_rate": 2.523732213252306e-06, "loss": 0.0618, "step": 508975 }, { "epoch": 5.0, "grad_norm": 0.08711221814155579, "learning_rate": 2.523608090798058e-06, "loss": 0.0814, "step": 509000 }, { "epoch": 5.0, "grad_norm": 28.53204345703125, "learning_rate": 2.5234839683438093e-06, "loss": 0.0759, "step": 509025 }, { "epoch": 5.01, "grad_norm": 6.168527126312256, "learning_rate": 2.523359845889561e-06, "loss": 0.0719, "step": 509050 }, { "epoch": 5.01, "grad_norm": 10.931696891784668, "learning_rate": 2.5232357234353126e-06, "loss": 0.0469, "step": 509075 }, { "epoch": 5.01, "grad_norm": 1.8694303035736084, "learning_rate": 2.5231116009810642e-06, "loss": 0.071, "step": 509100 }, { "epoch": 5.01, "grad_norm": 11.570714950561523, "learning_rate": 2.5229874785268154e-06, "loss": 0.0672, "step": 509125 }, { "epoch": 5.01, "grad_norm": 0.05620869621634483, "learning_rate": 2.5228633560725675e-06, "loss": 0.0664, "step": 509150 }, { "epoch": 5.01, "grad_norm": 11.253681182861328, "learning_rate": 2.5227392336183187e-06, "loss": 0.0442, "step": 509175 }, { "epoch": 5.01, "grad_norm": 0.09785600751638412, "learning_rate": 2.52261511116407e-06, "loss": 0.0632, "step": 509200 }, { "epoch": 5.01, "grad_norm": 6.965235233306885, "learning_rate": 2.522490988709822e-06, "loss": 0.0402, "step": 509225 }, { "epoch": 5.01, "grad_norm": 1.6201815605163574, "learning_rate": 2.522366866255573e-06, "loss": 0.0959, "step": 509250 }, { "epoch": 5.01, "grad_norm": 7.512294292449951, "learning_rate": 2.522242743801325e-06, "loss": 0.0516, "step": 509275 }, { "epoch": 5.01, "grad_norm": 0.3119337558746338, "learning_rate": 2.5221186213470765e-06, "loss": 0.0818, "step": 509300 }, { "epoch": 5.01, "grad_norm": 7.265717029571533, "learning_rate": 2.521994498892828e-06, "loss": 0.044, "step": 509325 }, { "epoch": 5.01, "grad_norm": 6.654049873352051, "learning_rate": 2.5218703764385793e-06, "loss": 0.0868, "step": 509350 }, { "epoch": 5.01, "grad_norm": 10.372417449951172, "learning_rate": 2.5217462539843314e-06, "loss": 0.0616, "step": 509375 }, { "epoch": 5.01, "grad_norm": 2.5662436485290527, "learning_rate": 2.5216221315300826e-06, "loss": 0.0889, "step": 509400 }, { "epoch": 5.01, "grad_norm": 9.962557792663574, "learning_rate": 2.521498009075834e-06, "loss": 0.0697, "step": 509425 }, { "epoch": 5.01, "grad_norm": 1.0707359313964844, "learning_rate": 2.5213738866215854e-06, "loss": 0.0666, "step": 509450 }, { "epoch": 5.01, "grad_norm": 10.372011184692383, "learning_rate": 2.521249764167337e-06, "loss": 0.0603, "step": 509475 }, { "epoch": 5.01, "grad_norm": 2.2344796657562256, "learning_rate": 2.5211256417130887e-06, "loss": 0.0665, "step": 509500 }, { "epoch": 5.01, "grad_norm": 12.544995307922363, "learning_rate": 2.52100151925884e-06, "loss": 0.0347, "step": 509525 }, { "epoch": 5.01, "grad_norm": 6.516355037689209, "learning_rate": 2.520877396804592e-06, "loss": 0.0691, "step": 509550 }, { "epoch": 5.01, "grad_norm": 10.052913665771484, "learning_rate": 2.520753274350343e-06, "loss": 0.0385, "step": 509575 }, { "epoch": 5.01, "grad_norm": 0.03358524665236473, "learning_rate": 2.5206291518960944e-06, "loss": 0.0588, "step": 509600 }, { "epoch": 5.01, "grad_norm": 16.41343879699707, "learning_rate": 2.5205050294418465e-06, "loss": 0.0605, "step": 509625 }, { "epoch": 5.01, "grad_norm": 0.11975179612636566, "learning_rate": 2.5203809069875977e-06, "loss": 0.0505, "step": 509650 }, { "epoch": 5.01, "grad_norm": 20.671613693237305, "learning_rate": 2.5202567845333493e-06, "loss": 0.0445, "step": 509675 }, { "epoch": 5.01, "grad_norm": 6.406704425811768, "learning_rate": 2.520132662079101e-06, "loss": 0.0693, "step": 509700 }, { "epoch": 5.01, "grad_norm": 12.946861267089844, "learning_rate": 2.5200085396248526e-06, "loss": 0.041, "step": 509725 }, { "epoch": 5.01, "grad_norm": 6.964742660522461, "learning_rate": 2.5198844171706038e-06, "loss": 0.0725, "step": 509750 }, { "epoch": 5.01, "grad_norm": 6.272953987121582, "learning_rate": 2.519760294716356e-06, "loss": 0.0368, "step": 509775 }, { "epoch": 5.01, "grad_norm": 2.3950467109680176, "learning_rate": 2.519636172262107e-06, "loss": 0.0634, "step": 509800 }, { "epoch": 5.01, "grad_norm": 5.306807518005371, "learning_rate": 2.5195120498078583e-06, "loss": 0.0514, "step": 509825 }, { "epoch": 5.01, "grad_norm": 0.7071623206138611, "learning_rate": 2.5193879273536103e-06, "loss": 0.0859, "step": 509850 }, { "epoch": 5.01, "grad_norm": 18.09335708618164, "learning_rate": 2.5192638048993615e-06, "loss": 0.0379, "step": 509875 }, { "epoch": 5.01, "grad_norm": 2.0273971557617188, "learning_rate": 2.519139682445113e-06, "loss": 0.0758, "step": 509900 }, { "epoch": 5.01, "grad_norm": 6.981583595275879, "learning_rate": 2.519015559990865e-06, "loss": 0.0321, "step": 509925 }, { "epoch": 5.01, "grad_norm": 0.11134961247444153, "learning_rate": 2.5188914375366164e-06, "loss": 0.0804, "step": 509950 }, { "epoch": 5.01, "grad_norm": 7.085729598999023, "learning_rate": 2.5187673150823677e-06, "loss": 0.0506, "step": 509975 }, { "epoch": 5.01, "grad_norm": 3.3582425117492676, "learning_rate": 2.5186431926281197e-06, "loss": 0.0738, "step": 510000 }, { "epoch": 5.01, "grad_norm": 7.405513763427734, "learning_rate": 2.518519070173871e-06, "loss": 0.0497, "step": 510025 }, { "epoch": 5.01, "grad_norm": 0.8116998672485352, "learning_rate": 2.518394947719622e-06, "loss": 0.0799, "step": 510050 }, { "epoch": 5.02, "grad_norm": 21.76536750793457, "learning_rate": 2.518270825265374e-06, "loss": 0.049, "step": 510075 }, { "epoch": 5.02, "grad_norm": 1.0926109552383423, "learning_rate": 2.5181467028111254e-06, "loss": 0.0563, "step": 510100 }, { "epoch": 5.02, "grad_norm": 16.525156021118164, "learning_rate": 2.518022580356877e-06, "loss": 0.0546, "step": 510125 }, { "epoch": 5.02, "grad_norm": 1.1857070922851562, "learning_rate": 2.5178984579026287e-06, "loss": 0.0703, "step": 510150 }, { "epoch": 5.02, "grad_norm": 12.53164005279541, "learning_rate": 2.5177743354483803e-06, "loss": 0.0472, "step": 510175 }, { "epoch": 5.02, "grad_norm": 0.9339432716369629, "learning_rate": 2.5176502129941315e-06, "loss": 0.0845, "step": 510200 }, { "epoch": 5.02, "grad_norm": 15.477889060974121, "learning_rate": 2.5175260905398836e-06, "loss": 0.0423, "step": 510225 }, { "epoch": 5.02, "grad_norm": 4.823116302490234, "learning_rate": 2.5174019680856348e-06, "loss": 0.1029, "step": 510250 }, { "epoch": 5.02, "grad_norm": 6.472294330596924, "learning_rate": 2.5172778456313864e-06, "loss": 0.0535, "step": 510275 }, { "epoch": 5.02, "grad_norm": 0.27162352204322815, "learning_rate": 2.5171537231771376e-06, "loss": 0.0725, "step": 510300 }, { "epoch": 5.02, "grad_norm": 11.66258716583252, "learning_rate": 2.5170296007228897e-06, "loss": 0.0363, "step": 510325 }, { "epoch": 5.02, "grad_norm": 0.9550000429153442, "learning_rate": 2.516905478268641e-06, "loss": 0.0617, "step": 510350 }, { "epoch": 5.02, "grad_norm": 6.506527900695801, "learning_rate": 2.516781355814392e-06, "loss": 0.0375, "step": 510375 }, { "epoch": 5.02, "grad_norm": 0.26390036940574646, "learning_rate": 2.516657233360144e-06, "loss": 0.0887, "step": 510400 }, { "epoch": 5.02, "grad_norm": 24.926044464111328, "learning_rate": 2.5165331109058954e-06, "loss": 0.0416, "step": 510425 }, { "epoch": 5.02, "grad_norm": 0.1533380001783371, "learning_rate": 2.516408988451647e-06, "loss": 0.0805, "step": 510450 }, { "epoch": 5.02, "grad_norm": 16.946523666381836, "learning_rate": 2.5162848659973987e-06, "loss": 0.0449, "step": 510475 }, { "epoch": 5.02, "grad_norm": 2.819844961166382, "learning_rate": 2.5161607435431503e-06, "loss": 0.0947, "step": 510500 }, { "epoch": 5.02, "grad_norm": 32.48200225830078, "learning_rate": 2.5160366210889015e-06, "loss": 0.0363, "step": 510525 }, { "epoch": 5.02, "grad_norm": 8.292661666870117, "learning_rate": 2.5159174635328234e-06, "loss": 0.0763, "step": 510550 }, { "epoch": 5.02, "grad_norm": 12.908313751220703, "learning_rate": 2.5157933410785746e-06, "loss": 0.0425, "step": 510575 }, { "epoch": 5.02, "grad_norm": 3.072518825531006, "learning_rate": 2.5156692186243266e-06, "loss": 0.0904, "step": 510600 }, { "epoch": 5.02, "grad_norm": 9.863646507263184, "learning_rate": 2.515545096170078e-06, "loss": 0.0396, "step": 510625 }, { "epoch": 5.02, "grad_norm": 8.567710876464844, "learning_rate": 2.515420973715829e-06, "loss": 0.0838, "step": 510650 }, { "epoch": 5.02, "grad_norm": 7.922358512878418, "learning_rate": 2.515296851261581e-06, "loss": 0.0446, "step": 510675 }, { "epoch": 5.02, "grad_norm": 2.0642237663269043, "learning_rate": 2.5151727288073323e-06, "loss": 0.0657, "step": 510700 }, { "epoch": 5.02, "grad_norm": 7.173623085021973, "learning_rate": 2.515048606353084e-06, "loss": 0.048, "step": 510725 }, { "epoch": 5.02, "grad_norm": 0.8534094095230103, "learning_rate": 2.5149244838988356e-06, "loss": 0.0685, "step": 510750 }, { "epoch": 5.02, "grad_norm": 12.528136253356934, "learning_rate": 2.5148003614445872e-06, "loss": 0.0439, "step": 510775 }, { "epoch": 5.02, "grad_norm": 3.702709913253784, "learning_rate": 2.5146762389903385e-06, "loss": 0.0672, "step": 510800 }, { "epoch": 5.02, "grad_norm": 8.203169822692871, "learning_rate": 2.5145521165360905e-06, "loss": 0.0417, "step": 510825 }, { "epoch": 5.02, "grad_norm": 3.2746593952178955, "learning_rate": 2.5144279940818417e-06, "loss": 0.0699, "step": 510850 }, { "epoch": 5.02, "grad_norm": 14.854809761047363, "learning_rate": 2.514303871627593e-06, "loss": 0.0515, "step": 510875 }, { "epoch": 5.02, "grad_norm": 9.674771308898926, "learning_rate": 2.5141797491733446e-06, "loss": 0.074, "step": 510900 }, { "epoch": 5.02, "grad_norm": 15.298725128173828, "learning_rate": 2.514055626719096e-06, "loss": 0.0611, "step": 510925 }, { "epoch": 5.02, "grad_norm": 5.921000957489014, "learning_rate": 2.513931504264848e-06, "loss": 0.0826, "step": 510950 }, { "epoch": 5.02, "grad_norm": 15.621816635131836, "learning_rate": 2.513807381810599e-06, "loss": 0.0519, "step": 510975 }, { "epoch": 5.02, "grad_norm": 0.9568116068840027, "learning_rate": 2.513683259356351e-06, "loss": 0.0793, "step": 511000 }, { "epoch": 5.02, "grad_norm": 10.69095230102539, "learning_rate": 2.5135591369021023e-06, "loss": 0.0527, "step": 511025 }, { "epoch": 5.02, "grad_norm": 0.0046623265370726585, "learning_rate": 2.5134350144478535e-06, "loss": 0.083, "step": 511050 }, { "epoch": 5.02, "grad_norm": 10.049556732177734, "learning_rate": 2.5133108919936056e-06, "loss": 0.0498, "step": 511075 }, { "epoch": 5.03, "grad_norm": 2.613706111907959, "learning_rate": 2.513186769539357e-06, "loss": 0.0742, "step": 511100 }, { "epoch": 5.03, "grad_norm": 9.862987518310547, "learning_rate": 2.5130626470851084e-06, "loss": 0.0492, "step": 511125 }, { "epoch": 5.03, "grad_norm": 4.444701194763184, "learning_rate": 2.51293852463086e-06, "loss": 0.0709, "step": 511150 }, { "epoch": 5.03, "grad_norm": 4.4468536376953125, "learning_rate": 2.5128144021766117e-06, "loss": 0.0638, "step": 511175 }, { "epoch": 5.03, "grad_norm": 0.02305009216070175, "learning_rate": 2.512690279722363e-06, "loss": 0.0701, "step": 511200 }, { "epoch": 5.03, "grad_norm": 10.719961166381836, "learning_rate": 2.512566157268115e-06, "loss": 0.033, "step": 511225 }, { "epoch": 5.03, "grad_norm": 2.27421498298645, "learning_rate": 2.512442034813866e-06, "loss": 0.0719, "step": 511250 }, { "epoch": 5.03, "grad_norm": 13.914212226867676, "learning_rate": 2.5123179123596174e-06, "loss": 0.0449, "step": 511275 }, { "epoch": 5.03, "grad_norm": 10.01729679107666, "learning_rate": 2.5121937899053695e-06, "loss": 0.0885, "step": 511300 }, { "epoch": 5.03, "grad_norm": 16.240081787109375, "learning_rate": 2.5120696674511207e-06, "loss": 0.0309, "step": 511325 }, { "epoch": 5.03, "grad_norm": 1.717569351196289, "learning_rate": 2.5119455449968723e-06, "loss": 0.0864, "step": 511350 }, { "epoch": 5.03, "grad_norm": 10.395471572875977, "learning_rate": 2.511821422542624e-06, "loss": 0.0414, "step": 511375 }, { "epoch": 5.03, "grad_norm": 2.517688751220703, "learning_rate": 2.5116973000883756e-06, "loss": 0.0938, "step": 511400 }, { "epoch": 5.03, "grad_norm": 14.057960510253906, "learning_rate": 2.511573177634127e-06, "loss": 0.0466, "step": 511425 }, { "epoch": 5.03, "grad_norm": 1.6545497179031372, "learning_rate": 2.511449055179879e-06, "loss": 0.0724, "step": 511450 }, { "epoch": 5.03, "grad_norm": 7.156865119934082, "learning_rate": 2.51132493272563e-06, "loss": 0.0344, "step": 511475 }, { "epoch": 5.03, "grad_norm": 3.361318826675415, "learning_rate": 2.5112008102713813e-06, "loss": 0.067, "step": 511500 }, { "epoch": 5.03, "grad_norm": 8.962485313415527, "learning_rate": 2.5110766878171333e-06, "loss": 0.0377, "step": 511525 }, { "epoch": 5.03, "grad_norm": 7.836071491241455, "learning_rate": 2.5109525653628845e-06, "loss": 0.0676, "step": 511550 }, { "epoch": 5.03, "grad_norm": 9.182780265808105, "learning_rate": 2.510828442908636e-06, "loss": 0.0634, "step": 511575 }, { "epoch": 5.03, "grad_norm": 2.643069267272949, "learning_rate": 2.510704320454388e-06, "loss": 0.0689, "step": 511600 }, { "epoch": 5.03, "grad_norm": 9.442527770996094, "learning_rate": 2.5105801980001394e-06, "loss": 0.0436, "step": 511625 }, { "epoch": 5.03, "grad_norm": 2.409189462661743, "learning_rate": 2.5104560755458907e-06, "loss": 0.0705, "step": 511650 }, { "epoch": 5.03, "grad_norm": 11.029850006103516, "learning_rate": 2.5103319530916427e-06, "loss": 0.0409, "step": 511675 }, { "epoch": 5.03, "grad_norm": 3.000164270401001, "learning_rate": 2.510207830637394e-06, "loss": 0.0691, "step": 511700 }, { "epoch": 5.03, "grad_norm": 7.131477355957031, "learning_rate": 2.510083708183145e-06, "loss": 0.0467, "step": 511725 }, { "epoch": 5.03, "grad_norm": 2.650169610977173, "learning_rate": 2.5099595857288968e-06, "loss": 0.0807, "step": 511750 }, { "epoch": 5.03, "grad_norm": 4.413913726806641, "learning_rate": 2.5098354632746484e-06, "loss": 0.0458, "step": 511775 }, { "epoch": 5.03, "grad_norm": 0.40372157096862793, "learning_rate": 2.5097113408204e-06, "loss": 0.0801, "step": 511800 }, { "epoch": 5.03, "grad_norm": 6.227883815765381, "learning_rate": 2.5095872183661513e-06, "loss": 0.0376, "step": 511825 }, { "epoch": 5.03, "grad_norm": 4.577790260314941, "learning_rate": 2.5094630959119033e-06, "loss": 0.083, "step": 511850 }, { "epoch": 5.03, "grad_norm": 12.789322853088379, "learning_rate": 2.5093389734576545e-06, "loss": 0.049, "step": 511875 }, { "epoch": 5.03, "grad_norm": 0.09762686491012573, "learning_rate": 2.5092148510034057e-06, "loss": 0.1012, "step": 511900 }, { "epoch": 5.03, "grad_norm": 5.3212785720825195, "learning_rate": 2.509090728549158e-06, "loss": 0.0528, "step": 511925 }, { "epoch": 5.03, "grad_norm": 3.2197389602661133, "learning_rate": 2.508966606094909e-06, "loss": 0.0885, "step": 511950 }, { "epoch": 5.03, "grad_norm": 10.203093528747559, "learning_rate": 2.5088424836406606e-06, "loss": 0.0397, "step": 511975 }, { "epoch": 5.03, "grad_norm": 5.27146053314209, "learning_rate": 2.5087183611864123e-06, "loss": 0.0716, "step": 512000 }, { "epoch": 5.03, "grad_norm": 6.086180210113525, "learning_rate": 2.508594238732164e-06, "loss": 0.0342, "step": 512025 }, { "epoch": 5.03, "grad_norm": 1.6360433101654053, "learning_rate": 2.508470116277915e-06, "loss": 0.0808, "step": 512050 }, { "epoch": 5.03, "grad_norm": 9.309703826904297, "learning_rate": 2.508345993823667e-06, "loss": 0.0487, "step": 512075 }, { "epoch": 5.04, "grad_norm": 4.969243049621582, "learning_rate": 2.5082218713694184e-06, "loss": 0.067, "step": 512100 }, { "epoch": 5.04, "grad_norm": 7.3771514892578125, "learning_rate": 2.5080977489151696e-06, "loss": 0.0478, "step": 512125 }, { "epoch": 5.04, "grad_norm": 7.280672550201416, "learning_rate": 2.5079736264609217e-06, "loss": 0.0623, "step": 512150 }, { "epoch": 5.04, "grad_norm": 0.5748693943023682, "learning_rate": 2.507849504006673e-06, "loss": 0.0383, "step": 512175 }, { "epoch": 5.04, "grad_norm": 8.132317543029785, "learning_rate": 2.5077253815524245e-06, "loss": 0.0597, "step": 512200 }, { "epoch": 5.04, "grad_norm": 16.6998233795166, "learning_rate": 2.507601259098176e-06, "loss": 0.0491, "step": 512225 }, { "epoch": 5.04, "grad_norm": 3.6402266025543213, "learning_rate": 2.5074771366439278e-06, "loss": 0.0776, "step": 512250 }, { "epoch": 5.04, "grad_norm": 6.6139421463012695, "learning_rate": 2.507353014189679e-06, "loss": 0.0461, "step": 512275 }, { "epoch": 5.04, "grad_norm": 1.9913755655288696, "learning_rate": 2.507228891735431e-06, "loss": 0.0672, "step": 512300 }, { "epoch": 5.04, "grad_norm": 4.801117897033691, "learning_rate": 2.5071047692811823e-06, "loss": 0.0338, "step": 512325 }, { "epoch": 5.04, "grad_norm": 0.17287492752075195, "learning_rate": 2.5069806468269335e-06, "loss": 0.087, "step": 512350 }, { "epoch": 5.04, "grad_norm": 8.363669395446777, "learning_rate": 2.5068565243726855e-06, "loss": 0.0512, "step": 512375 }, { "epoch": 5.04, "grad_norm": 12.447562217712402, "learning_rate": 2.5067324019184367e-06, "loss": 0.0869, "step": 512400 }, { "epoch": 5.04, "grad_norm": 13.271937370300293, "learning_rate": 2.5066082794641884e-06, "loss": 0.0375, "step": 512425 }, { "epoch": 5.04, "grad_norm": 2.3575146198272705, "learning_rate": 2.50648415700994e-06, "loss": 0.0869, "step": 512450 }, { "epoch": 5.04, "grad_norm": 16.096500396728516, "learning_rate": 2.5063600345556916e-06, "loss": 0.0501, "step": 512475 }, { "epoch": 5.04, "grad_norm": 6.977282524108887, "learning_rate": 2.506235912101443e-06, "loss": 0.0918, "step": 512500 }, { "epoch": 5.04, "grad_norm": 16.68191909790039, "learning_rate": 2.506111789647195e-06, "loss": 0.0425, "step": 512525 }, { "epoch": 5.04, "grad_norm": 0.30075472593307495, "learning_rate": 2.505987667192946e-06, "loss": 0.0731, "step": 512550 }, { "epoch": 5.04, "grad_norm": 7.101375579833984, "learning_rate": 2.5058635447386973e-06, "loss": 0.0478, "step": 512575 }, { "epoch": 5.04, "grad_norm": 4.029916763305664, "learning_rate": 2.505739422284449e-06, "loss": 0.095, "step": 512600 }, { "epoch": 5.04, "grad_norm": 7.2705302238464355, "learning_rate": 2.5056152998302006e-06, "loss": 0.0376, "step": 512625 }, { "epoch": 5.04, "grad_norm": 1.075357437133789, "learning_rate": 2.5054911773759522e-06, "loss": 0.0875, "step": 512650 }, { "epoch": 5.04, "grad_norm": 12.601720809936523, "learning_rate": 2.5053670549217035e-06, "loss": 0.04, "step": 512675 }, { "epoch": 5.04, "grad_norm": 5.538980484008789, "learning_rate": 2.5052429324674555e-06, "loss": 0.0831, "step": 512700 }, { "epoch": 5.04, "grad_norm": 8.17455768585205, "learning_rate": 2.5051188100132067e-06, "loss": 0.0471, "step": 512725 }, { "epoch": 5.04, "grad_norm": 4.116062641143799, "learning_rate": 2.504994687558958e-06, "loss": 0.0943, "step": 512750 }, { "epoch": 5.04, "grad_norm": 10.543664932250977, "learning_rate": 2.50487056510471e-06, "loss": 0.0449, "step": 512775 }, { "epoch": 5.04, "grad_norm": 0.3734772205352783, "learning_rate": 2.504746442650461e-06, "loss": 0.0827, "step": 512800 }, { "epoch": 5.04, "grad_norm": 7.826226711273193, "learning_rate": 2.504622320196213e-06, "loss": 0.0524, "step": 512825 }, { "epoch": 5.04, "grad_norm": 1.6905181407928467, "learning_rate": 2.5044981977419645e-06, "loss": 0.0676, "step": 512850 }, { "epoch": 5.04, "grad_norm": 8.754634857177734, "learning_rate": 2.504374075287716e-06, "loss": 0.0681, "step": 512875 }, { "epoch": 5.04, "grad_norm": 5.195706367492676, "learning_rate": 2.5042499528334673e-06, "loss": 0.0946, "step": 512900 }, { "epoch": 5.04, "grad_norm": 13.977585792541504, "learning_rate": 2.5041258303792194e-06, "loss": 0.0405, "step": 512925 }, { "epoch": 5.04, "grad_norm": 0.7391538619995117, "learning_rate": 2.5040017079249706e-06, "loss": 0.0702, "step": 512950 }, { "epoch": 5.04, "grad_norm": 15.895633697509766, "learning_rate": 2.5038775854707222e-06, "loss": 0.0468, "step": 512975 }, { "epoch": 5.04, "grad_norm": 1.1192810535430908, "learning_rate": 2.503753463016474e-06, "loss": 0.0982, "step": 513000 }, { "epoch": 5.04, "grad_norm": 2.850041627883911, "learning_rate": 2.503629340562225e-06, "loss": 0.0291, "step": 513025 }, { "epoch": 5.04, "grad_norm": 2.0607004165649414, "learning_rate": 2.5035052181079767e-06, "loss": 0.0589, "step": 513050 }, { "epoch": 5.04, "grad_norm": 6.453369617462158, "learning_rate": 2.5033810956537283e-06, "loss": 0.0384, "step": 513075 }, { "epoch": 5.04, "grad_norm": 0.4888441264629364, "learning_rate": 2.50325697319948e-06, "loss": 0.0635, "step": 513100 }, { "epoch": 5.05, "grad_norm": 8.70775032043457, "learning_rate": 2.503132850745231e-06, "loss": 0.0512, "step": 513125 }, { "epoch": 5.05, "grad_norm": 3.999209403991699, "learning_rate": 2.5030087282909832e-06, "loss": 0.0607, "step": 513150 }, { "epoch": 5.05, "grad_norm": 9.74996566772461, "learning_rate": 2.5028846058367345e-06, "loss": 0.0467, "step": 513175 }, { "epoch": 5.05, "grad_norm": 9.608871459960938, "learning_rate": 2.502760483382486e-06, "loss": 0.105, "step": 513200 }, { "epoch": 5.05, "grad_norm": 4.162483215332031, "learning_rate": 2.5026363609282377e-06, "loss": 0.0626, "step": 513225 }, { "epoch": 5.05, "grad_norm": 0.09343353658914566, "learning_rate": 2.502517203372159e-06, "loss": 0.0797, "step": 513250 }, { "epoch": 5.05, "grad_norm": 9.825738906860352, "learning_rate": 2.5023930809179104e-06, "loss": 0.0402, "step": 513275 }, { "epoch": 5.05, "grad_norm": 0.18010111153125763, "learning_rate": 2.5022689584636625e-06, "loss": 0.0883, "step": 513300 }, { "epoch": 5.05, "grad_norm": 13.362610816955566, "learning_rate": 2.5021448360094137e-06, "loss": 0.0642, "step": 513325 }, { "epoch": 5.05, "grad_norm": 2.174410104751587, "learning_rate": 2.502020713555165e-06, "loss": 0.0766, "step": 513350 }, { "epoch": 5.05, "grad_norm": 7.770009994506836, "learning_rate": 2.501896591100917e-06, "loss": 0.0487, "step": 513375 }, { "epoch": 5.05, "grad_norm": 3.596748113632202, "learning_rate": 2.501772468646668e-06, "loss": 0.0694, "step": 513400 }, { "epoch": 5.05, "grad_norm": 8.64266586303711, "learning_rate": 2.5016483461924198e-06, "loss": 0.0481, "step": 513425 }, { "epoch": 5.05, "grad_norm": 0.6663753390312195, "learning_rate": 2.5015242237381714e-06, "loss": 0.0907, "step": 513450 }, { "epoch": 5.05, "grad_norm": 7.446663856506348, "learning_rate": 2.501400101283923e-06, "loss": 0.0387, "step": 513475 }, { "epoch": 5.05, "grad_norm": 4.966952800750732, "learning_rate": 2.5012759788296743e-06, "loss": 0.0764, "step": 513500 }, { "epoch": 5.05, "grad_norm": 9.065389633178711, "learning_rate": 2.5011518563754263e-06, "loss": 0.0475, "step": 513525 }, { "epoch": 5.05, "grad_norm": 0.27543580532073975, "learning_rate": 2.5010277339211775e-06, "loss": 0.0763, "step": 513550 }, { "epoch": 5.05, "grad_norm": 4.605442047119141, "learning_rate": 2.5009036114669287e-06, "loss": 0.0504, "step": 513575 }, { "epoch": 5.05, "grad_norm": 2.6487741470336914, "learning_rate": 2.500779489012681e-06, "loss": 0.0921, "step": 513600 }, { "epoch": 5.05, "grad_norm": 11.644571304321289, "learning_rate": 2.500655366558432e-06, "loss": 0.0322, "step": 513625 }, { "epoch": 5.05, "grad_norm": 1.8146570920944214, "learning_rate": 2.5005312441041836e-06, "loss": 0.0884, "step": 513650 }, { "epoch": 5.05, "grad_norm": 5.118401050567627, "learning_rate": 2.5004071216499353e-06, "loss": 0.0465, "step": 513675 }, { "epoch": 5.05, "grad_norm": 0.6647889614105225, "learning_rate": 2.500282999195687e-06, "loss": 0.0679, "step": 513700 }, { "epoch": 5.05, "grad_norm": 0.9721606373786926, "learning_rate": 2.500158876741438e-06, "loss": 0.0347, "step": 513725 }, { "epoch": 5.05, "grad_norm": 1.071851134300232, "learning_rate": 2.50003475428719e-06, "loss": 0.0886, "step": 513750 }, { "epoch": 5.05, "grad_norm": 15.12774658203125, "learning_rate": 2.4999106318329414e-06, "loss": 0.0569, "step": 513775 }, { "epoch": 5.05, "grad_norm": 3.759992837905884, "learning_rate": 2.4997865093786926e-06, "loss": 0.0817, "step": 513800 }, { "epoch": 5.05, "grad_norm": 13.413105964660645, "learning_rate": 2.4996623869244442e-06, "loss": 0.0373, "step": 513825 }, { "epoch": 5.05, "grad_norm": 0.9815235137939453, "learning_rate": 2.499538264470196e-06, "loss": 0.0951, "step": 513850 }, { "epoch": 5.05, "grad_norm": 15.865348815917969, "learning_rate": 2.4994141420159475e-06, "loss": 0.0578, "step": 513875 }, { "epoch": 5.05, "grad_norm": 0.11051543802022934, "learning_rate": 2.499290019561699e-06, "loss": 0.0767, "step": 513900 }, { "epoch": 5.05, "grad_norm": 8.129337310791016, "learning_rate": 2.4991658971074508e-06, "loss": 0.0452, "step": 513925 }, { "epoch": 5.05, "grad_norm": 2.446873426437378, "learning_rate": 2.499041774653202e-06, "loss": 0.0767, "step": 513950 }, { "epoch": 5.05, "grad_norm": 6.404628276824951, "learning_rate": 2.4989176521989536e-06, "loss": 0.054, "step": 513975 }, { "epoch": 5.05, "grad_norm": 1.7897088527679443, "learning_rate": 2.4987935297447053e-06, "loss": 0.083, "step": 514000 }, { "epoch": 5.05, "grad_norm": 12.668076515197754, "learning_rate": 2.4986694072904565e-06, "loss": 0.0311, "step": 514025 }, { "epoch": 5.05, "grad_norm": 10.7055082321167, "learning_rate": 2.498545284836208e-06, "loss": 0.0817, "step": 514050 }, { "epoch": 5.05, "grad_norm": 1.7924481630325317, "learning_rate": 2.4984211623819597e-06, "loss": 0.04, "step": 514075 }, { "epoch": 5.05, "grad_norm": 7.286639213562012, "learning_rate": 2.4982970399277114e-06, "loss": 0.0779, "step": 514100 }, { "epoch": 5.05, "grad_norm": 9.542351722717285, "learning_rate": 2.498172917473463e-06, "loss": 0.0506, "step": 514125 }, { "epoch": 5.06, "grad_norm": 2.114553928375244, "learning_rate": 2.4980487950192147e-06, "loss": 0.0748, "step": 514150 }, { "epoch": 5.06, "grad_norm": 18.67902374267578, "learning_rate": 2.497924672564966e-06, "loss": 0.0545, "step": 514175 }, { "epoch": 5.06, "grad_norm": 3.8704702854156494, "learning_rate": 2.4978005501107175e-06, "loss": 0.0729, "step": 514200 }, { "epoch": 5.06, "grad_norm": 8.54708480834961, "learning_rate": 2.4976764276564687e-06, "loss": 0.0576, "step": 514225 }, { "epoch": 5.06, "grad_norm": 5.1839189529418945, "learning_rate": 2.4975523052022203e-06, "loss": 0.0891, "step": 514250 }, { "epoch": 5.06, "grad_norm": 2.0949933528900146, "learning_rate": 2.497428182747972e-06, "loss": 0.0459, "step": 514275 }, { "epoch": 5.06, "grad_norm": 7.098334312438965, "learning_rate": 2.4973040602937236e-06, "loss": 0.0852, "step": 514300 }, { "epoch": 5.06, "grad_norm": 13.700770378112793, "learning_rate": 2.4971799378394753e-06, "loss": 0.0472, "step": 514325 }, { "epoch": 5.06, "grad_norm": 3.8135955333709717, "learning_rate": 2.497055815385227e-06, "loss": 0.0806, "step": 514350 }, { "epoch": 5.06, "grad_norm": 11.975516319274902, "learning_rate": 2.496931692930978e-06, "loss": 0.0405, "step": 514375 }, { "epoch": 5.06, "grad_norm": 2.8400039672851562, "learning_rate": 2.4968075704767297e-06, "loss": 0.0845, "step": 514400 }, { "epoch": 5.06, "grad_norm": 15.578469276428223, "learning_rate": 2.4966834480224814e-06, "loss": 0.0738, "step": 514425 }, { "epoch": 5.06, "grad_norm": 0.2938912808895111, "learning_rate": 2.4965593255682326e-06, "loss": 0.0764, "step": 514450 }, { "epoch": 5.06, "grad_norm": 6.721729755401611, "learning_rate": 2.4964352031139842e-06, "loss": 0.0463, "step": 514475 }, { "epoch": 5.06, "grad_norm": 0.012206321582198143, "learning_rate": 2.496311080659736e-06, "loss": 0.0875, "step": 514500 }, { "epoch": 5.06, "grad_norm": 3.1402835845947266, "learning_rate": 2.4961869582054875e-06, "loss": 0.0242, "step": 514525 }, { "epoch": 5.06, "grad_norm": 0.5636016726493835, "learning_rate": 2.496062835751239e-06, "loss": 0.0618, "step": 514550 }, { "epoch": 5.06, "grad_norm": 7.404031276702881, "learning_rate": 2.4959387132969908e-06, "loss": 0.0378, "step": 514575 }, { "epoch": 5.06, "grad_norm": 0.9467582702636719, "learning_rate": 2.495814590842742e-06, "loss": 0.078, "step": 514600 }, { "epoch": 5.06, "grad_norm": 11.947073936462402, "learning_rate": 2.4956904683884936e-06, "loss": 0.0502, "step": 514625 }, { "epoch": 5.06, "grad_norm": 7.210662841796875, "learning_rate": 2.495566345934245e-06, "loss": 0.0779, "step": 514650 }, { "epoch": 5.06, "grad_norm": 1.7858176231384277, "learning_rate": 2.4954422234799964e-06, "loss": 0.0498, "step": 514675 }, { "epoch": 5.06, "grad_norm": 2.244033098220825, "learning_rate": 2.495318101025748e-06, "loss": 0.076, "step": 514700 }, { "epoch": 5.06, "grad_norm": 15.888611793518066, "learning_rate": 2.4951939785714997e-06, "loss": 0.0534, "step": 514725 }, { "epoch": 5.06, "grad_norm": 2.122204303741455, "learning_rate": 2.4950698561172514e-06, "loss": 0.0888, "step": 514750 }, { "epoch": 5.06, "grad_norm": 6.9349446296691895, "learning_rate": 2.494945733663003e-06, "loss": 0.0453, "step": 514775 }, { "epoch": 5.06, "grad_norm": 3.8992087841033936, "learning_rate": 2.494821611208754e-06, "loss": 0.0803, "step": 514800 }, { "epoch": 5.06, "grad_norm": 8.673173904418945, "learning_rate": 2.494697488754506e-06, "loss": 0.0474, "step": 514825 }, { "epoch": 5.06, "grad_norm": 3.0591769218444824, "learning_rate": 2.4945733663002575e-06, "loss": 0.0853, "step": 514850 }, { "epoch": 5.06, "grad_norm": 6.338992118835449, "learning_rate": 2.4944492438460087e-06, "loss": 0.0375, "step": 514875 }, { "epoch": 5.06, "grad_norm": 6.102510929107666, "learning_rate": 2.4943251213917603e-06, "loss": 0.0882, "step": 514900 }, { "epoch": 5.06, "grad_norm": 24.806228637695312, "learning_rate": 2.494200998937512e-06, "loss": 0.0513, "step": 514925 }, { "epoch": 5.06, "grad_norm": 0.122382253408432, "learning_rate": 2.4940768764832636e-06, "loss": 0.098, "step": 514950 }, { "epoch": 5.06, "grad_norm": 10.86831283569336, "learning_rate": 2.4939527540290152e-06, "loss": 0.0502, "step": 514975 }, { "epoch": 5.06, "grad_norm": 3.0539379119873047, "learning_rate": 2.493828631574767e-06, "loss": 0.068, "step": 515000 }, { "epoch": 5.06, "grad_norm": 4.610551834106445, "learning_rate": 2.493704509120518e-06, "loss": 0.0354, "step": 515025 }, { "epoch": 5.06, "grad_norm": 0.24760086834430695, "learning_rate": 2.4935803866662697e-06, "loss": 0.0807, "step": 515050 }, { "epoch": 5.06, "grad_norm": 11.596527099609375, "learning_rate": 2.493456264212021e-06, "loss": 0.0487, "step": 515075 }, { "epoch": 5.06, "grad_norm": 0.29629597067832947, "learning_rate": 2.4933321417577725e-06, "loss": 0.0713, "step": 515100 }, { "epoch": 5.06, "grad_norm": 10.43742847442627, "learning_rate": 2.493208019303524e-06, "loss": 0.0544, "step": 515125 }, { "epoch": 5.07, "grad_norm": 2.185854911804199, "learning_rate": 2.493083896849276e-06, "loss": 0.0677, "step": 515150 }, { "epoch": 5.07, "grad_norm": 8.055647850036621, "learning_rate": 2.4929597743950275e-06, "loss": 0.0384, "step": 515175 }, { "epoch": 5.07, "grad_norm": 2.1208882331848145, "learning_rate": 2.492835651940779e-06, "loss": 0.074, "step": 515200 }, { "epoch": 5.07, "grad_norm": 7.906382083892822, "learning_rate": 2.4927115294865303e-06, "loss": 0.0556, "step": 515225 }, { "epoch": 5.07, "grad_norm": 1.7371656894683838, "learning_rate": 2.492587407032282e-06, "loss": 0.086, "step": 515250 }, { "epoch": 5.07, "grad_norm": 15.457063674926758, "learning_rate": 2.4924632845780336e-06, "loss": 0.0485, "step": 515275 }, { "epoch": 5.07, "grad_norm": 6.1160173416137695, "learning_rate": 2.4923391621237848e-06, "loss": 0.0644, "step": 515300 }, { "epoch": 5.07, "grad_norm": 10.577275276184082, "learning_rate": 2.4922150396695364e-06, "loss": 0.0537, "step": 515325 }, { "epoch": 5.07, "grad_norm": 0.038705769926309586, "learning_rate": 2.492090917215288e-06, "loss": 0.0625, "step": 515350 }, { "epoch": 5.07, "grad_norm": 14.864619255065918, "learning_rate": 2.4919667947610397e-06, "loss": 0.0557, "step": 515375 }, { "epoch": 5.07, "grad_norm": 0.5207843780517578, "learning_rate": 2.4918426723067913e-06, "loss": 0.0963, "step": 515400 }, { "epoch": 5.07, "grad_norm": 6.404531478881836, "learning_rate": 2.491718549852543e-06, "loss": 0.0513, "step": 515425 }, { "epoch": 5.07, "grad_norm": 3.501701831817627, "learning_rate": 2.491594427398294e-06, "loss": 0.1115, "step": 515450 }, { "epoch": 5.07, "grad_norm": 5.660739421844482, "learning_rate": 2.491470304944046e-06, "loss": 0.0411, "step": 515475 }, { "epoch": 5.07, "grad_norm": 4.640611171722412, "learning_rate": 2.491346182489797e-06, "loss": 0.0737, "step": 515500 }, { "epoch": 5.07, "grad_norm": 17.074087142944336, "learning_rate": 2.4912220600355486e-06, "loss": 0.0564, "step": 515525 }, { "epoch": 5.07, "grad_norm": 0.12748828530311584, "learning_rate": 2.4910979375813003e-06, "loss": 0.0741, "step": 515550 }, { "epoch": 5.07, "grad_norm": 15.417014122009277, "learning_rate": 2.490973815127052e-06, "loss": 0.0421, "step": 515575 }, { "epoch": 5.07, "grad_norm": 3.4259674549102783, "learning_rate": 2.4908496926728036e-06, "loss": 0.0954, "step": 515600 }, { "epoch": 5.07, "grad_norm": 10.659501075744629, "learning_rate": 2.490725570218555e-06, "loss": 0.056, "step": 515625 }, { "epoch": 5.07, "grad_norm": 5.806158065795898, "learning_rate": 2.4906014477643064e-06, "loss": 0.0847, "step": 515650 }, { "epoch": 5.07, "grad_norm": 4.791604995727539, "learning_rate": 2.490477325310058e-06, "loss": 0.0589, "step": 515675 }, { "epoch": 5.07, "grad_norm": 5.753006935119629, "learning_rate": 2.4903532028558097e-06, "loss": 0.0959, "step": 515700 }, { "epoch": 5.07, "grad_norm": 14.369690895080566, "learning_rate": 2.490229080401561e-06, "loss": 0.049, "step": 515725 }, { "epoch": 5.07, "grad_norm": 4.773589134216309, "learning_rate": 2.4901049579473125e-06, "loss": 0.1126, "step": 515750 }, { "epoch": 5.07, "grad_norm": 23.876474380493164, "learning_rate": 2.489980835493064e-06, "loss": 0.0689, "step": 515775 }, { "epoch": 5.07, "grad_norm": 8.824248313903809, "learning_rate": 2.4898567130388158e-06, "loss": 0.0762, "step": 515800 }, { "epoch": 5.07, "grad_norm": 7.269692420959473, "learning_rate": 2.4897325905845674e-06, "loss": 0.06, "step": 515825 }, { "epoch": 5.07, "grad_norm": 0.4210115075111389, "learning_rate": 2.489608468130319e-06, "loss": 0.0767, "step": 515850 }, { "epoch": 5.07, "grad_norm": 16.090373992919922, "learning_rate": 2.4894843456760703e-06, "loss": 0.0419, "step": 515875 }, { "epoch": 5.07, "grad_norm": 6.951277256011963, "learning_rate": 2.489360223221822e-06, "loss": 0.0707, "step": 515900 }, { "epoch": 5.07, "grad_norm": 7.961010456085205, "learning_rate": 2.4892361007675735e-06, "loss": 0.0576, "step": 515925 }, { "epoch": 5.07, "grad_norm": 0.37754809856414795, "learning_rate": 2.489111978313325e-06, "loss": 0.0874, "step": 515950 }, { "epoch": 5.07, "grad_norm": 5.290709018707275, "learning_rate": 2.488987855859077e-06, "loss": 0.0388, "step": 515975 }, { "epoch": 5.07, "grad_norm": 1.4240068197250366, "learning_rate": 2.488868698302998e-06, "loss": 0.0823, "step": 516000 }, { "epoch": 5.07, "grad_norm": 3.137192487716675, "learning_rate": 2.4887445758487495e-06, "loss": 0.0427, "step": 516025 }, { "epoch": 5.07, "grad_norm": 4.973603248596191, "learning_rate": 2.488620453394501e-06, "loss": 0.0985, "step": 516050 }, { "epoch": 5.07, "grad_norm": 9.095081329345703, "learning_rate": 2.4884963309402527e-06, "loss": 0.0371, "step": 516075 }, { "epoch": 5.07, "grad_norm": 3.979579448699951, "learning_rate": 2.488372208486004e-06, "loss": 0.071, "step": 516100 }, { "epoch": 5.07, "grad_norm": 14.487346649169922, "learning_rate": 2.4882480860317556e-06, "loss": 0.0722, "step": 516125 }, { "epoch": 5.07, "grad_norm": 0.45200875401496887, "learning_rate": 2.4881239635775072e-06, "loss": 0.0763, "step": 516150 }, { "epoch": 5.08, "grad_norm": 2.303379774093628, "learning_rate": 2.487999841123259e-06, "loss": 0.0455, "step": 516175 }, { "epoch": 5.08, "grad_norm": 4.088986396789551, "learning_rate": 2.4878757186690105e-06, "loss": 0.0961, "step": 516200 }, { "epoch": 5.08, "grad_norm": 3.0722692012786865, "learning_rate": 2.487751596214762e-06, "loss": 0.0437, "step": 516225 }, { "epoch": 5.08, "grad_norm": 1.4108670949935913, "learning_rate": 2.4876274737605133e-06, "loss": 0.1184, "step": 516250 }, { "epoch": 5.08, "grad_norm": 4.539365291595459, "learning_rate": 2.487503351306265e-06, "loss": 0.0405, "step": 516275 }, { "epoch": 5.08, "grad_norm": 1.8218899965286255, "learning_rate": 2.4873792288520166e-06, "loss": 0.0531, "step": 516300 }, { "epoch": 5.08, "grad_norm": 12.089374542236328, "learning_rate": 2.487255106397768e-06, "loss": 0.0523, "step": 516325 }, { "epoch": 5.08, "grad_norm": 0.47819069027900696, "learning_rate": 2.4871309839435195e-06, "loss": 0.1118, "step": 516350 }, { "epoch": 5.08, "grad_norm": 6.772538661956787, "learning_rate": 2.487006861489271e-06, "loss": 0.047, "step": 516375 }, { "epoch": 5.08, "grad_norm": 5.4744553565979, "learning_rate": 2.4868827390350227e-06, "loss": 0.1036, "step": 516400 }, { "epoch": 5.08, "grad_norm": 13.892984390258789, "learning_rate": 2.4867586165807744e-06, "loss": 0.0418, "step": 516425 }, { "epoch": 5.08, "grad_norm": 10.074262619018555, "learning_rate": 2.486634494126526e-06, "loss": 0.0793, "step": 516450 }, { "epoch": 5.08, "grad_norm": 12.836458206176758, "learning_rate": 2.486510371672277e-06, "loss": 0.0552, "step": 516475 }, { "epoch": 5.08, "grad_norm": 9.30930233001709, "learning_rate": 2.486386249218029e-06, "loss": 0.0929, "step": 516500 }, { "epoch": 5.08, "grad_norm": 9.595961570739746, "learning_rate": 2.48626212676378e-06, "loss": 0.0515, "step": 516525 }, { "epoch": 5.08, "grad_norm": 2.275583505630493, "learning_rate": 2.4861380043095317e-06, "loss": 0.1142, "step": 516550 }, { "epoch": 5.08, "grad_norm": 9.81759262084961, "learning_rate": 2.4860138818552833e-06, "loss": 0.0385, "step": 516575 }, { "epoch": 5.08, "grad_norm": 0.5929746627807617, "learning_rate": 2.485889759401035e-06, "loss": 0.1026, "step": 516600 }, { "epoch": 5.08, "grad_norm": 11.680702209472656, "learning_rate": 2.4857656369467866e-06, "loss": 0.0501, "step": 516625 }, { "epoch": 5.08, "grad_norm": 0.04926367104053497, "learning_rate": 2.4856415144925382e-06, "loss": 0.0724, "step": 516650 }, { "epoch": 5.08, "grad_norm": 7.637770652770996, "learning_rate": 2.4855173920382894e-06, "loss": 0.0506, "step": 516675 }, { "epoch": 5.08, "grad_norm": 1.9881417751312256, "learning_rate": 2.485393269584041e-06, "loss": 0.0822, "step": 516700 }, { "epoch": 5.08, "grad_norm": 6.773828983306885, "learning_rate": 2.4852691471297927e-06, "loss": 0.0504, "step": 516725 }, { "epoch": 5.08, "grad_norm": 0.05978565290570259, "learning_rate": 2.485145024675544e-06, "loss": 0.0526, "step": 516750 }, { "epoch": 5.08, "grad_norm": 10.390559196472168, "learning_rate": 2.4850209022212956e-06, "loss": 0.0532, "step": 516775 }, { "epoch": 5.08, "grad_norm": 4.350057601928711, "learning_rate": 2.484896779767047e-06, "loss": 0.0839, "step": 516800 }, { "epoch": 5.08, "grad_norm": 2.1471455097198486, "learning_rate": 2.484772657312799e-06, "loss": 0.0377, "step": 516825 }, { "epoch": 5.08, "grad_norm": 1.3506932258605957, "learning_rate": 2.4846485348585505e-06, "loss": 0.1229, "step": 516850 }, { "epoch": 5.08, "grad_norm": 13.92784309387207, "learning_rate": 2.484524412404302e-06, "loss": 0.0551, "step": 516875 }, { "epoch": 5.08, "grad_norm": 0.32521143555641174, "learning_rate": 2.4844002899500533e-06, "loss": 0.0614, "step": 516900 }, { "epoch": 5.08, "grad_norm": 14.04892349243164, "learning_rate": 2.484276167495805e-06, "loss": 0.0471, "step": 516925 }, { "epoch": 5.08, "grad_norm": 0.16992293298244476, "learning_rate": 2.484152045041556e-06, "loss": 0.0782, "step": 516950 }, { "epoch": 5.08, "grad_norm": 7.292040824890137, "learning_rate": 2.4840279225873078e-06, "loss": 0.0585, "step": 516975 }, { "epoch": 5.08, "grad_norm": 0.06887868791818619, "learning_rate": 2.4839038001330594e-06, "loss": 0.0588, "step": 517000 }, { "epoch": 5.08, "grad_norm": 5.9338555335998535, "learning_rate": 2.483779677678811e-06, "loss": 0.0486, "step": 517025 }, { "epoch": 5.08, "grad_norm": 0.08162838965654373, "learning_rate": 2.4836555552245627e-06, "loss": 0.0701, "step": 517050 }, { "epoch": 5.08, "grad_norm": 6.175993919372559, "learning_rate": 2.4835314327703143e-06, "loss": 0.0488, "step": 517075 }, { "epoch": 5.08, "grad_norm": 0.5954530239105225, "learning_rate": 2.4834073103160655e-06, "loss": 0.0762, "step": 517100 }, { "epoch": 5.08, "grad_norm": 3.0554463863372803, "learning_rate": 2.483283187861817e-06, "loss": 0.0363, "step": 517125 }, { "epoch": 5.08, "grad_norm": 0.020799782127141953, "learning_rate": 2.483159065407569e-06, "loss": 0.0841, "step": 517150 }, { "epoch": 5.08, "grad_norm": 5.553863525390625, "learning_rate": 2.48303494295332e-06, "loss": 0.0447, "step": 517175 }, { "epoch": 5.09, "grad_norm": 0.008297169581055641, "learning_rate": 2.4829108204990717e-06, "loss": 0.0885, "step": 517200 }, { "epoch": 5.09, "grad_norm": 19.943363189697266, "learning_rate": 2.4827866980448233e-06, "loss": 0.0557, "step": 517225 }, { "epoch": 5.09, "grad_norm": 5.438060283660889, "learning_rate": 2.482662575590575e-06, "loss": 0.1016, "step": 517250 }, { "epoch": 5.09, "grad_norm": 9.605262756347656, "learning_rate": 2.4825384531363266e-06, "loss": 0.0481, "step": 517275 }, { "epoch": 5.09, "grad_norm": 0.13721789419651031, "learning_rate": 2.482414330682078e-06, "loss": 0.0878, "step": 517300 }, { "epoch": 5.09, "grad_norm": 6.31912899017334, "learning_rate": 2.4822902082278294e-06, "loss": 0.0398, "step": 517325 }, { "epoch": 5.09, "grad_norm": 1.6926263570785522, "learning_rate": 2.482166085773581e-06, "loss": 0.0649, "step": 517350 }, { "epoch": 5.09, "grad_norm": 13.517926216125488, "learning_rate": 2.4820419633193323e-06, "loss": 0.0588, "step": 517375 }, { "epoch": 5.09, "grad_norm": 3.25905442237854, "learning_rate": 2.481917840865084e-06, "loss": 0.0548, "step": 517400 }, { "epoch": 5.09, "grad_norm": 6.513832092285156, "learning_rate": 2.4817937184108355e-06, "loss": 0.0576, "step": 517425 }, { "epoch": 5.09, "grad_norm": 4.247815132141113, "learning_rate": 2.481669595956587e-06, "loss": 0.0689, "step": 517450 }, { "epoch": 5.09, "grad_norm": 11.257926940917969, "learning_rate": 2.481545473502339e-06, "loss": 0.046, "step": 517475 }, { "epoch": 5.09, "grad_norm": 3.1873128414154053, "learning_rate": 2.4814213510480904e-06, "loss": 0.0711, "step": 517500 }, { "epoch": 5.09, "grad_norm": 5.366831302642822, "learning_rate": 2.4812972285938416e-06, "loss": 0.0381, "step": 517525 }, { "epoch": 5.09, "grad_norm": 0.05350109562277794, "learning_rate": 2.4811731061395933e-06, "loss": 0.0794, "step": 517550 }, { "epoch": 5.09, "grad_norm": 11.900477409362793, "learning_rate": 2.481048983685345e-06, "loss": 0.0279, "step": 517575 }, { "epoch": 5.09, "grad_norm": 1.5524345636367798, "learning_rate": 2.480924861231096e-06, "loss": 0.0649, "step": 517600 }, { "epoch": 5.09, "grad_norm": 3.7976343631744385, "learning_rate": 2.4808007387768478e-06, "loss": 0.0389, "step": 517625 }, { "epoch": 5.09, "grad_norm": 0.12659646570682526, "learning_rate": 2.4806766163225994e-06, "loss": 0.0675, "step": 517650 }, { "epoch": 5.09, "grad_norm": 11.245650291442871, "learning_rate": 2.480552493868351e-06, "loss": 0.0387, "step": 517675 }, { "epoch": 5.09, "grad_norm": 0.3060304522514343, "learning_rate": 2.4804283714141027e-06, "loss": 0.0738, "step": 517700 }, { "epoch": 5.09, "grad_norm": 12.555319786071777, "learning_rate": 2.4803042489598543e-06, "loss": 0.052, "step": 517725 }, { "epoch": 5.09, "grad_norm": 6.788815498352051, "learning_rate": 2.4801801265056055e-06, "loss": 0.0886, "step": 517750 }, { "epoch": 5.09, "grad_norm": 9.839305877685547, "learning_rate": 2.480056004051357e-06, "loss": 0.0464, "step": 517775 }, { "epoch": 5.09, "grad_norm": 0.40929466485977173, "learning_rate": 2.4799318815971084e-06, "loss": 0.0951, "step": 517800 }, { "epoch": 5.09, "grad_norm": 1.323111891746521, "learning_rate": 2.47980775914286e-06, "loss": 0.0545, "step": 517825 }, { "epoch": 5.09, "grad_norm": 2.339096784591675, "learning_rate": 2.4796836366886116e-06, "loss": 0.062, "step": 517850 }, { "epoch": 5.09, "grad_norm": 8.210098266601562, "learning_rate": 2.4795595142343633e-06, "loss": 0.0453, "step": 517875 }, { "epoch": 5.09, "grad_norm": 5.275934219360352, "learning_rate": 2.479435391780115e-06, "loss": 0.0989, "step": 517900 }, { "epoch": 5.09, "grad_norm": 9.499585151672363, "learning_rate": 2.4793112693258665e-06, "loss": 0.0571, "step": 517925 }, { "epoch": 5.09, "grad_norm": 2.2121737003326416, "learning_rate": 2.4791871468716177e-06, "loss": 0.0797, "step": 517950 }, { "epoch": 5.09, "grad_norm": 10.491006851196289, "learning_rate": 2.4790630244173694e-06, "loss": 0.0479, "step": 517975 }, { "epoch": 5.09, "grad_norm": 0.129624143242836, "learning_rate": 2.478938901963121e-06, "loss": 0.0697, "step": 518000 }, { "epoch": 5.09, "grad_norm": 11.21075439453125, "learning_rate": 2.4788147795088722e-06, "loss": 0.0507, "step": 518025 }, { "epoch": 5.09, "grad_norm": 0.7268576622009277, "learning_rate": 2.478690657054624e-06, "loss": 0.08, "step": 518050 }, { "epoch": 5.09, "grad_norm": 12.956698417663574, "learning_rate": 2.4785665346003755e-06, "loss": 0.0497, "step": 518075 }, { "epoch": 5.09, "grad_norm": 7.022928714752197, "learning_rate": 2.478442412146127e-06, "loss": 0.07, "step": 518100 }, { "epoch": 5.09, "grad_norm": 8.50759506225586, "learning_rate": 2.4783182896918788e-06, "loss": 0.0522, "step": 518125 }, { "epoch": 5.09, "grad_norm": 3.4440758228302, "learning_rate": 2.4781941672376304e-06, "loss": 0.0809, "step": 518150 }, { "epoch": 5.09, "grad_norm": 16.981855392456055, "learning_rate": 2.4780700447833816e-06, "loss": 0.0433, "step": 518175 }, { "epoch": 5.1, "grad_norm": 2.415996551513672, "learning_rate": 2.4779459223291332e-06, "loss": 0.0964, "step": 518200 }, { "epoch": 5.1, "grad_norm": 7.222164154052734, "learning_rate": 2.4778217998748845e-06, "loss": 0.0528, "step": 518225 }, { "epoch": 5.1, "grad_norm": 4.148660659790039, "learning_rate": 2.477697677420636e-06, "loss": 0.0799, "step": 518250 }, { "epoch": 5.1, "grad_norm": 5.376513957977295, "learning_rate": 2.4775735549663877e-06, "loss": 0.0488, "step": 518275 }, { "epoch": 5.1, "grad_norm": 6.749133110046387, "learning_rate": 2.4774494325121394e-06, "loss": 0.0804, "step": 518300 }, { "epoch": 5.1, "grad_norm": 13.91227912902832, "learning_rate": 2.477325310057891e-06, "loss": 0.0565, "step": 518325 }, { "epoch": 5.1, "grad_norm": 0.009690394625067711, "learning_rate": 2.4772061525018124e-06, "loss": 0.0932, "step": 518350 }, { "epoch": 5.1, "grad_norm": 4.589074611663818, "learning_rate": 2.477082030047564e-06, "loss": 0.042, "step": 518375 }, { "epoch": 5.1, "grad_norm": 0.3320138156414032, "learning_rate": 2.4769579075933153e-06, "loss": 0.0727, "step": 518400 }, { "epoch": 5.1, "grad_norm": 6.279666900634766, "learning_rate": 2.476833785139067e-06, "loss": 0.0596, "step": 518425 }, { "epoch": 5.1, "grad_norm": 5.768310070037842, "learning_rate": 2.4767096626848186e-06, "loss": 0.0921, "step": 518450 }, { "epoch": 5.1, "grad_norm": 10.264625549316406, "learning_rate": 2.4765855402305698e-06, "loss": 0.0392, "step": 518475 }, { "epoch": 5.1, "grad_norm": 0.14182117581367493, "learning_rate": 2.4764614177763214e-06, "loss": 0.0684, "step": 518500 }, { "epoch": 5.1, "grad_norm": 13.981040000915527, "learning_rate": 2.476337295322073e-06, "loss": 0.0516, "step": 518525 }, { "epoch": 5.1, "grad_norm": 8.09691047668457, "learning_rate": 2.4762131728678247e-06, "loss": 0.0662, "step": 518550 }, { "epoch": 5.1, "grad_norm": 13.255099296569824, "learning_rate": 2.4760890504135763e-06, "loss": 0.0424, "step": 518575 }, { "epoch": 5.1, "grad_norm": 4.7585577964782715, "learning_rate": 2.475964927959328e-06, "loss": 0.0874, "step": 518600 }, { "epoch": 5.1, "grad_norm": 1.2946157455444336, "learning_rate": 2.475840805505079e-06, "loss": 0.0431, "step": 518625 }, { "epoch": 5.1, "grad_norm": 6.947099685668945, "learning_rate": 2.475716683050831e-06, "loss": 0.0754, "step": 518650 }, { "epoch": 5.1, "grad_norm": 10.16558837890625, "learning_rate": 2.4755925605965824e-06, "loss": 0.0503, "step": 518675 }, { "epoch": 5.1, "grad_norm": 8.384181022644043, "learning_rate": 2.4754684381423336e-06, "loss": 0.0695, "step": 518700 }, { "epoch": 5.1, "grad_norm": 5.885188102722168, "learning_rate": 2.4753443156880853e-06, "loss": 0.0446, "step": 518725 }, { "epoch": 5.1, "grad_norm": 4.084747314453125, "learning_rate": 2.475220193233837e-06, "loss": 0.0876, "step": 518750 }, { "epoch": 5.1, "grad_norm": 5.235370635986328, "learning_rate": 2.4750960707795885e-06, "loss": 0.0421, "step": 518775 }, { "epoch": 5.1, "grad_norm": 1.2235972881317139, "learning_rate": 2.47497194832534e-06, "loss": 0.0815, "step": 518800 }, { "epoch": 5.1, "grad_norm": 7.297369956970215, "learning_rate": 2.4748478258710914e-06, "loss": 0.054, "step": 518825 }, { "epoch": 5.1, "grad_norm": 0.7856004238128662, "learning_rate": 2.474723703416843e-06, "loss": 0.1008, "step": 518850 }, { "epoch": 5.1, "grad_norm": 3.81372332572937, "learning_rate": 2.4745995809625947e-06, "loss": 0.0381, "step": 518875 }, { "epoch": 5.1, "grad_norm": 1.147692322731018, "learning_rate": 2.4744754585083463e-06, "loss": 0.0703, "step": 518900 }, { "epoch": 5.1, "grad_norm": 10.619967460632324, "learning_rate": 2.474351336054098e-06, "loss": 0.0603, "step": 518925 }, { "epoch": 5.1, "grad_norm": 0.05515359342098236, "learning_rate": 2.4742272135998496e-06, "loss": 0.065, "step": 518950 }, { "epoch": 5.1, "grad_norm": 12.281474113464355, "learning_rate": 2.4741030911456008e-06, "loss": 0.0546, "step": 518975 }, { "epoch": 5.1, "grad_norm": 0.2662196159362793, "learning_rate": 2.4739789686913524e-06, "loss": 0.078, "step": 519000 }, { "epoch": 5.1, "grad_norm": 5.345423221588135, "learning_rate": 2.473854846237104e-06, "loss": 0.0567, "step": 519025 }, { "epoch": 5.1, "grad_norm": 3.2328972816467285, "learning_rate": 2.4737307237828553e-06, "loss": 0.0927, "step": 519050 }, { "epoch": 5.1, "grad_norm": 7.956839084625244, "learning_rate": 2.473606601328607e-06, "loss": 0.0523, "step": 519075 }, { "epoch": 5.1, "grad_norm": 0.552409827709198, "learning_rate": 2.4734824788743585e-06, "loss": 0.0948, "step": 519100 }, { "epoch": 5.1, "grad_norm": 6.1877641677856445, "learning_rate": 2.47335835642011e-06, "loss": 0.0632, "step": 519125 }, { "epoch": 5.1, "grad_norm": 1.0679705142974854, "learning_rate": 2.473234233965862e-06, "loss": 0.092, "step": 519150 }, { "epoch": 5.1, "grad_norm": 12.653056144714355, "learning_rate": 2.4731101115116134e-06, "loss": 0.061, "step": 519175 }, { "epoch": 5.1, "grad_norm": 0.06673029810190201, "learning_rate": 2.4729859890573646e-06, "loss": 0.0763, "step": 519200 }, { "epoch": 5.11, "grad_norm": 6.92647647857666, "learning_rate": 2.4728618666031163e-06, "loss": 0.0335, "step": 519225 }, { "epoch": 5.11, "grad_norm": 4.9120283126831055, "learning_rate": 2.4727377441488675e-06, "loss": 0.0778, "step": 519250 }, { "epoch": 5.11, "grad_norm": 16.56147003173828, "learning_rate": 2.472613621694619e-06, "loss": 0.0406, "step": 519275 }, { "epoch": 5.11, "grad_norm": 4.269128322601318, "learning_rate": 2.4724894992403708e-06, "loss": 0.0736, "step": 519300 }, { "epoch": 5.11, "grad_norm": 3.357414484024048, "learning_rate": 2.4723653767861224e-06, "loss": 0.0584, "step": 519325 }, { "epoch": 5.11, "grad_norm": 0.2767917513847351, "learning_rate": 2.472241254331874e-06, "loss": 0.072, "step": 519350 }, { "epoch": 5.11, "grad_norm": 23.752914428710938, "learning_rate": 2.4721171318776257e-06, "loss": 0.0637, "step": 519375 }, { "epoch": 5.11, "grad_norm": 2.054722309112549, "learning_rate": 2.471993009423377e-06, "loss": 0.0951, "step": 519400 }, { "epoch": 5.11, "grad_norm": 11.101065635681152, "learning_rate": 2.4718688869691285e-06, "loss": 0.0395, "step": 519425 }, { "epoch": 5.11, "grad_norm": 1.647445559501648, "learning_rate": 2.47174476451488e-06, "loss": 0.0798, "step": 519450 }, { "epoch": 5.11, "grad_norm": 12.400115013122559, "learning_rate": 2.4716206420606314e-06, "loss": 0.0549, "step": 519475 }, { "epoch": 5.11, "grad_norm": 4.8852410316467285, "learning_rate": 2.471496519606383e-06, "loss": 0.0768, "step": 519500 }, { "epoch": 5.11, "grad_norm": 4.817668437957764, "learning_rate": 2.4713723971521346e-06, "loss": 0.0662, "step": 519525 }, { "epoch": 5.11, "grad_norm": 0.4816308915615082, "learning_rate": 2.4712482746978863e-06, "loss": 0.0936, "step": 519550 }, { "epoch": 5.11, "grad_norm": 8.446330070495605, "learning_rate": 2.471124152243638e-06, "loss": 0.0428, "step": 519575 }, { "epoch": 5.11, "grad_norm": 0.5983853340148926, "learning_rate": 2.4710000297893895e-06, "loss": 0.0681, "step": 519600 }, { "epoch": 5.11, "grad_norm": 13.955511093139648, "learning_rate": 2.4708759073351407e-06, "loss": 0.0465, "step": 519625 }, { "epoch": 5.11, "grad_norm": 0.4709753692150116, "learning_rate": 2.4707517848808924e-06, "loss": 0.0631, "step": 519650 }, { "epoch": 5.11, "grad_norm": 14.921905517578125, "learning_rate": 2.4706276624266436e-06, "loss": 0.0611, "step": 519675 }, { "epoch": 5.11, "grad_norm": 0.009659641422331333, "learning_rate": 2.4705035399723952e-06, "loss": 0.0824, "step": 519700 }, { "epoch": 5.11, "grad_norm": 11.743191719055176, "learning_rate": 2.470379417518147e-06, "loss": 0.0518, "step": 519725 }, { "epoch": 5.11, "grad_norm": 1.08765709400177, "learning_rate": 2.4702552950638985e-06, "loss": 0.078, "step": 519750 }, { "epoch": 5.11, "grad_norm": 13.87224292755127, "learning_rate": 2.47013117260965e-06, "loss": 0.0319, "step": 519775 }, { "epoch": 5.11, "grad_norm": 0.5061531662940979, "learning_rate": 2.4700070501554018e-06, "loss": 0.0536, "step": 519800 }, { "epoch": 5.11, "grad_norm": 9.88637924194336, "learning_rate": 2.469882927701153e-06, "loss": 0.0649, "step": 519825 }, { "epoch": 5.11, "grad_norm": 0.08082258701324463, "learning_rate": 2.4697588052469046e-06, "loss": 0.0612, "step": 519850 }, { "epoch": 5.11, "grad_norm": 13.338142395019531, "learning_rate": 2.4696346827926562e-06, "loss": 0.0529, "step": 519875 }, { "epoch": 5.11, "grad_norm": 1.5032832622528076, "learning_rate": 2.4695105603384075e-06, "loss": 0.0712, "step": 519900 }, { "epoch": 5.11, "grad_norm": 11.418819427490234, "learning_rate": 2.469386437884159e-06, "loss": 0.0454, "step": 519925 }, { "epoch": 5.11, "grad_norm": 1.7328933477401733, "learning_rate": 2.4692623154299107e-06, "loss": 0.0587, "step": 519950 }, { "epoch": 5.11, "grad_norm": 5.7106242179870605, "learning_rate": 2.4691381929756624e-06, "loss": 0.0422, "step": 519975 }, { "epoch": 5.11, "grad_norm": 1.563698410987854, "learning_rate": 2.469014070521414e-06, "loss": 0.061, "step": 520000 }, { "epoch": 5.11, "eval_loss": 0.7593790292739868, "eval_runtime": 6138.1444, "eval_samples_per_second": 1.542, "eval_steps_per_second": 0.193, "eval_wer": 0.1160754522629241, "step": 520000 }, { "epoch": 5.11, "grad_norm": 10.239516258239746, "learning_rate": 2.4688899480671656e-06, "loss": 0.0526, "step": 520025 }, { "epoch": 5.11, "grad_norm": 0.36382952332496643, "learning_rate": 2.468765825612917e-06, "loss": 0.0752, "step": 520050 }, { "epoch": 5.11, "grad_norm": 9.73759937286377, "learning_rate": 2.4686417031586685e-06, "loss": 0.0485, "step": 520075 }, { "epoch": 5.11, "grad_norm": 1.5386954545974731, "learning_rate": 2.4685175807044197e-06, "loss": 0.0652, "step": 520100 }, { "epoch": 5.11, "grad_norm": 18.88768768310547, "learning_rate": 2.4683934582501713e-06, "loss": 0.0484, "step": 520125 }, { "epoch": 5.11, "grad_norm": 0.0859910398721695, "learning_rate": 2.468269335795923e-06, "loss": 0.0658, "step": 520150 }, { "epoch": 5.11, "grad_norm": 4.5205583572387695, "learning_rate": 2.4681452133416746e-06, "loss": 0.0565, "step": 520175 }, { "epoch": 5.11, "grad_norm": 1.2536283731460571, "learning_rate": 2.4680210908874262e-06, "loss": 0.0858, "step": 520200 }, { "epoch": 5.11, "grad_norm": 9.018129348754883, "learning_rate": 2.467896968433178e-06, "loss": 0.0456, "step": 520225 }, { "epoch": 5.12, "grad_norm": 2.2264249324798584, "learning_rate": 2.467772845978929e-06, "loss": 0.0932, "step": 520250 }, { "epoch": 5.12, "grad_norm": 14.94660472869873, "learning_rate": 2.4676487235246807e-06, "loss": 0.0631, "step": 520275 }, { "epoch": 5.12, "grad_norm": 6.374142169952393, "learning_rate": 2.4675246010704323e-06, "loss": 0.0652, "step": 520300 }, { "epoch": 5.12, "grad_norm": 18.150379180908203, "learning_rate": 2.4674004786161836e-06, "loss": 0.0579, "step": 520325 }, { "epoch": 5.12, "grad_norm": 1.153442621231079, "learning_rate": 2.467276356161935e-06, "loss": 0.0605, "step": 520350 }, { "epoch": 5.12, "grad_norm": 8.891427040100098, "learning_rate": 2.467152233707687e-06, "loss": 0.0453, "step": 520375 }, { "epoch": 5.12, "grad_norm": 2.035508394241333, "learning_rate": 2.4670281112534385e-06, "loss": 0.0698, "step": 520400 }, { "epoch": 5.12, "grad_norm": 1.0595173835754395, "learning_rate": 2.46690398879919e-06, "loss": 0.0381, "step": 520425 }, { "epoch": 5.12, "grad_norm": 4.149305820465088, "learning_rate": 2.4667848312431116e-06, "loss": 0.077, "step": 520450 }, { "epoch": 5.12, "grad_norm": 10.273738861083984, "learning_rate": 2.466660708788863e-06, "loss": 0.0463, "step": 520475 }, { "epoch": 5.12, "grad_norm": 0.3620421290397644, "learning_rate": 2.4665365863346144e-06, "loss": 0.0803, "step": 520500 }, { "epoch": 5.12, "grad_norm": 4.452524662017822, "learning_rate": 2.466412463880366e-06, "loss": 0.0366, "step": 520525 }, { "epoch": 5.12, "grad_norm": 0.9101026058197021, "learning_rate": 2.4662883414261177e-06, "loss": 0.0817, "step": 520550 }, { "epoch": 5.12, "grad_norm": 27.92616844177246, "learning_rate": 2.466164218971869e-06, "loss": 0.0499, "step": 520575 }, { "epoch": 5.12, "grad_norm": 4.085776329040527, "learning_rate": 2.4660400965176205e-06, "loss": 0.0558, "step": 520600 }, { "epoch": 5.12, "grad_norm": 10.993273735046387, "learning_rate": 2.465915974063372e-06, "loss": 0.0657, "step": 520625 }, { "epoch": 5.12, "grad_norm": 0.33505117893218994, "learning_rate": 2.4657918516091238e-06, "loss": 0.0659, "step": 520650 }, { "epoch": 5.12, "grad_norm": 11.92681884765625, "learning_rate": 2.4656677291548754e-06, "loss": 0.0399, "step": 520675 }, { "epoch": 5.12, "grad_norm": 1.2804992198944092, "learning_rate": 2.4655436067006266e-06, "loss": 0.0678, "step": 520700 }, { "epoch": 5.12, "grad_norm": 15.028890609741211, "learning_rate": 2.4654194842463783e-06, "loss": 0.0498, "step": 520725 }, { "epoch": 5.12, "grad_norm": 7.304421424865723, "learning_rate": 2.46529536179213e-06, "loss": 0.0714, "step": 520750 }, { "epoch": 5.12, "grad_norm": 9.97723388671875, "learning_rate": 2.465171239337881e-06, "loss": 0.0495, "step": 520775 }, { "epoch": 5.12, "grad_norm": 4.313903331756592, "learning_rate": 2.4650471168836327e-06, "loss": 0.0609, "step": 520800 }, { "epoch": 5.12, "grad_norm": 2.9115867614746094, "learning_rate": 2.4649229944293844e-06, "loss": 0.0452, "step": 520825 }, { "epoch": 5.12, "grad_norm": 2.4366891384124756, "learning_rate": 2.464798871975136e-06, "loss": 0.074, "step": 520850 }, { "epoch": 5.12, "grad_norm": 77.96971893310547, "learning_rate": 2.4646747495208877e-06, "loss": 0.0477, "step": 520875 }, { "epoch": 5.12, "grad_norm": 2.9197845458984375, "learning_rate": 2.4645506270666393e-06, "loss": 0.0843, "step": 520900 }, { "epoch": 5.12, "grad_norm": 7.356260776519775, "learning_rate": 2.4644265046123905e-06, "loss": 0.0375, "step": 520925 }, { "epoch": 5.12, "grad_norm": 5.055701732635498, "learning_rate": 2.464302382158142e-06, "loss": 0.0663, "step": 520950 }, { "epoch": 5.12, "grad_norm": 15.429130554199219, "learning_rate": 2.4641782597038938e-06, "loss": 0.0373, "step": 520975 }, { "epoch": 5.12, "grad_norm": 2.785679578781128, "learning_rate": 2.464054137249645e-06, "loss": 0.0768, "step": 521000 }, { "epoch": 5.12, "grad_norm": 6.976553440093994, "learning_rate": 2.4639300147953966e-06, "loss": 0.039, "step": 521025 }, { "epoch": 5.12, "grad_norm": 5.075831413269043, "learning_rate": 2.4638058923411482e-06, "loss": 0.1013, "step": 521050 }, { "epoch": 5.12, "grad_norm": 7.337833404541016, "learning_rate": 2.4636817698869e-06, "loss": 0.0402, "step": 521075 }, { "epoch": 5.12, "grad_norm": 0.004983074031770229, "learning_rate": 2.4635576474326515e-06, "loss": 0.0706, "step": 521100 }, { "epoch": 5.12, "grad_norm": 8.761807441711426, "learning_rate": 2.4634335249784027e-06, "loss": 0.0491, "step": 521125 }, { "epoch": 5.12, "grad_norm": 6.355469226837158, "learning_rate": 2.4633094025241544e-06, "loss": 0.0895, "step": 521150 }, { "epoch": 5.12, "grad_norm": 16.860511779785156, "learning_rate": 2.463185280069906e-06, "loss": 0.0583, "step": 521175 }, { "epoch": 5.12, "grad_norm": 2.514785051345825, "learning_rate": 2.463061157615657e-06, "loss": 0.1134, "step": 521200 }, { "epoch": 5.12, "grad_norm": 3.4182748794555664, "learning_rate": 2.462937035161409e-06, "loss": 0.0492, "step": 521225 }, { "epoch": 5.13, "grad_norm": 4.547136306762695, "learning_rate": 2.4628129127071605e-06, "loss": 0.1016, "step": 521250 }, { "epoch": 5.13, "grad_norm": 11.067644119262695, "learning_rate": 2.462688790252912e-06, "loss": 0.0652, "step": 521275 }, { "epoch": 5.13, "grad_norm": 0.34339696168899536, "learning_rate": 2.4625646677986638e-06, "loss": 0.0777, "step": 521300 }, { "epoch": 5.13, "grad_norm": 8.584537506103516, "learning_rate": 2.4624405453444154e-06, "loss": 0.0423, "step": 521325 }, { "epoch": 5.13, "grad_norm": 1.1363999843597412, "learning_rate": 2.4623164228901666e-06, "loss": 0.0865, "step": 521350 }, { "epoch": 5.13, "grad_norm": 7.483342170715332, "learning_rate": 2.4621923004359182e-06, "loss": 0.0483, "step": 521375 }, { "epoch": 5.13, "grad_norm": 0.19363492727279663, "learning_rate": 2.46206817798167e-06, "loss": 0.0825, "step": 521400 }, { "epoch": 5.13, "grad_norm": 10.366127014160156, "learning_rate": 2.461944055527421e-06, "loss": 0.0459, "step": 521425 }, { "epoch": 5.13, "grad_norm": 1.0683097839355469, "learning_rate": 2.4618199330731727e-06, "loss": 0.0922, "step": 521450 }, { "epoch": 5.13, "grad_norm": 5.878841876983643, "learning_rate": 2.4616958106189243e-06, "loss": 0.0457, "step": 521475 }, { "epoch": 5.13, "grad_norm": 0.42494291067123413, "learning_rate": 2.461571688164676e-06, "loss": 0.1007, "step": 521500 }, { "epoch": 5.13, "grad_norm": 3.708807945251465, "learning_rate": 2.4614475657104276e-06, "loss": 0.0571, "step": 521525 }, { "epoch": 5.13, "grad_norm": 1.8084124326705933, "learning_rate": 2.461323443256179e-06, "loss": 0.1103, "step": 521550 }, { "epoch": 5.13, "grad_norm": 7.370792865753174, "learning_rate": 2.4611993208019305e-06, "loss": 0.0477, "step": 521575 }, { "epoch": 5.13, "grad_norm": 6.157598495483398, "learning_rate": 2.461075198347682e-06, "loss": 0.0841, "step": 521600 }, { "epoch": 5.13, "grad_norm": 23.457706451416016, "learning_rate": 2.4609510758934333e-06, "loss": 0.0572, "step": 521625 }, { "epoch": 5.13, "grad_norm": 1.9900680780410767, "learning_rate": 2.460826953439185e-06, "loss": 0.0615, "step": 521650 }, { "epoch": 5.13, "grad_norm": 9.78824234008789, "learning_rate": 2.4607028309849366e-06, "loss": 0.0464, "step": 521675 }, { "epoch": 5.13, "grad_norm": 3.33978271484375, "learning_rate": 2.4605787085306882e-06, "loss": 0.0722, "step": 521700 }, { "epoch": 5.13, "grad_norm": 15.37701416015625, "learning_rate": 2.46045458607644e-06, "loss": 0.0469, "step": 521725 }, { "epoch": 5.13, "grad_norm": 3.2930197715759277, "learning_rate": 2.4603304636221915e-06, "loss": 0.0792, "step": 521750 }, { "epoch": 5.13, "grad_norm": 1.7382338047027588, "learning_rate": 2.4602063411679427e-06, "loss": 0.0618, "step": 521775 }, { "epoch": 5.13, "grad_norm": 2.5218887329101562, "learning_rate": 2.4600822187136943e-06, "loss": 0.0858, "step": 521800 }, { "epoch": 5.13, "grad_norm": 1.744884729385376, "learning_rate": 2.459958096259446e-06, "loss": 0.0587, "step": 521825 }, { "epoch": 5.13, "grad_norm": 0.47060245275497437, "learning_rate": 2.4598339738051976e-06, "loss": 0.0744, "step": 521850 }, { "epoch": 5.13, "grad_norm": 11.708003997802734, "learning_rate": 2.4597098513509492e-06, "loss": 0.0573, "step": 521875 }, { "epoch": 5.13, "grad_norm": 1.853191614151001, "learning_rate": 2.459585728896701e-06, "loss": 0.0891, "step": 521900 }, { "epoch": 5.13, "grad_norm": 1.4133381843566895, "learning_rate": 2.459461606442452e-06, "loss": 0.0565, "step": 521925 }, { "epoch": 5.13, "grad_norm": 0.204982191324234, "learning_rate": 2.4593374839882037e-06, "loss": 0.063, "step": 521950 }, { "epoch": 5.13, "grad_norm": 13.998564720153809, "learning_rate": 2.459213361533955e-06, "loss": 0.0644, "step": 521975 }, { "epoch": 5.13, "grad_norm": 0.16619956493377686, "learning_rate": 2.4590892390797066e-06, "loss": 0.1095, "step": 522000 }, { "epoch": 5.13, "grad_norm": 11.80838394165039, "learning_rate": 2.458965116625458e-06, "loss": 0.0466, "step": 522025 }, { "epoch": 5.13, "grad_norm": 5.497393608093262, "learning_rate": 2.45884099417121e-06, "loss": 0.0619, "step": 522050 }, { "epoch": 5.13, "grad_norm": 10.685772895812988, "learning_rate": 2.4587168717169615e-06, "loss": 0.0485, "step": 522075 }, { "epoch": 5.13, "grad_norm": 0.4518061876296997, "learning_rate": 2.458592749262713e-06, "loss": 0.064, "step": 522100 }, { "epoch": 5.13, "grad_norm": 17.400239944458008, "learning_rate": 2.4584686268084643e-06, "loss": 0.0689, "step": 522125 }, { "epoch": 5.13, "grad_norm": 1.4064593315124512, "learning_rate": 2.458344504354216e-06, "loss": 0.0674, "step": 522150 }, { "epoch": 5.13, "grad_norm": 17.59259605407715, "learning_rate": 2.4582203818999676e-06, "loss": 0.045, "step": 522175 }, { "epoch": 5.13, "grad_norm": 0.058920763432979584, "learning_rate": 2.458096259445719e-06, "loss": 0.0736, "step": 522200 }, { "epoch": 5.13, "grad_norm": 4.317403316497803, "learning_rate": 2.4579721369914704e-06, "loss": 0.0385, "step": 522225 }, { "epoch": 5.13, "grad_norm": 1.3374738693237305, "learning_rate": 2.457848014537222e-06, "loss": 0.0639, "step": 522250 }, { "epoch": 5.14, "grad_norm": 11.538135528564453, "learning_rate": 2.4577238920829737e-06, "loss": 0.0466, "step": 522275 }, { "epoch": 5.14, "grad_norm": 0.13762596249580383, "learning_rate": 2.4575997696287253e-06, "loss": 0.0673, "step": 522300 }, { "epoch": 5.14, "grad_norm": 12.291046142578125, "learning_rate": 2.457475647174477e-06, "loss": 0.0528, "step": 522325 }, { "epoch": 5.14, "grad_norm": 1.1798901557922363, "learning_rate": 2.457351524720228e-06, "loss": 0.0839, "step": 522350 }, { "epoch": 5.14, "grad_norm": 6.308773517608643, "learning_rate": 2.45722740226598e-06, "loss": 0.0352, "step": 522375 }, { "epoch": 5.14, "grad_norm": 10.497956275939941, "learning_rate": 2.457103279811731e-06, "loss": 0.0615, "step": 522400 }, { "epoch": 5.14, "grad_norm": 10.940805435180664, "learning_rate": 2.4569791573574827e-06, "loss": 0.0557, "step": 522425 }, { "epoch": 5.14, "grad_norm": 1.2375919818878174, "learning_rate": 2.4568550349032343e-06, "loss": 0.0778, "step": 522450 }, { "epoch": 5.14, "grad_norm": 10.08373737335205, "learning_rate": 2.456730912448986e-06, "loss": 0.0521, "step": 522475 }, { "epoch": 5.14, "grad_norm": 3.1366827487945557, "learning_rate": 2.4566117548929074e-06, "loss": 0.0789, "step": 522500 }, { "epoch": 5.14, "grad_norm": 13.929924964904785, "learning_rate": 2.456487632438659e-06, "loss": 0.0526, "step": 522525 }, { "epoch": 5.14, "grad_norm": 0.4577993154525757, "learning_rate": 2.4563635099844107e-06, "loss": 0.1008, "step": 522550 }, { "epoch": 5.14, "grad_norm": 17.56688690185547, "learning_rate": 2.456239387530162e-06, "loss": 0.0464, "step": 522575 }, { "epoch": 5.14, "grad_norm": 6.841202735900879, "learning_rate": 2.4561152650759135e-06, "loss": 0.0534, "step": 522600 }, { "epoch": 5.14, "grad_norm": 5.028695583343506, "learning_rate": 2.455991142621665e-06, "loss": 0.0441, "step": 522625 }, { "epoch": 5.14, "grad_norm": 0.1528003215789795, "learning_rate": 2.4558670201674164e-06, "loss": 0.0752, "step": 522650 }, { "epoch": 5.14, "grad_norm": 5.287869453430176, "learning_rate": 2.455742897713168e-06, "loss": 0.0451, "step": 522675 }, { "epoch": 5.14, "grad_norm": 21.51874351501465, "learning_rate": 2.4556187752589196e-06, "loss": 0.0941, "step": 522700 }, { "epoch": 5.14, "grad_norm": 12.461019515991211, "learning_rate": 2.4554946528046713e-06, "loss": 0.0488, "step": 522725 }, { "epoch": 5.14, "grad_norm": 0.17545250058174133, "learning_rate": 2.455370530350423e-06, "loss": 0.0863, "step": 522750 }, { "epoch": 5.14, "grad_norm": 9.74669075012207, "learning_rate": 2.4552464078961745e-06, "loss": 0.0489, "step": 522775 }, { "epoch": 5.14, "grad_norm": 3.300936222076416, "learning_rate": 2.4551222854419257e-06, "loss": 0.0889, "step": 522800 }, { "epoch": 5.14, "grad_norm": 7.541268348693848, "learning_rate": 2.4549981629876774e-06, "loss": 0.032, "step": 522825 }, { "epoch": 5.14, "grad_norm": 2.7156474590301514, "learning_rate": 2.454874040533429e-06, "loss": 0.0957, "step": 522850 }, { "epoch": 5.14, "grad_norm": 8.688248634338379, "learning_rate": 2.4547499180791802e-06, "loss": 0.0441, "step": 522875 }, { "epoch": 5.14, "grad_norm": 1.3672878742218018, "learning_rate": 2.454625795624932e-06, "loss": 0.0968, "step": 522900 }, { "epoch": 5.14, "grad_norm": 12.085041999816895, "learning_rate": 2.4545016731706835e-06, "loss": 0.0513, "step": 522925 }, { "epoch": 5.14, "grad_norm": 3.1803767681121826, "learning_rate": 2.454377550716435e-06, "loss": 0.0909, "step": 522950 }, { "epoch": 5.14, "grad_norm": 12.680217742919922, "learning_rate": 2.4542534282621868e-06, "loss": 0.0543, "step": 522975 }, { "epoch": 5.14, "grad_norm": 5.566616535186768, "learning_rate": 2.454129305807938e-06, "loss": 0.067, "step": 523000 }, { "epoch": 5.14, "grad_norm": 7.728738784790039, "learning_rate": 2.4540051833536896e-06, "loss": 0.0399, "step": 523025 }, { "epoch": 5.14, "grad_norm": 1.3938206434249878, "learning_rate": 2.4538810608994412e-06, "loss": 0.069, "step": 523050 }, { "epoch": 5.14, "grad_norm": 7.915738105773926, "learning_rate": 2.4537569384451925e-06, "loss": 0.0572, "step": 523075 }, { "epoch": 5.14, "grad_norm": 0.06056895852088928, "learning_rate": 2.453632815990944e-06, "loss": 0.0668, "step": 523100 }, { "epoch": 5.14, "grad_norm": 6.176711559295654, "learning_rate": 2.4535086935366957e-06, "loss": 0.0508, "step": 523125 }, { "epoch": 5.14, "grad_norm": 0.5771670937538147, "learning_rate": 2.4533845710824474e-06, "loss": 0.0741, "step": 523150 }, { "epoch": 5.14, "grad_norm": 6.341701507568359, "learning_rate": 2.453260448628199e-06, "loss": 0.0507, "step": 523175 }, { "epoch": 5.14, "grad_norm": 0.760029137134552, "learning_rate": 2.4531363261739506e-06, "loss": 0.0774, "step": 523200 }, { "epoch": 5.14, "grad_norm": 10.94728946685791, "learning_rate": 2.453012203719702e-06, "loss": 0.0449, "step": 523225 }, { "epoch": 5.14, "grad_norm": 0.05220336839556694, "learning_rate": 2.4528880812654535e-06, "loss": 0.0654, "step": 523250 }, { "epoch": 5.14, "grad_norm": 12.930423736572266, "learning_rate": 2.452763958811205e-06, "loss": 0.0432, "step": 523275 }, { "epoch": 5.15, "grad_norm": 5.540925979614258, "learning_rate": 2.4526398363569563e-06, "loss": 0.099, "step": 523300 }, { "epoch": 5.15, "grad_norm": 12.528953552246094, "learning_rate": 2.452515713902708e-06, "loss": 0.0554, "step": 523325 }, { "epoch": 5.15, "grad_norm": 1.1636648178100586, "learning_rate": 2.4523915914484596e-06, "loss": 0.0529, "step": 523350 }, { "epoch": 5.15, "grad_norm": 14.702483177185059, "learning_rate": 2.4522674689942112e-06, "loss": 0.0556, "step": 523375 }, { "epoch": 5.15, "grad_norm": 3.6150598526000977, "learning_rate": 2.452143346539963e-06, "loss": 0.0693, "step": 523400 }, { "epoch": 5.15, "grad_norm": 7.770029067993164, "learning_rate": 2.452019224085714e-06, "loss": 0.0518, "step": 523425 }, { "epoch": 5.15, "grad_norm": 0.6774001121520996, "learning_rate": 2.4518951016314657e-06, "loss": 0.087, "step": 523450 }, { "epoch": 5.15, "grad_norm": 13.640196800231934, "learning_rate": 2.4517709791772173e-06, "loss": 0.048, "step": 523475 }, { "epoch": 5.15, "grad_norm": 4.109159469604492, "learning_rate": 2.4516468567229686e-06, "loss": 0.0774, "step": 523500 }, { "epoch": 5.15, "grad_norm": 4.293295383453369, "learning_rate": 2.45152273426872e-06, "loss": 0.053, "step": 523525 }, { "epoch": 5.15, "grad_norm": 1.2996090650558472, "learning_rate": 2.451398611814472e-06, "loss": 0.0878, "step": 523550 }, { "epoch": 5.15, "grad_norm": 16.499435424804688, "learning_rate": 2.4512744893602235e-06, "loss": 0.0491, "step": 523575 }, { "epoch": 5.15, "grad_norm": 5.415855884552002, "learning_rate": 2.451150366905975e-06, "loss": 0.0889, "step": 523600 }, { "epoch": 5.15, "grad_norm": 14.007580757141113, "learning_rate": 2.4510262444517267e-06, "loss": 0.0534, "step": 523625 }, { "epoch": 5.15, "grad_norm": 0.1993461698293686, "learning_rate": 2.450902121997478e-06, "loss": 0.0598, "step": 523650 }, { "epoch": 5.15, "grad_norm": 4.856870174407959, "learning_rate": 2.4507779995432296e-06, "loss": 0.0615, "step": 523675 }, { "epoch": 5.15, "grad_norm": 0.24684232473373413, "learning_rate": 2.450653877088981e-06, "loss": 0.1017, "step": 523700 }, { "epoch": 5.15, "grad_norm": 18.137165069580078, "learning_rate": 2.4505297546347324e-06, "loss": 0.0466, "step": 523725 }, { "epoch": 5.15, "grad_norm": 7.044427871704102, "learning_rate": 2.450405632180484e-06, "loss": 0.0882, "step": 523750 }, { "epoch": 5.15, "grad_norm": 9.748374938964844, "learning_rate": 2.4502815097262357e-06, "loss": 0.0475, "step": 523775 }, { "epoch": 5.15, "grad_norm": 0.2785995304584503, "learning_rate": 2.4501573872719873e-06, "loss": 0.0895, "step": 523800 }, { "epoch": 5.15, "grad_norm": 5.475216388702393, "learning_rate": 2.450033264817739e-06, "loss": 0.0596, "step": 523825 }, { "epoch": 5.15, "grad_norm": 0.10673591494560242, "learning_rate": 2.44990914236349e-06, "loss": 0.0884, "step": 523850 }, { "epoch": 5.15, "grad_norm": 11.84589958190918, "learning_rate": 2.449785019909242e-06, "loss": 0.0668, "step": 523875 }, { "epoch": 5.15, "grad_norm": 6.092864990234375, "learning_rate": 2.4496608974549934e-06, "loss": 0.0889, "step": 523900 }, { "epoch": 5.15, "grad_norm": 9.13327407836914, "learning_rate": 2.4495367750007447e-06, "loss": 0.0615, "step": 523925 }, { "epoch": 5.15, "grad_norm": 6.591254234313965, "learning_rate": 2.4494126525464963e-06, "loss": 0.1117, "step": 523950 }, { "epoch": 5.15, "grad_norm": 13.98011302947998, "learning_rate": 2.449288530092248e-06, "loss": 0.059, "step": 523975 }, { "epoch": 5.15, "grad_norm": 0.005536175798624754, "learning_rate": 2.4491644076379996e-06, "loss": 0.0576, "step": 524000 }, { "epoch": 5.15, "grad_norm": 6.459385871887207, "learning_rate": 2.449040285183751e-06, "loss": 0.06, "step": 524025 }, { "epoch": 5.15, "grad_norm": 11.906083106994629, "learning_rate": 2.448916162729503e-06, "loss": 0.0865, "step": 524050 }, { "epoch": 5.15, "grad_norm": 5.65266227722168, "learning_rate": 2.448792040275254e-06, "loss": 0.0472, "step": 524075 }, { "epoch": 5.15, "grad_norm": 0.25393006205558777, "learning_rate": 2.4486679178210057e-06, "loss": 0.0999, "step": 524100 }, { "epoch": 5.15, "grad_norm": 6.0496697425842285, "learning_rate": 2.4485437953667573e-06, "loss": 0.0528, "step": 524125 }, { "epoch": 5.15, "grad_norm": 0.8067338466644287, "learning_rate": 2.4484196729125085e-06, "loss": 0.0831, "step": 524150 }, { "epoch": 5.15, "grad_norm": 5.941356658935547, "learning_rate": 2.44829555045826e-06, "loss": 0.0442, "step": 524175 }, { "epoch": 5.15, "grad_norm": 0.22523576021194458, "learning_rate": 2.448171428004012e-06, "loss": 0.0948, "step": 524200 }, { "epoch": 5.15, "grad_norm": 18.88739013671875, "learning_rate": 2.4480473055497634e-06, "loss": 0.0544, "step": 524225 }, { "epoch": 5.15, "grad_norm": 1.9187333583831787, "learning_rate": 2.447923183095515e-06, "loss": 0.1015, "step": 524250 }, { "epoch": 5.15, "grad_norm": 10.238045692443848, "learning_rate": 2.4477990606412663e-06, "loss": 0.0465, "step": 524275 }, { "epoch": 5.16, "grad_norm": 0.015373733825981617, "learning_rate": 2.447674938187018e-06, "loss": 0.0966, "step": 524300 }, { "epoch": 5.16, "grad_norm": 3.8742055892944336, "learning_rate": 2.4475508157327695e-06, "loss": 0.0578, "step": 524325 }, { "epoch": 5.16, "grad_norm": 1.20648992061615, "learning_rate": 2.4474266932785208e-06, "loss": 0.0842, "step": 524350 }, { "epoch": 5.16, "grad_norm": 12.761552810668945, "learning_rate": 2.4473025708242724e-06, "loss": 0.0514, "step": 524375 }, { "epoch": 5.16, "grad_norm": 0.0031980520579963923, "learning_rate": 2.447178448370024e-06, "loss": 0.0745, "step": 524400 }, { "epoch": 5.16, "grad_norm": 9.282246589660645, "learning_rate": 2.4470543259157757e-06, "loss": 0.0427, "step": 524425 }, { "epoch": 5.16, "grad_norm": 0.8506672978401184, "learning_rate": 2.4469302034615273e-06, "loss": 0.0701, "step": 524450 }, { "epoch": 5.16, "grad_norm": 8.967475891113281, "learning_rate": 2.446806081007279e-06, "loss": 0.0358, "step": 524475 }, { "epoch": 5.16, "grad_norm": 0.9372380971908569, "learning_rate": 2.44668195855303e-06, "loss": 0.0571, "step": 524500 }, { "epoch": 5.16, "grad_norm": 8.919699668884277, "learning_rate": 2.4465578360987818e-06, "loss": 0.0507, "step": 524525 }, { "epoch": 5.16, "grad_norm": 0.020481614395976067, "learning_rate": 2.4464337136445334e-06, "loss": 0.0624, "step": 524550 }, { "epoch": 5.16, "grad_norm": 10.645170211791992, "learning_rate": 2.4463095911902846e-06, "loss": 0.0571, "step": 524575 }, { "epoch": 5.16, "grad_norm": 9.329065322875977, "learning_rate": 2.4461854687360363e-06, "loss": 0.0938, "step": 524600 }, { "epoch": 5.16, "grad_norm": 9.463136672973633, "learning_rate": 2.446061346281788e-06, "loss": 0.0426, "step": 524625 }, { "epoch": 5.16, "grad_norm": 7.873040676116943, "learning_rate": 2.4459372238275395e-06, "loss": 0.083, "step": 524650 }, { "epoch": 5.16, "grad_norm": 8.36269474029541, "learning_rate": 2.445813101373291e-06, "loss": 0.0584, "step": 524675 }, { "epoch": 5.16, "grad_norm": 4.5363311767578125, "learning_rate": 2.4456889789190424e-06, "loss": 0.0863, "step": 524700 }, { "epoch": 5.16, "grad_norm": 3.0585923194885254, "learning_rate": 2.445564856464794e-06, "loss": 0.0424, "step": 524725 }, { "epoch": 5.16, "grad_norm": 1.9185116291046143, "learning_rate": 2.4454407340105456e-06, "loss": 0.0867, "step": 524750 }, { "epoch": 5.16, "grad_norm": 5.10901403427124, "learning_rate": 2.4453166115562973e-06, "loss": 0.0522, "step": 524775 }, { "epoch": 5.16, "grad_norm": 0.3875865340232849, "learning_rate": 2.445192489102049e-06, "loss": 0.0748, "step": 524800 }, { "epoch": 5.16, "grad_norm": 12.108064651489258, "learning_rate": 2.4450683666478005e-06, "loss": 0.0605, "step": 524825 }, { "epoch": 5.16, "grad_norm": 20.681175231933594, "learning_rate": 2.4449442441935518e-06, "loss": 0.065, "step": 524850 }, { "epoch": 5.16, "grad_norm": 5.223692417144775, "learning_rate": 2.4448201217393034e-06, "loss": 0.0629, "step": 524875 }, { "epoch": 5.16, "grad_norm": 3.573147773742676, "learning_rate": 2.444695999285055e-06, "loss": 0.0719, "step": 524900 }, { "epoch": 5.16, "grad_norm": 11.484822273254395, "learning_rate": 2.4445718768308062e-06, "loss": 0.0412, "step": 524925 }, { "epoch": 5.16, "grad_norm": 2.6629505157470703, "learning_rate": 2.444447754376558e-06, "loss": 0.0608, "step": 524950 }, { "epoch": 5.16, "grad_norm": 12.772400856018066, "learning_rate": 2.4443236319223095e-06, "loss": 0.0646, "step": 524975 }, { "epoch": 5.16, "grad_norm": 0.15301962196826935, "learning_rate": 2.444199509468061e-06, "loss": 0.0739, "step": 525000 }, { "epoch": 5.16, "grad_norm": 9.251249313354492, "learning_rate": 2.4440753870138128e-06, "loss": 0.0453, "step": 525025 }, { "epoch": 5.16, "grad_norm": 3.0531914234161377, "learning_rate": 2.4439512645595644e-06, "loss": 0.0858, "step": 525050 }, { "epoch": 5.16, "grad_norm": 10.287836074829102, "learning_rate": 2.4438271421053156e-06, "loss": 0.0626, "step": 525075 }, { "epoch": 5.16, "grad_norm": 0.5079699754714966, "learning_rate": 2.4437030196510673e-06, "loss": 0.0857, "step": 525100 }, { "epoch": 5.16, "grad_norm": 5.582519054412842, "learning_rate": 2.4435788971968185e-06, "loss": 0.063, "step": 525125 }, { "epoch": 5.16, "grad_norm": 2.7133052349090576, "learning_rate": 2.44345477474257e-06, "loss": 0.1002, "step": 525150 }, { "epoch": 5.16, "grad_norm": 13.184591293334961, "learning_rate": 2.4433306522883217e-06, "loss": 0.0636, "step": 525175 }, { "epoch": 5.16, "grad_norm": 0.0633002370595932, "learning_rate": 2.443211494732243e-06, "loss": 0.0856, "step": 525200 }, { "epoch": 5.16, "grad_norm": 6.136517524719238, "learning_rate": 2.443087372277995e-06, "loss": 0.0588, "step": 525225 }, { "epoch": 5.16, "grad_norm": 0.4820614159107208, "learning_rate": 2.4429632498237465e-06, "loss": 0.0711, "step": 525250 }, { "epoch": 5.16, "grad_norm": 18.152204513549805, "learning_rate": 2.442839127369498e-06, "loss": 0.0394, "step": 525275 }, { "epoch": 5.16, "grad_norm": 6.778841495513916, "learning_rate": 2.4427150049152493e-06, "loss": 0.1024, "step": 525300 }, { "epoch": 5.17, "grad_norm": 10.834820747375488, "learning_rate": 2.442590882461001e-06, "loss": 0.0606, "step": 525325 }, { "epoch": 5.17, "grad_norm": 1.8702963590621948, "learning_rate": 2.4424667600067526e-06, "loss": 0.0625, "step": 525350 }, { "epoch": 5.17, "grad_norm": 7.4666619300842285, "learning_rate": 2.442342637552504e-06, "loss": 0.0596, "step": 525375 }, { "epoch": 5.17, "grad_norm": 2.739696502685547, "learning_rate": 2.4422185150982554e-06, "loss": 0.0862, "step": 525400 }, { "epoch": 5.17, "grad_norm": 17.6202335357666, "learning_rate": 2.442094392644007e-06, "loss": 0.0794, "step": 525425 }, { "epoch": 5.17, "grad_norm": 2.725431203842163, "learning_rate": 2.4419702701897587e-06, "loss": 0.0771, "step": 525450 }, { "epoch": 5.17, "grad_norm": 5.026076316833496, "learning_rate": 2.4418461477355103e-06, "loss": 0.0574, "step": 525475 }, { "epoch": 5.17, "grad_norm": 3.0528995990753174, "learning_rate": 2.441722025281262e-06, "loss": 0.0909, "step": 525500 }, { "epoch": 5.17, "grad_norm": 9.725342750549316, "learning_rate": 2.441597902827013e-06, "loss": 0.0529, "step": 525525 }, { "epoch": 5.17, "grad_norm": 0.36454805731773376, "learning_rate": 2.441473780372765e-06, "loss": 0.0712, "step": 525550 }, { "epoch": 5.17, "grad_norm": 69.84020233154297, "learning_rate": 2.4413496579185164e-06, "loss": 0.055, "step": 525575 }, { "epoch": 5.17, "grad_norm": 2.0830202102661133, "learning_rate": 2.4412255354642677e-06, "loss": 0.0598, "step": 525600 }, { "epoch": 5.17, "grad_norm": 12.041024208068848, "learning_rate": 2.4411014130100193e-06, "loss": 0.0334, "step": 525625 }, { "epoch": 5.17, "grad_norm": 0.6784700751304626, "learning_rate": 2.440977290555771e-06, "loss": 0.1119, "step": 525650 }, { "epoch": 5.17, "grad_norm": 5.985408306121826, "learning_rate": 2.4408531681015226e-06, "loss": 0.0566, "step": 525675 }, { "epoch": 5.17, "grad_norm": 2.7077269554138184, "learning_rate": 2.440729045647274e-06, "loss": 0.0877, "step": 525700 }, { "epoch": 5.17, "grad_norm": 43.1369743347168, "learning_rate": 2.4406049231930254e-06, "loss": 0.0358, "step": 525725 }, { "epoch": 5.17, "grad_norm": 1.5217002630233765, "learning_rate": 2.440480800738777e-06, "loss": 0.1056, "step": 525750 }, { "epoch": 5.17, "grad_norm": 13.20661449432373, "learning_rate": 2.4403566782845287e-06, "loss": 0.0596, "step": 525775 }, { "epoch": 5.17, "grad_norm": 0.12049586325883865, "learning_rate": 2.44023255583028e-06, "loss": 0.0753, "step": 525800 }, { "epoch": 5.17, "grad_norm": 8.569293022155762, "learning_rate": 2.4401084333760315e-06, "loss": 0.0393, "step": 525825 }, { "epoch": 5.17, "grad_norm": 1.6834415197372437, "learning_rate": 2.439984310921783e-06, "loss": 0.0928, "step": 525850 }, { "epoch": 5.17, "grad_norm": 2.273003101348877, "learning_rate": 2.439860188467535e-06, "loss": 0.0439, "step": 525875 }, { "epoch": 5.17, "grad_norm": 0.11962537467479706, "learning_rate": 2.4397360660132864e-06, "loss": 0.0711, "step": 525900 }, { "epoch": 5.17, "grad_norm": 18.2356014251709, "learning_rate": 2.439611943559038e-06, "loss": 0.0394, "step": 525925 }, { "epoch": 5.17, "grad_norm": 0.348617821931839, "learning_rate": 2.4394878211047893e-06, "loss": 0.1055, "step": 525950 }, { "epoch": 5.17, "grad_norm": 12.509876251220703, "learning_rate": 2.439363698650541e-06, "loss": 0.0565, "step": 525975 }, { "epoch": 5.17, "grad_norm": 15.959589004516602, "learning_rate": 2.4392395761962925e-06, "loss": 0.081, "step": 526000 }, { "epoch": 5.17, "grad_norm": 15.763348579406738, "learning_rate": 2.4391154537420438e-06, "loss": 0.0495, "step": 526025 }, { "epoch": 5.17, "grad_norm": 2.010568380355835, "learning_rate": 2.4389913312877954e-06, "loss": 0.0648, "step": 526050 }, { "epoch": 5.17, "grad_norm": 19.53766441345215, "learning_rate": 2.438867208833547e-06, "loss": 0.0666, "step": 526075 }, { "epoch": 5.17, "grad_norm": 4.261346817016602, "learning_rate": 2.4387430863792987e-06, "loss": 0.0765, "step": 526100 }, { "epoch": 5.17, "grad_norm": 17.79854965209961, "learning_rate": 2.4386189639250503e-06, "loss": 0.0706, "step": 526125 }, { "epoch": 5.17, "grad_norm": 8.59843921661377, "learning_rate": 2.4384948414708015e-06, "loss": 0.0795, "step": 526150 }, { "epoch": 5.17, "grad_norm": 8.11406135559082, "learning_rate": 2.438370719016553e-06, "loss": 0.0448, "step": 526175 }, { "epoch": 5.17, "grad_norm": 1.3759347200393677, "learning_rate": 2.4382465965623048e-06, "loss": 0.0804, "step": 526200 }, { "epoch": 5.17, "grad_norm": 22.80472755432129, "learning_rate": 2.438122474108056e-06, "loss": 0.0561, "step": 526225 }, { "epoch": 5.17, "grad_norm": 0.17981688678264618, "learning_rate": 2.4379983516538076e-06, "loss": 0.072, "step": 526250 }, { "epoch": 5.17, "grad_norm": 9.629228591918945, "learning_rate": 2.4378742291995593e-06, "loss": 0.0419, "step": 526275 }, { "epoch": 5.17, "grad_norm": 8.09936237335205, "learning_rate": 2.437750106745311e-06, "loss": 0.092, "step": 526300 }, { "epoch": 5.17, "grad_norm": 14.883777618408203, "learning_rate": 2.4376259842910625e-06, "loss": 0.0418, "step": 526325 }, { "epoch": 5.18, "grad_norm": 0.7379246354103088, "learning_rate": 2.437501861836814e-06, "loss": 0.1182, "step": 526350 }, { "epoch": 5.18, "grad_norm": 6.893866539001465, "learning_rate": 2.4373777393825654e-06, "loss": 0.0492, "step": 526375 }, { "epoch": 5.18, "grad_norm": 2.2401294708251953, "learning_rate": 2.437253616928317e-06, "loss": 0.083, "step": 526400 }, { "epoch": 5.18, "grad_norm": 8.23737907409668, "learning_rate": 2.4371294944740686e-06, "loss": 0.0537, "step": 526425 }, { "epoch": 5.18, "grad_norm": 3.41204833984375, "learning_rate": 2.43700537201982e-06, "loss": 0.0822, "step": 526450 }, { "epoch": 5.18, "grad_norm": 2.087203025817871, "learning_rate": 2.4368812495655715e-06, "loss": 0.0316, "step": 526475 }, { "epoch": 5.18, "grad_norm": 0.8317099213600159, "learning_rate": 2.436757127111323e-06, "loss": 0.0966, "step": 526500 }, { "epoch": 5.18, "grad_norm": 9.507376670837402, "learning_rate": 2.4366330046570748e-06, "loss": 0.0369, "step": 526525 }, { "epoch": 5.18, "grad_norm": 2.96781325340271, "learning_rate": 2.4365088822028264e-06, "loss": 0.1174, "step": 526550 }, { "epoch": 5.18, "grad_norm": 11.636631965637207, "learning_rate": 2.4363847597485776e-06, "loss": 0.0516, "step": 526575 }, { "epoch": 5.18, "grad_norm": 0.3765096068382263, "learning_rate": 2.4362606372943292e-06, "loss": 0.0923, "step": 526600 }, { "epoch": 5.18, "grad_norm": 7.125023365020752, "learning_rate": 2.436136514840081e-06, "loss": 0.048, "step": 526625 }, { "epoch": 5.18, "grad_norm": 2.7592480182647705, "learning_rate": 2.436012392385832e-06, "loss": 0.0951, "step": 526650 }, { "epoch": 5.18, "grad_norm": 16.76683235168457, "learning_rate": 2.4358882699315837e-06, "loss": 0.0489, "step": 526675 }, { "epoch": 5.18, "grad_norm": 1.7610434293746948, "learning_rate": 2.4357641474773354e-06, "loss": 0.0685, "step": 526700 }, { "epoch": 5.18, "grad_norm": 7.358968257904053, "learning_rate": 2.435640025023087e-06, "loss": 0.0376, "step": 526725 }, { "epoch": 5.18, "grad_norm": 2.3068037033081055, "learning_rate": 2.4355159025688386e-06, "loss": 0.0787, "step": 526750 }, { "epoch": 5.18, "grad_norm": 8.488572120666504, "learning_rate": 2.4353917801145903e-06, "loss": 0.0571, "step": 526775 }, { "epoch": 5.18, "grad_norm": 2.1914823055267334, "learning_rate": 2.4352676576603415e-06, "loss": 0.0856, "step": 526800 }, { "epoch": 5.18, "grad_norm": 5.149812698364258, "learning_rate": 2.435143535206093e-06, "loss": 0.0484, "step": 526825 }, { "epoch": 5.18, "grad_norm": 5.0427165031433105, "learning_rate": 2.4350194127518447e-06, "loss": 0.0766, "step": 526850 }, { "epoch": 5.18, "grad_norm": 10.125112533569336, "learning_rate": 2.434895290297596e-06, "loss": 0.0342, "step": 526875 }, { "epoch": 5.18, "grad_norm": 6.433354377746582, "learning_rate": 2.4347711678433476e-06, "loss": 0.0564, "step": 526900 }, { "epoch": 5.18, "grad_norm": 8.178686141967773, "learning_rate": 2.4346470453890992e-06, "loss": 0.0498, "step": 526925 }, { "epoch": 5.18, "grad_norm": 0.07319490611553192, "learning_rate": 2.434522922934851e-06, "loss": 0.087, "step": 526950 }, { "epoch": 5.18, "grad_norm": 6.243655681610107, "learning_rate": 2.4343988004806025e-06, "loss": 0.0521, "step": 526975 }, { "epoch": 5.18, "grad_norm": 1.3975814580917358, "learning_rate": 2.4342746780263537e-06, "loss": 0.0744, "step": 527000 }, { "epoch": 5.18, "grad_norm": 19.667917251586914, "learning_rate": 2.4341505555721053e-06, "loss": 0.0396, "step": 527025 }, { "epoch": 5.18, "grad_norm": 5.618308067321777, "learning_rate": 2.434026433117857e-06, "loss": 0.1128, "step": 527050 }, { "epoch": 5.18, "grad_norm": 11.838805198669434, "learning_rate": 2.433902310663608e-06, "loss": 0.0564, "step": 527075 }, { "epoch": 5.18, "grad_norm": 6.210629463195801, "learning_rate": 2.43377818820936e-06, "loss": 0.0969, "step": 527100 }, { "epoch": 5.18, "grad_norm": 14.79428482055664, "learning_rate": 2.4336540657551115e-06, "loss": 0.0402, "step": 527125 }, { "epoch": 5.18, "grad_norm": 5.236191272735596, "learning_rate": 2.433529943300863e-06, "loss": 0.1069, "step": 527150 }, { "epoch": 5.18, "grad_norm": 7.588738918304443, "learning_rate": 2.4334058208466147e-06, "loss": 0.0566, "step": 527175 }, { "epoch": 5.18, "grad_norm": 0.08398056775331497, "learning_rate": 2.4332816983923664e-06, "loss": 0.0913, "step": 527200 }, { "epoch": 5.18, "grad_norm": 8.397589683532715, "learning_rate": 2.4331575759381176e-06, "loss": 0.0226, "step": 527225 }, { "epoch": 5.18, "grad_norm": 2.129183053970337, "learning_rate": 2.4330334534838692e-06, "loss": 0.0687, "step": 527250 }, { "epoch": 5.18, "grad_norm": 13.037361145019531, "learning_rate": 2.432909331029621e-06, "loss": 0.0653, "step": 527275 }, { "epoch": 5.18, "grad_norm": 0.6804469227790833, "learning_rate": 2.432785208575372e-06, "loss": 0.0547, "step": 527300 }, { "epoch": 5.18, "grad_norm": 4.259753704071045, "learning_rate": 2.4326610861211237e-06, "loss": 0.0461, "step": 527325 }, { "epoch": 5.18, "grad_norm": 7.896701335906982, "learning_rate": 2.4325369636668753e-06, "loss": 0.0779, "step": 527350 }, { "epoch": 5.19, "grad_norm": 11.274126052856445, "learning_rate": 2.432412841212627e-06, "loss": 0.0623, "step": 527375 }, { "epoch": 5.19, "grad_norm": 0.5270224213600159, "learning_rate": 2.4322887187583786e-06, "loss": 0.0997, "step": 527400 }, { "epoch": 5.19, "grad_norm": 3.29313063621521, "learning_rate": 2.43216459630413e-06, "loss": 0.031, "step": 527425 }, { "epoch": 5.19, "grad_norm": 0.0034714837092906237, "learning_rate": 2.4320404738498814e-06, "loss": 0.0664, "step": 527450 }, { "epoch": 5.19, "grad_norm": 16.65854263305664, "learning_rate": 2.431916351395633e-06, "loss": 0.0395, "step": 527475 }, { "epoch": 5.19, "grad_norm": 18.277475357055664, "learning_rate": 2.4317922289413843e-06, "loss": 0.0928, "step": 527500 }, { "epoch": 5.19, "grad_norm": 13.377742767333984, "learning_rate": 2.431668106487136e-06, "loss": 0.0494, "step": 527525 }, { "epoch": 5.19, "grad_norm": 3.3291120529174805, "learning_rate": 2.4315439840328876e-06, "loss": 0.0628, "step": 527550 }, { "epoch": 5.19, "grad_norm": 10.621475219726562, "learning_rate": 2.431419861578639e-06, "loss": 0.041, "step": 527575 }, { "epoch": 5.19, "grad_norm": 8.821765899658203, "learning_rate": 2.431295739124391e-06, "loss": 0.1021, "step": 527600 }, { "epoch": 5.19, "grad_norm": 10.587851524353027, "learning_rate": 2.4311716166701425e-06, "loss": 0.0467, "step": 527625 }, { "epoch": 5.19, "grad_norm": 3.879650592803955, "learning_rate": 2.4310474942158937e-06, "loss": 0.0812, "step": 527650 }, { "epoch": 5.19, "grad_norm": 9.167694091796875, "learning_rate": 2.4309233717616453e-06, "loss": 0.0492, "step": 527675 }, { "epoch": 5.19, "grad_norm": 1.3373510837554932, "learning_rate": 2.430799249307397e-06, "loss": 0.0833, "step": 527700 }, { "epoch": 5.19, "grad_norm": 7.733062744140625, "learning_rate": 2.4306751268531486e-06, "loss": 0.0514, "step": 527725 }, { "epoch": 5.19, "grad_norm": 1.2905144691467285, "learning_rate": 2.4305510043989002e-06, "loss": 0.0789, "step": 527750 }, { "epoch": 5.19, "grad_norm": 10.344773292541504, "learning_rate": 2.430426881944652e-06, "loss": 0.0452, "step": 527775 }, { "epoch": 5.19, "grad_norm": 8.474408149719238, "learning_rate": 2.430302759490403e-06, "loss": 0.0861, "step": 527800 }, { "epoch": 5.19, "grad_norm": 2.818108081817627, "learning_rate": 2.4301786370361547e-06, "loss": 0.0392, "step": 527825 }, { "epoch": 5.19, "grad_norm": 1.952715516090393, "learning_rate": 2.430054514581906e-06, "loss": 0.0698, "step": 527850 }, { "epoch": 5.19, "grad_norm": 10.703368186950684, "learning_rate": 2.4299303921276575e-06, "loss": 0.0547, "step": 527875 }, { "epoch": 5.19, "grad_norm": 0.015104941092431545, "learning_rate": 2.429811234571579e-06, "loss": 0.0864, "step": 527900 }, { "epoch": 5.19, "grad_norm": 8.514618873596191, "learning_rate": 2.4296871121173306e-06, "loss": 0.0551, "step": 527925 }, { "epoch": 5.19, "grad_norm": 1.9071649312973022, "learning_rate": 2.4295629896630823e-06, "loss": 0.0854, "step": 527950 }, { "epoch": 5.19, "grad_norm": 8.417609214782715, "learning_rate": 2.429438867208834e-06, "loss": 0.0319, "step": 527975 }, { "epoch": 5.19, "grad_norm": 0.35063084959983826, "learning_rate": 2.4293147447545855e-06, "loss": 0.0696, "step": 528000 }, { "epoch": 5.19, "grad_norm": 13.108969688415527, "learning_rate": 2.4291906223003367e-06, "loss": 0.0628, "step": 528025 }, { "epoch": 5.19, "grad_norm": 2.466244697570801, "learning_rate": 2.4290664998460884e-06, "loss": 0.0936, "step": 528050 }, { "epoch": 5.19, "grad_norm": 5.349576473236084, "learning_rate": 2.42894237739184e-06, "loss": 0.0306, "step": 528075 }, { "epoch": 5.19, "grad_norm": 0.06324382871389389, "learning_rate": 2.4288182549375912e-06, "loss": 0.09, "step": 528100 }, { "epoch": 5.19, "grad_norm": 6.269229412078857, "learning_rate": 2.428694132483343e-06, "loss": 0.0611, "step": 528125 }, { "epoch": 5.19, "grad_norm": 0.15711909532546997, "learning_rate": 2.4285700100290945e-06, "loss": 0.0712, "step": 528150 }, { "epoch": 5.19, "grad_norm": 15.731948852539062, "learning_rate": 2.428445887574846e-06, "loss": 0.0527, "step": 528175 }, { "epoch": 5.19, "grad_norm": 0.18158988654613495, "learning_rate": 2.4283217651205978e-06, "loss": 0.1001, "step": 528200 }, { "epoch": 5.19, "grad_norm": 6.227136135101318, "learning_rate": 2.4281976426663494e-06, "loss": 0.0491, "step": 528225 }, { "epoch": 5.19, "grad_norm": 2.283982515335083, "learning_rate": 2.4280735202121006e-06, "loss": 0.0691, "step": 528250 }, { "epoch": 5.19, "grad_norm": 13.90436840057373, "learning_rate": 2.4279493977578523e-06, "loss": 0.0505, "step": 528275 }, { "epoch": 5.19, "grad_norm": 1.2175612449645996, "learning_rate": 2.427825275303604e-06, "loss": 0.0775, "step": 528300 }, { "epoch": 5.19, "grad_norm": 12.181188583374023, "learning_rate": 2.427701152849355e-06, "loss": 0.0395, "step": 528325 }, { "epoch": 5.19, "grad_norm": 4.839820861816406, "learning_rate": 2.4275770303951067e-06, "loss": 0.0617, "step": 528350 }, { "epoch": 5.2, "grad_norm": 7.000899791717529, "learning_rate": 2.4274529079408584e-06, "loss": 0.0534, "step": 528375 }, { "epoch": 5.2, "grad_norm": 1.5432146787643433, "learning_rate": 2.42732878548661e-06, "loss": 0.0862, "step": 528400 }, { "epoch": 5.2, "grad_norm": 10.137724876403809, "learning_rate": 2.4272046630323616e-06, "loss": 0.0627, "step": 528425 }, { "epoch": 5.2, "grad_norm": 1.440563678741455, "learning_rate": 2.427080540578113e-06, "loss": 0.1028, "step": 528450 }, { "epoch": 5.2, "grad_norm": 11.9993314743042, "learning_rate": 2.4269564181238645e-06, "loss": 0.0418, "step": 528475 }, { "epoch": 5.2, "grad_norm": 0.04001650586724281, "learning_rate": 2.426832295669616e-06, "loss": 0.0719, "step": 528500 }, { "epoch": 5.2, "grad_norm": 11.320639610290527, "learning_rate": 2.4267081732153673e-06, "loss": 0.05, "step": 528525 }, { "epoch": 5.2, "grad_norm": 1.195023536682129, "learning_rate": 2.426584050761119e-06, "loss": 0.0715, "step": 528550 }, { "epoch": 5.2, "grad_norm": 2.233088731765747, "learning_rate": 2.4264599283068706e-06, "loss": 0.0339, "step": 528575 }, { "epoch": 5.2, "grad_norm": 4.900925159454346, "learning_rate": 2.4263358058526222e-06, "loss": 0.0766, "step": 528600 }, { "epoch": 5.2, "grad_norm": 14.422240257263184, "learning_rate": 2.426211683398374e-06, "loss": 0.0508, "step": 528625 }, { "epoch": 5.2, "grad_norm": 7.443012714385986, "learning_rate": 2.4260875609441255e-06, "loss": 0.0689, "step": 528650 }, { "epoch": 5.2, "grad_norm": 6.832075595855713, "learning_rate": 2.4259634384898767e-06, "loss": 0.0398, "step": 528675 }, { "epoch": 5.2, "grad_norm": 0.10197493433952332, "learning_rate": 2.4258393160356284e-06, "loss": 0.0627, "step": 528700 }, { "epoch": 5.2, "grad_norm": 10.923064231872559, "learning_rate": 2.42571519358138e-06, "loss": 0.0667, "step": 528725 }, { "epoch": 5.2, "grad_norm": 2.4515979290008545, "learning_rate": 2.425591071127131e-06, "loss": 0.086, "step": 528750 }, { "epoch": 5.2, "grad_norm": 11.142210960388184, "learning_rate": 2.425466948672883e-06, "loss": 0.0485, "step": 528775 }, { "epoch": 5.2, "grad_norm": 0.08454765379428864, "learning_rate": 2.4253428262186345e-06, "loss": 0.0975, "step": 528800 }, { "epoch": 5.2, "grad_norm": 7.866822242736816, "learning_rate": 2.425218703764386e-06, "loss": 0.0443, "step": 528825 }, { "epoch": 5.2, "grad_norm": 7.816533088684082, "learning_rate": 2.4250945813101377e-06, "loss": 0.0888, "step": 528850 }, { "epoch": 5.2, "grad_norm": 11.384892463684082, "learning_rate": 2.424970458855889e-06, "loss": 0.0362, "step": 528875 }, { "epoch": 5.2, "grad_norm": 0.13976457715034485, "learning_rate": 2.4248463364016406e-06, "loss": 0.0677, "step": 528900 }, { "epoch": 5.2, "grad_norm": 5.409945487976074, "learning_rate": 2.4247222139473922e-06, "loss": 0.057, "step": 528925 }, { "epoch": 5.2, "grad_norm": 3.343372344970703, "learning_rate": 2.4245980914931434e-06, "loss": 0.1071, "step": 528950 }, { "epoch": 5.2, "grad_norm": 6.793105125427246, "learning_rate": 2.424473969038895e-06, "loss": 0.0295, "step": 528975 }, { "epoch": 5.2, "grad_norm": 0.13094472885131836, "learning_rate": 2.4243498465846467e-06, "loss": 0.0774, "step": 529000 }, { "epoch": 5.2, "grad_norm": 17.004714965820312, "learning_rate": 2.4242257241303983e-06, "loss": 0.0645, "step": 529025 }, { "epoch": 5.2, "grad_norm": 1.4613475799560547, "learning_rate": 2.42410160167615e-06, "loss": 0.076, "step": 529050 }, { "epoch": 5.2, "grad_norm": 8.451761245727539, "learning_rate": 2.4239774792219016e-06, "loss": 0.0557, "step": 529075 }, { "epoch": 5.2, "grad_norm": 5.351586818695068, "learning_rate": 2.423853356767653e-06, "loss": 0.0934, "step": 529100 }, { "epoch": 5.2, "grad_norm": 10.229755401611328, "learning_rate": 2.4237292343134045e-06, "loss": 0.0458, "step": 529125 }, { "epoch": 5.2, "grad_norm": 8.127819061279297, "learning_rate": 2.423605111859156e-06, "loss": 0.0989, "step": 529150 }, { "epoch": 5.2, "grad_norm": 5.1395463943481445, "learning_rate": 2.4234809894049073e-06, "loss": 0.0456, "step": 529175 }, { "epoch": 5.2, "grad_norm": 0.23128733038902283, "learning_rate": 2.423356866950659e-06, "loss": 0.0783, "step": 529200 }, { "epoch": 5.2, "grad_norm": 7.917322635650635, "learning_rate": 2.4232327444964106e-06, "loss": 0.0551, "step": 529225 }, { "epoch": 5.2, "grad_norm": 2.3221991062164307, "learning_rate": 2.423108622042162e-06, "loss": 0.0725, "step": 529250 }, { "epoch": 5.2, "grad_norm": 3.632770538330078, "learning_rate": 2.422984499587914e-06, "loss": 0.0458, "step": 529275 }, { "epoch": 5.2, "grad_norm": 1.6202055215835571, "learning_rate": 2.422860377133665e-06, "loss": 0.0704, "step": 529300 }, { "epoch": 5.2, "grad_norm": 12.236588478088379, "learning_rate": 2.4227362546794167e-06, "loss": 0.0407, "step": 529325 }, { "epoch": 5.2, "grad_norm": 0.39855921268463135, "learning_rate": 2.4226121322251683e-06, "loss": 0.0624, "step": 529350 }, { "epoch": 5.2, "grad_norm": 10.581978797912598, "learning_rate": 2.4224880097709195e-06, "loss": 0.0487, "step": 529375 }, { "epoch": 5.21, "grad_norm": 0.31402695178985596, "learning_rate": 2.422363887316671e-06, "loss": 0.0669, "step": 529400 }, { "epoch": 5.21, "grad_norm": 4.7665581703186035, "learning_rate": 2.422239764862423e-06, "loss": 0.0487, "step": 529425 }, { "epoch": 5.21, "grad_norm": 1.3149303197860718, "learning_rate": 2.4221156424081744e-06, "loss": 0.0614, "step": 529450 }, { "epoch": 5.21, "grad_norm": 5.207431316375732, "learning_rate": 2.421991519953926e-06, "loss": 0.0312, "step": 529475 }, { "epoch": 5.21, "grad_norm": 0.28784188628196716, "learning_rate": 2.4218673974996777e-06, "loss": 0.0858, "step": 529500 }, { "epoch": 5.21, "grad_norm": 7.04573917388916, "learning_rate": 2.421743275045429e-06, "loss": 0.0619, "step": 529525 }, { "epoch": 5.21, "grad_norm": 5.557805061340332, "learning_rate": 2.4216191525911806e-06, "loss": 0.0683, "step": 529550 }, { "epoch": 5.21, "grad_norm": 6.879036903381348, "learning_rate": 2.421495030136932e-06, "loss": 0.0426, "step": 529575 }, { "epoch": 5.21, "grad_norm": 2.220689058303833, "learning_rate": 2.4213709076826834e-06, "loss": 0.0751, "step": 529600 }, { "epoch": 5.21, "grad_norm": 6.489174842834473, "learning_rate": 2.421246785228435e-06, "loss": 0.0542, "step": 529625 }, { "epoch": 5.21, "grad_norm": 6.596255302429199, "learning_rate": 2.4211226627741867e-06, "loss": 0.0729, "step": 529650 }, { "epoch": 5.21, "grad_norm": 16.990985870361328, "learning_rate": 2.4209985403199383e-06, "loss": 0.0405, "step": 529675 }, { "epoch": 5.21, "grad_norm": 0.029058698564767838, "learning_rate": 2.42087441786569e-06, "loss": 0.0916, "step": 529700 }, { "epoch": 5.21, "grad_norm": 12.675424575805664, "learning_rate": 2.420750295411441e-06, "loss": 0.0316, "step": 529725 }, { "epoch": 5.21, "grad_norm": 4.656988143920898, "learning_rate": 2.4206261729571928e-06, "loss": 0.085, "step": 529750 }, { "epoch": 5.21, "grad_norm": 18.56517791748047, "learning_rate": 2.4205020505029444e-06, "loss": 0.067, "step": 529775 }, { "epoch": 5.21, "grad_norm": 1.0860787630081177, "learning_rate": 2.4203779280486956e-06, "loss": 0.0985, "step": 529800 }, { "epoch": 5.21, "grad_norm": 2.381582021713257, "learning_rate": 2.4202538055944473e-06, "loss": 0.0425, "step": 529825 }, { "epoch": 5.21, "grad_norm": 0.050386734306812286, "learning_rate": 2.420129683140199e-06, "loss": 0.089, "step": 529850 }, { "epoch": 5.21, "grad_norm": 10.569758415222168, "learning_rate": 2.4200055606859505e-06, "loss": 0.0407, "step": 529875 }, { "epoch": 5.21, "grad_norm": 0.19359321892261505, "learning_rate": 2.419881438231702e-06, "loss": 0.0865, "step": 529900 }, { "epoch": 5.21, "grad_norm": 1.6835678815841675, "learning_rate": 2.419757315777454e-06, "loss": 0.0416, "step": 529925 }, { "epoch": 5.21, "grad_norm": 0.03910454735159874, "learning_rate": 2.419633193323205e-06, "loss": 0.0851, "step": 529950 }, { "epoch": 5.21, "grad_norm": 4.451363563537598, "learning_rate": 2.4195090708689567e-06, "loss": 0.0425, "step": 529975 }, { "epoch": 5.21, "grad_norm": 9.063081741333008, "learning_rate": 2.4193849484147083e-06, "loss": 0.0742, "step": 530000 }, { "epoch": 5.21, "grad_norm": 11.411663055419922, "learning_rate": 2.4192657908586297e-06, "loss": 0.0531, "step": 530025 }, { "epoch": 5.21, "grad_norm": 2.9869024753570557, "learning_rate": 2.419141668404381e-06, "loss": 0.0773, "step": 530050 }, { "epoch": 5.21, "grad_norm": 5.724085807800293, "learning_rate": 2.4190175459501326e-06, "loss": 0.0432, "step": 530075 }, { "epoch": 5.21, "grad_norm": 5.184662818908691, "learning_rate": 2.4188934234958842e-06, "loss": 0.0775, "step": 530100 }, { "epoch": 5.21, "grad_norm": 10.914938926696777, "learning_rate": 2.418769301041636e-06, "loss": 0.0532, "step": 530125 }, { "epoch": 5.21, "grad_norm": 8.041956901550293, "learning_rate": 2.4186451785873875e-06, "loss": 0.0754, "step": 530150 }, { "epoch": 5.21, "grad_norm": 7.2034831047058105, "learning_rate": 2.418521056133139e-06, "loss": 0.0446, "step": 530175 }, { "epoch": 5.21, "grad_norm": 3.718715190887451, "learning_rate": 2.4183969336788903e-06, "loss": 0.0768, "step": 530200 }, { "epoch": 5.21, "grad_norm": 12.878361701965332, "learning_rate": 2.418272811224642e-06, "loss": 0.0622, "step": 530225 }, { "epoch": 5.21, "grad_norm": 0.009136405773460865, "learning_rate": 2.418148688770393e-06, "loss": 0.0578, "step": 530250 }, { "epoch": 5.21, "grad_norm": 16.89764976501465, "learning_rate": 2.418024566316145e-06, "loss": 0.0684, "step": 530275 }, { "epoch": 5.21, "grad_norm": 0.17295153439044952, "learning_rate": 2.4179004438618965e-06, "loss": 0.0577, "step": 530300 }, { "epoch": 5.21, "grad_norm": 45.212257385253906, "learning_rate": 2.417776321407648e-06, "loss": 0.0347, "step": 530325 }, { "epoch": 5.21, "grad_norm": 2.248094081878662, "learning_rate": 2.4176521989533997e-06, "loss": 0.0647, "step": 530350 }, { "epoch": 5.21, "grad_norm": 2.921548843383789, "learning_rate": 2.4175280764991514e-06, "loss": 0.0348, "step": 530375 }, { "epoch": 5.21, "grad_norm": 2.9972341060638428, "learning_rate": 2.4174039540449026e-06, "loss": 0.0622, "step": 530400 }, { "epoch": 5.22, "grad_norm": 2.668964385986328, "learning_rate": 2.417279831590654e-06, "loss": 0.0425, "step": 530425 }, { "epoch": 5.22, "grad_norm": 0.4394705891609192, "learning_rate": 2.417155709136406e-06, "loss": 0.0729, "step": 530450 }, { "epoch": 5.22, "grad_norm": 9.087101936340332, "learning_rate": 2.417031586682157e-06, "loss": 0.0562, "step": 530475 }, { "epoch": 5.22, "grad_norm": 1.7002673149108887, "learning_rate": 2.4169074642279087e-06, "loss": 0.0618, "step": 530500 }, { "epoch": 5.22, "grad_norm": 8.559648513793945, "learning_rate": 2.4167833417736603e-06, "loss": 0.0385, "step": 530525 }, { "epoch": 5.22, "grad_norm": 1.6538656949996948, "learning_rate": 2.416659219319412e-06, "loss": 0.0748, "step": 530550 }, { "epoch": 5.22, "grad_norm": 8.35757064819336, "learning_rate": 2.4165350968651636e-06, "loss": 0.0513, "step": 530575 }, { "epoch": 5.22, "grad_norm": 0.4396032392978668, "learning_rate": 2.4164109744109152e-06, "loss": 0.0866, "step": 530600 }, { "epoch": 5.22, "grad_norm": 9.513429641723633, "learning_rate": 2.4162868519566664e-06, "loss": 0.0636, "step": 530625 }, { "epoch": 5.22, "grad_norm": 19.790143966674805, "learning_rate": 2.416162729502418e-06, "loss": 0.076, "step": 530650 }, { "epoch": 5.22, "grad_norm": 7.651510715484619, "learning_rate": 2.4160386070481697e-06, "loss": 0.0437, "step": 530675 }, { "epoch": 5.22, "grad_norm": 0.1467410922050476, "learning_rate": 2.4159144845939213e-06, "loss": 0.0589, "step": 530700 }, { "epoch": 5.22, "grad_norm": 12.08248233795166, "learning_rate": 2.415790362139673e-06, "loss": 0.0479, "step": 530725 }, { "epoch": 5.22, "grad_norm": 0.6850080490112305, "learning_rate": 2.415666239685424e-06, "loss": 0.0482, "step": 530750 }, { "epoch": 5.22, "grad_norm": 4.608225345611572, "learning_rate": 2.415542117231176e-06, "loss": 0.051, "step": 530775 }, { "epoch": 5.22, "grad_norm": 10.22547435760498, "learning_rate": 2.4154179947769275e-06, "loss": 0.0723, "step": 530800 }, { "epoch": 5.22, "grad_norm": 9.363332748413086, "learning_rate": 2.4152938723226787e-06, "loss": 0.0567, "step": 530825 }, { "epoch": 5.22, "grad_norm": 3.8919920921325684, "learning_rate": 2.4151697498684303e-06, "loss": 0.0815, "step": 530850 }, { "epoch": 5.22, "grad_norm": 9.054990768432617, "learning_rate": 2.415045627414182e-06, "loss": 0.0425, "step": 530875 }, { "epoch": 5.22, "grad_norm": 0.5729488134384155, "learning_rate": 2.4149215049599336e-06, "loss": 0.0858, "step": 530900 }, { "epoch": 5.22, "grad_norm": 10.793570518493652, "learning_rate": 2.414797382505685e-06, "loss": 0.0581, "step": 530925 }, { "epoch": 5.22, "grad_norm": 0.08734060078859329, "learning_rate": 2.414673260051437e-06, "loss": 0.084, "step": 530950 }, { "epoch": 5.22, "grad_norm": 6.903880596160889, "learning_rate": 2.414549137597188e-06, "loss": 0.0563, "step": 530975 }, { "epoch": 5.22, "grad_norm": 1.5370430946350098, "learning_rate": 2.4144250151429397e-06, "loss": 0.0709, "step": 531000 }, { "epoch": 5.22, "grad_norm": 4.012636184692383, "learning_rate": 2.4143008926886913e-06, "loss": 0.0469, "step": 531025 }, { "epoch": 5.22, "grad_norm": 0.20098356902599335, "learning_rate": 2.4141767702344425e-06, "loss": 0.0937, "step": 531050 }, { "epoch": 5.22, "grad_norm": 13.329971313476562, "learning_rate": 2.414052647780194e-06, "loss": 0.0392, "step": 531075 }, { "epoch": 5.22, "grad_norm": 1.0676337480545044, "learning_rate": 2.413928525325946e-06, "loss": 0.0879, "step": 531100 }, { "epoch": 5.22, "grad_norm": 10.69219970703125, "learning_rate": 2.4138044028716974e-06, "loss": 0.0623, "step": 531125 }, { "epoch": 5.22, "grad_norm": 4.543290138244629, "learning_rate": 2.413680280417449e-06, "loss": 0.0717, "step": 531150 }, { "epoch": 5.22, "grad_norm": 5.988134384155273, "learning_rate": 2.4135561579632003e-06, "loss": 0.0539, "step": 531175 }, { "epoch": 5.22, "grad_norm": 0.7325290441513062, "learning_rate": 2.413432035508952e-06, "loss": 0.0838, "step": 531200 }, { "epoch": 5.22, "grad_norm": 9.331167221069336, "learning_rate": 2.4133079130547036e-06, "loss": 0.0438, "step": 531225 }, { "epoch": 5.22, "grad_norm": 3.8137569427490234, "learning_rate": 2.4131837906004548e-06, "loss": 0.0777, "step": 531250 }, { "epoch": 5.22, "grad_norm": 7.391939640045166, "learning_rate": 2.4130596681462064e-06, "loss": 0.038, "step": 531275 }, { "epoch": 5.22, "grad_norm": 0.2085614651441574, "learning_rate": 2.412935545691958e-06, "loss": 0.0787, "step": 531300 }, { "epoch": 5.22, "grad_norm": 6.55320405960083, "learning_rate": 2.4128114232377097e-06, "loss": 0.0511, "step": 531325 }, { "epoch": 5.22, "grad_norm": 4.594552516937256, "learning_rate": 2.4126873007834613e-06, "loss": 0.0827, "step": 531350 }, { "epoch": 5.22, "grad_norm": 10.082989692687988, "learning_rate": 2.412563178329213e-06, "loss": 0.0426, "step": 531375 }, { "epoch": 5.22, "grad_norm": 5.537602424621582, "learning_rate": 2.412439055874964e-06, "loss": 0.0867, "step": 531400 }, { "epoch": 5.23, "grad_norm": 11.930676460266113, "learning_rate": 2.412314933420716e-06, "loss": 0.0477, "step": 531425 }, { "epoch": 5.23, "grad_norm": 0.6044110655784607, "learning_rate": 2.4121908109664674e-06, "loss": 0.0585, "step": 531450 }, { "epoch": 5.23, "grad_norm": 10.343708992004395, "learning_rate": 2.4120666885122186e-06, "loss": 0.0513, "step": 531475 }, { "epoch": 5.23, "grad_norm": 7.301738739013672, "learning_rate": 2.4119425660579703e-06, "loss": 0.0655, "step": 531500 }, { "epoch": 5.23, "grad_norm": 4.9986114501953125, "learning_rate": 2.411818443603722e-06, "loss": 0.0533, "step": 531525 }, { "epoch": 5.23, "grad_norm": 6.0273051261901855, "learning_rate": 2.4116943211494735e-06, "loss": 0.0948, "step": 531550 }, { "epoch": 5.23, "grad_norm": 14.768453598022461, "learning_rate": 2.411570198695225e-06, "loss": 0.0588, "step": 531575 }, { "epoch": 5.23, "grad_norm": 5.651832580566406, "learning_rate": 2.4114460762409764e-06, "loss": 0.0876, "step": 531600 }, { "epoch": 5.23, "grad_norm": 7.831793308258057, "learning_rate": 2.411321953786728e-06, "loss": 0.0522, "step": 531625 }, { "epoch": 5.23, "grad_norm": 4.008433818817139, "learning_rate": 2.4111978313324797e-06, "loss": 0.0861, "step": 531650 }, { "epoch": 5.23, "grad_norm": 9.858076095581055, "learning_rate": 2.411073708878231e-06, "loss": 0.0397, "step": 531675 }, { "epoch": 5.23, "grad_norm": 0.23853257298469543, "learning_rate": 2.4109495864239825e-06, "loss": 0.0574, "step": 531700 }, { "epoch": 5.23, "grad_norm": 6.111724376678467, "learning_rate": 2.410825463969734e-06, "loss": 0.031, "step": 531725 }, { "epoch": 5.23, "grad_norm": 1.062144160270691, "learning_rate": 2.4107013415154858e-06, "loss": 0.0717, "step": 531750 }, { "epoch": 5.23, "grad_norm": 8.071474075317383, "learning_rate": 2.4105772190612374e-06, "loss": 0.0369, "step": 531775 }, { "epoch": 5.23, "grad_norm": 0.21925599873065948, "learning_rate": 2.410453096606989e-06, "loss": 0.0714, "step": 531800 }, { "epoch": 5.23, "grad_norm": 8.335134506225586, "learning_rate": 2.4103289741527403e-06, "loss": 0.0443, "step": 531825 }, { "epoch": 5.23, "grad_norm": 2.2906134128570557, "learning_rate": 2.410204851698492e-06, "loss": 0.07, "step": 531850 }, { "epoch": 5.23, "grad_norm": 10.102919578552246, "learning_rate": 2.4100807292442435e-06, "loss": 0.0607, "step": 531875 }, { "epoch": 5.23, "grad_norm": 2.830665349960327, "learning_rate": 2.4099566067899947e-06, "loss": 0.083, "step": 531900 }, { "epoch": 5.23, "grad_norm": 7.135533332824707, "learning_rate": 2.4098324843357464e-06, "loss": 0.0422, "step": 531925 }, { "epoch": 5.23, "grad_norm": 0.08460965007543564, "learning_rate": 2.409708361881498e-06, "loss": 0.086, "step": 531950 }, { "epoch": 5.23, "grad_norm": 4.6189680099487305, "learning_rate": 2.4095842394272496e-06, "loss": 0.0484, "step": 531975 }, { "epoch": 5.23, "grad_norm": 0.19638457894325256, "learning_rate": 2.4094601169730013e-06, "loss": 0.0943, "step": 532000 }, { "epoch": 5.23, "grad_norm": 2.360671043395996, "learning_rate": 2.4093359945187525e-06, "loss": 0.0393, "step": 532025 }, { "epoch": 5.23, "grad_norm": 10.050544738769531, "learning_rate": 2.409211872064504e-06, "loss": 0.0782, "step": 532050 }, { "epoch": 5.23, "grad_norm": 4.861123085021973, "learning_rate": 2.4090877496102558e-06, "loss": 0.0614, "step": 532075 }, { "epoch": 5.23, "grad_norm": 5.5114030838012695, "learning_rate": 2.408963627156007e-06, "loss": 0.0511, "step": 532100 }, { "epoch": 5.23, "grad_norm": 14.831815719604492, "learning_rate": 2.4088395047017586e-06, "loss": 0.0513, "step": 532125 }, { "epoch": 5.23, "grad_norm": 3.292597532272339, "learning_rate": 2.4087153822475102e-06, "loss": 0.0627, "step": 532150 }, { "epoch": 5.23, "grad_norm": 10.439885139465332, "learning_rate": 2.408591259793262e-06, "loss": 0.0484, "step": 532175 }, { "epoch": 5.23, "grad_norm": 1.018847107887268, "learning_rate": 2.4084671373390135e-06, "loss": 0.0798, "step": 532200 }, { "epoch": 5.23, "grad_norm": 7.436373233795166, "learning_rate": 2.408343014884765e-06, "loss": 0.0519, "step": 532225 }, { "epoch": 5.23, "grad_norm": 5.057380676269531, "learning_rate": 2.4082188924305164e-06, "loss": 0.0782, "step": 532250 }, { "epoch": 5.23, "grad_norm": 15.430045127868652, "learning_rate": 2.408094769976268e-06, "loss": 0.0541, "step": 532275 }, { "epoch": 5.23, "grad_norm": 3.3568687438964844, "learning_rate": 2.4079756124201894e-06, "loss": 0.0909, "step": 532300 }, { "epoch": 5.23, "grad_norm": 10.595137596130371, "learning_rate": 2.407851489965941e-06, "loss": 0.0444, "step": 532325 }, { "epoch": 5.23, "grad_norm": 7.091536998748779, "learning_rate": 2.4077273675116923e-06, "loss": 0.1057, "step": 532350 }, { "epoch": 5.23, "grad_norm": 1.367425560951233, "learning_rate": 2.407603245057444e-06, "loss": 0.0551, "step": 532375 }, { "epoch": 5.23, "grad_norm": 5.156845569610596, "learning_rate": 2.4074791226031956e-06, "loss": 0.0845, "step": 532400 }, { "epoch": 5.23, "grad_norm": 17.133277893066406, "learning_rate": 2.407355000148947e-06, "loss": 0.0574, "step": 532425 }, { "epoch": 5.24, "grad_norm": 5.798119068145752, "learning_rate": 2.407230877694699e-06, "loss": 0.0816, "step": 532450 }, { "epoch": 5.24, "grad_norm": 12.109167098999023, "learning_rate": 2.4071067552404505e-06, "loss": 0.0562, "step": 532475 }, { "epoch": 5.24, "grad_norm": 1.821974277496338, "learning_rate": 2.4069826327862017e-06, "loss": 0.1042, "step": 532500 }, { "epoch": 5.24, "grad_norm": 18.755361557006836, "learning_rate": 2.4068585103319533e-06, "loss": 0.0427, "step": 532525 }, { "epoch": 5.24, "grad_norm": 0.9910541772842407, "learning_rate": 2.4067343878777045e-06, "loss": 0.0768, "step": 532550 }, { "epoch": 5.24, "grad_norm": 14.584268569946289, "learning_rate": 2.406610265423456e-06, "loss": 0.0564, "step": 532575 }, { "epoch": 5.24, "grad_norm": 0.62933748960495, "learning_rate": 2.406486142969208e-06, "loss": 0.0989, "step": 532600 }, { "epoch": 5.24, "grad_norm": 17.775894165039062, "learning_rate": 2.4063620205149594e-06, "loss": 0.0442, "step": 532625 }, { "epoch": 5.24, "grad_norm": 0.6698768138885498, "learning_rate": 2.406237898060711e-06, "loss": 0.0934, "step": 532650 }, { "epoch": 5.24, "grad_norm": 12.027092933654785, "learning_rate": 2.4061137756064627e-06, "loss": 0.0458, "step": 532675 }, { "epoch": 5.24, "grad_norm": 0.3446027338504791, "learning_rate": 2.405989653152214e-06, "loss": 0.0929, "step": 532700 }, { "epoch": 5.24, "grad_norm": 9.923527717590332, "learning_rate": 2.4058655306979655e-06, "loss": 0.0462, "step": 532725 }, { "epoch": 5.24, "grad_norm": 7.530845642089844, "learning_rate": 2.405741408243717e-06, "loss": 0.0894, "step": 532750 }, { "epoch": 5.24, "grad_norm": 6.320115566253662, "learning_rate": 2.4056172857894684e-06, "loss": 0.0613, "step": 532775 }, { "epoch": 5.24, "grad_norm": 3.37870454788208, "learning_rate": 2.40549316333522e-06, "loss": 0.1146, "step": 532800 }, { "epoch": 5.24, "grad_norm": 12.553878784179688, "learning_rate": 2.4053690408809717e-06, "loss": 0.0422, "step": 532825 }, { "epoch": 5.24, "grad_norm": 0.037568919360637665, "learning_rate": 2.4052449184267233e-06, "loss": 0.0664, "step": 532850 }, { "epoch": 5.24, "grad_norm": 5.900677680969238, "learning_rate": 2.405120795972475e-06, "loss": 0.0527, "step": 532875 }, { "epoch": 5.24, "grad_norm": 2.074763774871826, "learning_rate": 2.4049966735182266e-06, "loss": 0.0892, "step": 532900 }, { "epoch": 5.24, "grad_norm": 2.616136312484741, "learning_rate": 2.4048725510639778e-06, "loss": 0.027, "step": 532925 }, { "epoch": 5.24, "grad_norm": 1.8467158079147339, "learning_rate": 2.4047484286097294e-06, "loss": 0.078, "step": 532950 }, { "epoch": 5.24, "grad_norm": 14.138593673706055, "learning_rate": 2.4046243061554806e-06, "loss": 0.0601, "step": 532975 }, { "epoch": 5.24, "grad_norm": 0.3576168119907379, "learning_rate": 2.4045001837012323e-06, "loss": 0.0844, "step": 533000 }, { "epoch": 5.24, "grad_norm": 11.41565990447998, "learning_rate": 2.404376061246984e-06, "loss": 0.0516, "step": 533025 }, { "epoch": 5.24, "grad_norm": 5.930930137634277, "learning_rate": 2.4042519387927355e-06, "loss": 0.0813, "step": 533050 }, { "epoch": 5.24, "grad_norm": 8.489389419555664, "learning_rate": 2.404127816338487e-06, "loss": 0.067, "step": 533075 }, { "epoch": 5.24, "grad_norm": 4.153364181518555, "learning_rate": 2.404003693884239e-06, "loss": 0.109, "step": 533100 }, { "epoch": 5.24, "grad_norm": 4.1255927085876465, "learning_rate": 2.40387957142999e-06, "loss": 0.0579, "step": 533125 }, { "epoch": 5.24, "grad_norm": 5.505603790283203, "learning_rate": 2.4037554489757416e-06, "loss": 0.062, "step": 533150 }, { "epoch": 5.24, "grad_norm": 10.013398170471191, "learning_rate": 2.4036313265214933e-06, "loss": 0.0454, "step": 533175 }, { "epoch": 5.24, "grad_norm": 5.543679714202881, "learning_rate": 2.4035072040672445e-06, "loss": 0.1051, "step": 533200 }, { "epoch": 5.24, "grad_norm": 7.089984893798828, "learning_rate": 2.403383081612996e-06, "loss": 0.0443, "step": 533225 }, { "epoch": 5.24, "grad_norm": 2.3821017742156982, "learning_rate": 2.4032589591587478e-06, "loss": 0.0729, "step": 533250 }, { "epoch": 5.24, "grad_norm": 6.6481218338012695, "learning_rate": 2.4031348367044994e-06, "loss": 0.0342, "step": 533275 }, { "epoch": 5.24, "grad_norm": 0.5864952206611633, "learning_rate": 2.403010714250251e-06, "loss": 0.0919, "step": 533300 }, { "epoch": 5.24, "grad_norm": 2.725743293762207, "learning_rate": 2.4028865917960027e-06, "loss": 0.0408, "step": 533325 }, { "epoch": 5.24, "grad_norm": 1.4277024269104004, "learning_rate": 2.402762469341754e-06, "loss": 0.0727, "step": 533350 }, { "epoch": 5.24, "grad_norm": 16.820724487304688, "learning_rate": 2.4026383468875055e-06, "loss": 0.0608, "step": 533375 }, { "epoch": 5.24, "grad_norm": 2.693023204803467, "learning_rate": 2.4025142244332567e-06, "loss": 0.0627, "step": 533400 }, { "epoch": 5.24, "grad_norm": 12.521011352539062, "learning_rate": 2.4023901019790084e-06, "loss": 0.0498, "step": 533425 }, { "epoch": 5.24, "grad_norm": 1.8765283823013306, "learning_rate": 2.40226597952476e-06, "loss": 0.0727, "step": 533450 }, { "epoch": 5.25, "grad_norm": 8.398216247558594, "learning_rate": 2.4021418570705116e-06, "loss": 0.0459, "step": 533475 }, { "epoch": 5.25, "grad_norm": 0.07386443018913269, "learning_rate": 2.4020177346162633e-06, "loss": 0.08, "step": 533500 }, { "epoch": 5.25, "grad_norm": 9.398611068725586, "learning_rate": 2.401893612162015e-06, "loss": 0.0444, "step": 533525 }, { "epoch": 5.25, "grad_norm": 0.8698365092277527, "learning_rate": 2.401769489707766e-06, "loss": 0.0877, "step": 533550 }, { "epoch": 5.25, "grad_norm": 10.120505332946777, "learning_rate": 2.4016453672535177e-06, "loss": 0.0544, "step": 533575 }, { "epoch": 5.25, "grad_norm": 1.2243237495422363, "learning_rate": 2.4015212447992694e-06, "loss": 0.0861, "step": 533600 }, { "epoch": 5.25, "grad_norm": 2.9555346965789795, "learning_rate": 2.401397122345021e-06, "loss": 0.0578, "step": 533625 }, { "epoch": 5.25, "grad_norm": 2.7666077613830566, "learning_rate": 2.4012729998907727e-06, "loss": 0.0911, "step": 533650 }, { "epoch": 5.25, "grad_norm": 9.796833038330078, "learning_rate": 2.4011488774365243e-06, "loss": 0.0426, "step": 533675 }, { "epoch": 5.25, "grad_norm": 3.0657401084899902, "learning_rate": 2.4010247549822755e-06, "loss": 0.0851, "step": 533700 }, { "epoch": 5.25, "grad_norm": 9.689676284790039, "learning_rate": 2.400900632528027e-06, "loss": 0.0667, "step": 533725 }, { "epoch": 5.25, "grad_norm": 0.631354570388794, "learning_rate": 2.4007765100737788e-06, "loss": 0.1036, "step": 533750 }, { "epoch": 5.25, "grad_norm": 7.089174270629883, "learning_rate": 2.40065238761953e-06, "loss": 0.0591, "step": 533775 }, { "epoch": 5.25, "grad_norm": 3.923365354537964, "learning_rate": 2.4005282651652816e-06, "loss": 0.0954, "step": 533800 }, { "epoch": 5.25, "grad_norm": 11.732941627502441, "learning_rate": 2.4004041427110332e-06, "loss": 0.0481, "step": 533825 }, { "epoch": 5.25, "grad_norm": 6.844465255737305, "learning_rate": 2.400280020256785e-06, "loss": 0.0806, "step": 533850 }, { "epoch": 5.25, "grad_norm": 11.64816665649414, "learning_rate": 2.4001558978025365e-06, "loss": 0.042, "step": 533875 }, { "epoch": 5.25, "grad_norm": 2.730820894241333, "learning_rate": 2.4000317753482877e-06, "loss": 0.1104, "step": 533900 }, { "epoch": 5.25, "grad_norm": 16.51837730407715, "learning_rate": 2.3999076528940394e-06, "loss": 0.0649, "step": 533925 }, { "epoch": 5.25, "grad_norm": 0.44423407316207886, "learning_rate": 2.399783530439791e-06, "loss": 0.0833, "step": 533950 }, { "epoch": 5.25, "grad_norm": 5.545548439025879, "learning_rate": 2.399659407985542e-06, "loss": 0.0486, "step": 533975 }, { "epoch": 5.25, "grad_norm": 1.3898338079452515, "learning_rate": 2.399535285531294e-06, "loss": 0.0859, "step": 534000 }, { "epoch": 5.25, "grad_norm": 18.619123458862305, "learning_rate": 2.3994111630770455e-06, "loss": 0.0442, "step": 534025 }, { "epoch": 5.25, "grad_norm": 7.284178256988525, "learning_rate": 2.399287040622797e-06, "loss": 0.0841, "step": 534050 }, { "epoch": 5.25, "grad_norm": 6.151334285736084, "learning_rate": 2.3991629181685488e-06, "loss": 0.037, "step": 534075 }, { "epoch": 5.25, "grad_norm": 2.9755501747131348, "learning_rate": 2.3990387957143004e-06, "loss": 0.0774, "step": 534100 }, { "epoch": 5.25, "grad_norm": 9.175542831420898, "learning_rate": 2.3989146732600516e-06, "loss": 0.0538, "step": 534125 }, { "epoch": 5.25, "grad_norm": 0.7508450746536255, "learning_rate": 2.3987905508058032e-06, "loss": 0.055, "step": 534150 }, { "epoch": 5.25, "grad_norm": 4.863530158996582, "learning_rate": 2.398666428351555e-06, "loss": 0.041, "step": 534175 }, { "epoch": 5.25, "grad_norm": 0.5646703243255615, "learning_rate": 2.398542305897306e-06, "loss": 0.092, "step": 534200 }, { "epoch": 5.25, "grad_norm": 34.650272369384766, "learning_rate": 2.3984181834430577e-06, "loss": 0.0582, "step": 534225 }, { "epoch": 5.25, "grad_norm": 5.889779090881348, "learning_rate": 2.3982940609888093e-06, "loss": 0.0991, "step": 534250 }, { "epoch": 5.25, "grad_norm": 10.997133255004883, "learning_rate": 2.398169938534561e-06, "loss": 0.0505, "step": 534275 }, { "epoch": 5.25, "grad_norm": 0.19339428842067719, "learning_rate": 2.3980458160803126e-06, "loss": 0.0795, "step": 534300 }, { "epoch": 5.25, "grad_norm": 12.547266006469727, "learning_rate": 2.397921693626064e-06, "loss": 0.0479, "step": 534325 }, { "epoch": 5.25, "grad_norm": 5.991338729858398, "learning_rate": 2.3978025360699857e-06, "loss": 0.097, "step": 534350 }, { "epoch": 5.25, "grad_norm": 5.480767726898193, "learning_rate": 2.397678413615737e-06, "loss": 0.06, "step": 534375 }, { "epoch": 5.25, "grad_norm": 2.3179779052734375, "learning_rate": 2.3975542911614886e-06, "loss": 0.0883, "step": 534400 }, { "epoch": 5.25, "grad_norm": 10.154219627380371, "learning_rate": 2.3974301687072398e-06, "loss": 0.0561, "step": 534425 }, { "epoch": 5.25, "grad_norm": 1.142852783203125, "learning_rate": 2.3973060462529914e-06, "loss": 0.0754, "step": 534450 }, { "epoch": 5.26, "grad_norm": 2.9326226711273193, "learning_rate": 2.397181923798743e-06, "loss": 0.0459, "step": 534475 }, { "epoch": 5.26, "grad_norm": 4.4034881591796875, "learning_rate": 2.3970578013444947e-06, "loss": 0.0825, "step": 534500 }, { "epoch": 5.26, "grad_norm": 24.051414489746094, "learning_rate": 2.3969336788902463e-06, "loss": 0.0612, "step": 534525 }, { "epoch": 5.26, "grad_norm": 4.114790916442871, "learning_rate": 2.396809556435998e-06, "loss": 0.0529, "step": 534550 }, { "epoch": 5.26, "grad_norm": 9.699618339538574, "learning_rate": 2.396685433981749e-06, "loss": 0.0316, "step": 534575 }, { "epoch": 5.26, "grad_norm": 1.1421879529953003, "learning_rate": 2.3965613115275008e-06, "loss": 0.0617, "step": 534600 }, { "epoch": 5.26, "grad_norm": 12.723980903625488, "learning_rate": 2.3964371890732524e-06, "loss": 0.0496, "step": 534625 }, { "epoch": 5.26, "grad_norm": 0.2222440540790558, "learning_rate": 2.3963130666190036e-06, "loss": 0.0687, "step": 534650 }, { "epoch": 5.26, "grad_norm": 6.6166768074035645, "learning_rate": 2.3961889441647553e-06, "loss": 0.0532, "step": 534675 }, { "epoch": 5.26, "grad_norm": 0.2249387949705124, "learning_rate": 2.396064821710507e-06, "loss": 0.0806, "step": 534700 }, { "epoch": 5.26, "grad_norm": 10.02772331237793, "learning_rate": 2.3959406992562585e-06, "loss": 0.0745, "step": 534725 }, { "epoch": 5.26, "grad_norm": 6.349228382110596, "learning_rate": 2.39581657680201e-06, "loss": 0.084, "step": 534750 }, { "epoch": 5.26, "grad_norm": 11.584614753723145, "learning_rate": 2.395692454347762e-06, "loss": 0.0704, "step": 534775 }, { "epoch": 5.26, "grad_norm": 10.773457527160645, "learning_rate": 2.395568331893513e-06, "loss": 0.0968, "step": 534800 }, { "epoch": 5.26, "grad_norm": 10.918502807617188, "learning_rate": 2.3954442094392647e-06, "loss": 0.0403, "step": 534825 }, { "epoch": 5.26, "grad_norm": 5.912632942199707, "learning_rate": 2.395320086985016e-06, "loss": 0.0915, "step": 534850 }, { "epoch": 5.26, "grad_norm": 9.843670845031738, "learning_rate": 2.3951959645307675e-06, "loss": 0.0529, "step": 534875 }, { "epoch": 5.26, "grad_norm": 0.2543202042579651, "learning_rate": 2.395071842076519e-06, "loss": 0.0625, "step": 534900 }, { "epoch": 5.26, "grad_norm": 7.23852014541626, "learning_rate": 2.3949477196222708e-06, "loss": 0.0616, "step": 534925 }, { "epoch": 5.26, "grad_norm": 1.9799747467041016, "learning_rate": 2.3948235971680224e-06, "loss": 0.0827, "step": 534950 }, { "epoch": 5.26, "grad_norm": 2.6431262493133545, "learning_rate": 2.394699474713774e-06, "loss": 0.0678, "step": 534975 }, { "epoch": 5.26, "grad_norm": 0.18050381541252136, "learning_rate": 2.3945753522595253e-06, "loss": 0.0834, "step": 535000 }, { "epoch": 5.26, "grad_norm": 1.2964749336242676, "learning_rate": 2.394451229805277e-06, "loss": 0.0577, "step": 535025 }, { "epoch": 5.26, "grad_norm": 2.590486526489258, "learning_rate": 2.3943271073510285e-06, "loss": 0.0756, "step": 535050 }, { "epoch": 5.26, "grad_norm": 9.511153221130371, "learning_rate": 2.3942029848967797e-06, "loss": 0.0734, "step": 535075 }, { "epoch": 5.26, "grad_norm": 0.3712881803512573, "learning_rate": 2.3940788624425314e-06, "loss": 0.0717, "step": 535100 }, { "epoch": 5.26, "grad_norm": 4.617995738983154, "learning_rate": 2.393954739988283e-06, "loss": 0.0415, "step": 535125 }, { "epoch": 5.26, "grad_norm": 11.727776527404785, "learning_rate": 2.3938306175340346e-06, "loss": 0.076, "step": 535150 }, { "epoch": 5.26, "grad_norm": 8.589655876159668, "learning_rate": 2.3937064950797863e-06, "loss": 0.0542, "step": 535175 }, { "epoch": 5.26, "grad_norm": 1.5968308448791504, "learning_rate": 2.393582372625538e-06, "loss": 0.0751, "step": 535200 }, { "epoch": 5.26, "grad_norm": 8.664977073669434, "learning_rate": 2.393458250171289e-06, "loss": 0.0549, "step": 535225 }, { "epoch": 5.26, "grad_norm": 4.355197906494141, "learning_rate": 2.3933341277170408e-06, "loss": 0.0728, "step": 535250 }, { "epoch": 5.26, "grad_norm": 12.26065444946289, "learning_rate": 2.393210005262792e-06, "loss": 0.0409, "step": 535275 }, { "epoch": 5.26, "grad_norm": 2.247487783432007, "learning_rate": 2.3930858828085436e-06, "loss": 0.0585, "step": 535300 }, { "epoch": 5.26, "grad_norm": 16.45456314086914, "learning_rate": 2.3929617603542952e-06, "loss": 0.064, "step": 535325 }, { "epoch": 5.26, "grad_norm": 5.686227798461914, "learning_rate": 2.392837637900047e-06, "loss": 0.0768, "step": 535350 }, { "epoch": 5.26, "grad_norm": 9.363776206970215, "learning_rate": 2.3927135154457985e-06, "loss": 0.0596, "step": 535375 }, { "epoch": 5.26, "grad_norm": 0.02493208833038807, "learning_rate": 2.39258939299155e-06, "loss": 0.0889, "step": 535400 }, { "epoch": 5.26, "grad_norm": 8.019901275634766, "learning_rate": 2.3924652705373014e-06, "loss": 0.0355, "step": 535425 }, { "epoch": 5.26, "grad_norm": 6.416582107543945, "learning_rate": 2.392341148083053e-06, "loss": 0.0889, "step": 535450 }, { "epoch": 5.26, "grad_norm": 8.649495124816895, "learning_rate": 2.3922170256288046e-06, "loss": 0.0561, "step": 535475 }, { "epoch": 5.27, "grad_norm": 11.104857444763184, "learning_rate": 2.392092903174556e-06, "loss": 0.089, "step": 535500 }, { "epoch": 5.27, "grad_norm": 16.716413497924805, "learning_rate": 2.3919687807203075e-06, "loss": 0.0554, "step": 535525 }, { "epoch": 5.27, "grad_norm": 1.7284245491027832, "learning_rate": 2.391844658266059e-06, "loss": 0.0988, "step": 535550 }, { "epoch": 5.27, "grad_norm": 13.926994323730469, "learning_rate": 2.3917205358118107e-06, "loss": 0.0468, "step": 535575 }, { "epoch": 5.27, "grad_norm": 5.380400657653809, "learning_rate": 2.3915964133575624e-06, "loss": 0.0961, "step": 535600 }, { "epoch": 5.27, "grad_norm": 13.25655746459961, "learning_rate": 2.391472290903314e-06, "loss": 0.0492, "step": 535625 }, { "epoch": 5.27, "grad_norm": 10.219240188598633, "learning_rate": 2.3913481684490652e-06, "loss": 0.0912, "step": 535650 }, { "epoch": 5.27, "grad_norm": 9.283127784729004, "learning_rate": 2.391224045994817e-06, "loss": 0.0587, "step": 535675 }, { "epoch": 5.27, "grad_norm": 0.26800450682640076, "learning_rate": 2.391099923540568e-06, "loss": 0.0856, "step": 535700 }, { "epoch": 5.27, "grad_norm": 7.12217903137207, "learning_rate": 2.3909758010863197e-06, "loss": 0.0714, "step": 535725 }, { "epoch": 5.27, "grad_norm": 7.000494956970215, "learning_rate": 2.3908516786320713e-06, "loss": 0.1005, "step": 535750 }, { "epoch": 5.27, "grad_norm": 6.432135105133057, "learning_rate": 2.390727556177823e-06, "loss": 0.0395, "step": 535775 }, { "epoch": 5.27, "grad_norm": 5.632907867431641, "learning_rate": 2.3906034337235746e-06, "loss": 0.0759, "step": 535800 }, { "epoch": 5.27, "grad_norm": 10.466910362243652, "learning_rate": 2.3904793112693262e-06, "loss": 0.0476, "step": 535825 }, { "epoch": 5.27, "grad_norm": 2.7858171463012695, "learning_rate": 2.3903551888150775e-06, "loss": 0.0773, "step": 535850 }, { "epoch": 5.27, "grad_norm": 5.724929332733154, "learning_rate": 2.390231066360829e-06, "loss": 0.0536, "step": 535875 }, { "epoch": 5.27, "grad_norm": 0.03406425192952156, "learning_rate": 2.3901069439065807e-06, "loss": 0.0715, "step": 535900 }, { "epoch": 5.27, "grad_norm": 4.872729778289795, "learning_rate": 2.389982821452332e-06, "loss": 0.0716, "step": 535925 }, { "epoch": 5.27, "grad_norm": 4.193798065185547, "learning_rate": 2.3898586989980836e-06, "loss": 0.1082, "step": 535950 }, { "epoch": 5.27, "grad_norm": 10.580392837524414, "learning_rate": 2.389734576543835e-06, "loss": 0.0471, "step": 535975 }, { "epoch": 5.27, "grad_norm": 0.0163172148168087, "learning_rate": 2.389610454089587e-06, "loss": 0.0809, "step": 536000 }, { "epoch": 5.27, "grad_norm": 5.432669162750244, "learning_rate": 2.3894863316353385e-06, "loss": 0.0324, "step": 536025 }, { "epoch": 5.27, "grad_norm": 2.0419974327087402, "learning_rate": 2.38936220918109e-06, "loss": 0.0923, "step": 536050 }, { "epoch": 5.27, "grad_norm": 9.800101280212402, "learning_rate": 2.3892380867268413e-06, "loss": 0.0401, "step": 536075 }, { "epoch": 5.27, "grad_norm": 3.711636781692505, "learning_rate": 2.389113964272593e-06, "loss": 0.0997, "step": 536100 }, { "epoch": 5.27, "grad_norm": 11.224058151245117, "learning_rate": 2.388989841818344e-06, "loss": 0.0461, "step": 536125 }, { "epoch": 5.27, "grad_norm": 7.635007858276367, "learning_rate": 2.388865719364096e-06, "loss": 0.0704, "step": 536150 }, { "epoch": 5.27, "grad_norm": 12.868000984191895, "learning_rate": 2.3887415969098474e-06, "loss": 0.0881, "step": 536175 }, { "epoch": 5.27, "grad_norm": 0.2711622714996338, "learning_rate": 2.388617474455599e-06, "loss": 0.0768, "step": 536200 }, { "epoch": 5.27, "grad_norm": 6.692712783813477, "learning_rate": 2.3884933520013507e-06, "loss": 0.0614, "step": 536225 }, { "epoch": 5.27, "grad_norm": 4.86519718170166, "learning_rate": 2.3883692295471023e-06, "loss": 0.0759, "step": 536250 }, { "epoch": 5.27, "grad_norm": 8.411659240722656, "learning_rate": 2.3882451070928536e-06, "loss": 0.0487, "step": 536275 }, { "epoch": 5.27, "grad_norm": 0.4047611653804779, "learning_rate": 2.388120984638605e-06, "loss": 0.0819, "step": 536300 }, { "epoch": 5.27, "grad_norm": 14.91247272491455, "learning_rate": 2.387996862184357e-06, "loss": 0.0486, "step": 536325 }, { "epoch": 5.27, "grad_norm": 0.04009825736284256, "learning_rate": 2.3878727397301085e-06, "loss": 0.0671, "step": 536350 }, { "epoch": 5.27, "grad_norm": 7.451019287109375, "learning_rate": 2.38774861727586e-06, "loss": 0.059, "step": 536375 }, { "epoch": 5.27, "grad_norm": 4.9939961433410645, "learning_rate": 2.3876244948216117e-06, "loss": 0.0888, "step": 536400 }, { "epoch": 5.27, "grad_norm": 12.876777648925781, "learning_rate": 2.387500372367363e-06, "loss": 0.0846, "step": 536425 }, { "epoch": 5.27, "grad_norm": 0.5543365478515625, "learning_rate": 2.3873762499131146e-06, "loss": 0.0772, "step": 536450 }, { "epoch": 5.27, "grad_norm": 8.672731399536133, "learning_rate": 2.387252127458866e-06, "loss": 0.0452, "step": 536475 }, { "epoch": 5.27, "grad_norm": 8.201300621032715, "learning_rate": 2.3871329699027877e-06, "loss": 0.0804, "step": 536500 }, { "epoch": 5.28, "grad_norm": 4.4928460121154785, "learning_rate": 2.387008847448539e-06, "loss": 0.0365, "step": 536525 }, { "epoch": 5.28, "grad_norm": 4.471996307373047, "learning_rate": 2.3868847249942905e-06, "loss": 0.0924, "step": 536550 }, { "epoch": 5.28, "grad_norm": 10.270480155944824, "learning_rate": 2.386760602540042e-06, "loss": 0.0544, "step": 536575 }, { "epoch": 5.28, "grad_norm": 1.5535101890563965, "learning_rate": 2.3866364800857938e-06, "loss": 0.0912, "step": 536600 }, { "epoch": 5.28, "grad_norm": 14.676627159118652, "learning_rate": 2.3865123576315454e-06, "loss": 0.0474, "step": 536625 }, { "epoch": 5.28, "grad_norm": 5.904865741729736, "learning_rate": 2.386388235177297e-06, "loss": 0.0624, "step": 536650 }, { "epoch": 5.28, "grad_norm": 15.267702102661133, "learning_rate": 2.3862641127230483e-06, "loss": 0.0625, "step": 536675 }, { "epoch": 5.28, "grad_norm": 0.016942495480179787, "learning_rate": 2.3861399902688e-06, "loss": 0.0887, "step": 536700 }, { "epoch": 5.28, "grad_norm": 18.169111251831055, "learning_rate": 2.386015867814551e-06, "loss": 0.0531, "step": 536725 }, { "epoch": 5.28, "grad_norm": 1.4617228507995605, "learning_rate": 2.3858917453603027e-06, "loss": 0.0894, "step": 536750 }, { "epoch": 5.28, "grad_norm": 18.1815128326416, "learning_rate": 2.3857676229060544e-06, "loss": 0.0438, "step": 536775 }, { "epoch": 5.28, "grad_norm": 4.224783420562744, "learning_rate": 2.385643500451806e-06, "loss": 0.0742, "step": 536800 }, { "epoch": 5.28, "grad_norm": 9.07262134552002, "learning_rate": 2.3855193779975576e-06, "loss": 0.0647, "step": 536825 }, { "epoch": 5.28, "grad_norm": 0.7119283080101013, "learning_rate": 2.3853952555433093e-06, "loss": 0.0686, "step": 536850 }, { "epoch": 5.28, "grad_norm": 6.315561294555664, "learning_rate": 2.3852711330890605e-06, "loss": 0.0599, "step": 536875 }, { "epoch": 5.28, "grad_norm": 5.78826904296875, "learning_rate": 2.385147010634812e-06, "loss": 0.0906, "step": 536900 }, { "epoch": 5.28, "grad_norm": 7.938834190368652, "learning_rate": 2.3850228881805638e-06, "loss": 0.054, "step": 536925 }, { "epoch": 5.28, "grad_norm": 1.5143654346466064, "learning_rate": 2.384898765726315e-06, "loss": 0.0886, "step": 536950 }, { "epoch": 5.28, "grad_norm": 9.073884963989258, "learning_rate": 2.3847746432720666e-06, "loss": 0.0388, "step": 536975 }, { "epoch": 5.28, "grad_norm": 4.242617607116699, "learning_rate": 2.3846505208178182e-06, "loss": 0.0785, "step": 537000 }, { "epoch": 5.28, "grad_norm": 13.681734085083008, "learning_rate": 2.38452639836357e-06, "loss": 0.0518, "step": 537025 }, { "epoch": 5.28, "grad_norm": 13.670417785644531, "learning_rate": 2.3844022759093215e-06, "loss": 0.1035, "step": 537050 }, { "epoch": 5.28, "grad_norm": 10.478078842163086, "learning_rate": 2.384278153455073e-06, "loss": 0.0609, "step": 537075 }, { "epoch": 5.28, "grad_norm": 6.424088001251221, "learning_rate": 2.3841540310008244e-06, "loss": 0.0676, "step": 537100 }, { "epoch": 5.28, "grad_norm": 5.027463912963867, "learning_rate": 2.384029908546576e-06, "loss": 0.0408, "step": 537125 }, { "epoch": 5.28, "grad_norm": 0.47775354981422424, "learning_rate": 2.383905786092327e-06, "loss": 0.0699, "step": 537150 }, { "epoch": 5.28, "grad_norm": 5.544140815734863, "learning_rate": 2.383781663638079e-06, "loss": 0.0509, "step": 537175 }, { "epoch": 5.28, "grad_norm": 0.36247143149375916, "learning_rate": 2.3836575411838305e-06, "loss": 0.0685, "step": 537200 }, { "epoch": 5.28, "grad_norm": 0.7506543397903442, "learning_rate": 2.383533418729582e-06, "loss": 0.0693, "step": 537225 }, { "epoch": 5.28, "grad_norm": 0.16827793419361115, "learning_rate": 2.3834092962753337e-06, "loss": 0.0704, "step": 537250 }, { "epoch": 5.28, "grad_norm": 1.435114860534668, "learning_rate": 2.3832851738210854e-06, "loss": 0.0499, "step": 537275 }, { "epoch": 5.28, "grad_norm": 0.3511812388896942, "learning_rate": 2.3831610513668366e-06, "loss": 0.0671, "step": 537300 }, { "epoch": 5.28, "grad_norm": 12.698683738708496, "learning_rate": 2.3830369289125882e-06, "loss": 0.0672, "step": 537325 }, { "epoch": 5.28, "grad_norm": 0.04729727283120155, "learning_rate": 2.38291280645834e-06, "loss": 0.0836, "step": 537350 }, { "epoch": 5.28, "grad_norm": 2.658823013305664, "learning_rate": 2.382788684004091e-06, "loss": 0.0515, "step": 537375 }, { "epoch": 5.28, "grad_norm": 0.9674819111824036, "learning_rate": 2.3826645615498427e-06, "loss": 0.0625, "step": 537400 }, { "epoch": 5.28, "grad_norm": 9.24781608581543, "learning_rate": 2.3825404390955943e-06, "loss": 0.0623, "step": 537425 }, { "epoch": 5.28, "grad_norm": 1.0994303226470947, "learning_rate": 2.382416316641346e-06, "loss": 0.0538, "step": 537450 }, { "epoch": 5.28, "grad_norm": 16.70206069946289, "learning_rate": 2.3822921941870976e-06, "loss": 0.0488, "step": 537475 }, { "epoch": 5.28, "grad_norm": 0.24821501970291138, "learning_rate": 2.3821680717328492e-06, "loss": 0.0729, "step": 537500 }, { "epoch": 5.29, "grad_norm": 9.45135498046875, "learning_rate": 2.3820439492786005e-06, "loss": 0.0329, "step": 537525 }, { "epoch": 5.29, "grad_norm": 8.691455841064453, "learning_rate": 2.381919826824352e-06, "loss": 0.0672, "step": 537550 }, { "epoch": 5.29, "grad_norm": 9.332742691040039, "learning_rate": 2.3817957043701033e-06, "loss": 0.0445, "step": 537575 }, { "epoch": 5.29, "grad_norm": 0.5718335509300232, "learning_rate": 2.381671581915855e-06, "loss": 0.0739, "step": 537600 }, { "epoch": 5.29, "grad_norm": 11.297798156738281, "learning_rate": 2.3815474594616066e-06, "loss": 0.0564, "step": 537625 }, { "epoch": 5.29, "grad_norm": 0.18505029380321503, "learning_rate": 2.381423337007358e-06, "loss": 0.0825, "step": 537650 }, { "epoch": 5.29, "grad_norm": 3.1763598918914795, "learning_rate": 2.38129921455311e-06, "loss": 0.0301, "step": 537675 }, { "epoch": 5.29, "grad_norm": 0.8037057518959045, "learning_rate": 2.3811750920988615e-06, "loss": 0.0703, "step": 537700 }, { "epoch": 5.29, "grad_norm": 9.570730209350586, "learning_rate": 2.3810509696446127e-06, "loss": 0.0564, "step": 537725 }, { "epoch": 5.29, "grad_norm": 2.2240190505981445, "learning_rate": 2.3809268471903643e-06, "loss": 0.0719, "step": 537750 }, { "epoch": 5.29, "grad_norm": 24.173837661743164, "learning_rate": 2.380802724736116e-06, "loss": 0.0692, "step": 537775 }, { "epoch": 5.29, "grad_norm": 3.857269048690796, "learning_rate": 2.380678602281867e-06, "loss": 0.0782, "step": 537800 }, { "epoch": 5.29, "grad_norm": 13.885049819946289, "learning_rate": 2.380554479827619e-06, "loss": 0.0419, "step": 537825 }, { "epoch": 5.29, "grad_norm": 3.858013868331909, "learning_rate": 2.3804303573733704e-06, "loss": 0.0917, "step": 537850 }, { "epoch": 5.29, "grad_norm": 5.161180019378662, "learning_rate": 2.380306234919122e-06, "loss": 0.0538, "step": 537875 }, { "epoch": 5.29, "grad_norm": 11.97912311553955, "learning_rate": 2.3801821124648737e-06, "loss": 0.0929, "step": 537900 }, { "epoch": 5.29, "grad_norm": 6.081395149230957, "learning_rate": 2.3800579900106253e-06, "loss": 0.0285, "step": 537925 }, { "epoch": 5.29, "grad_norm": 2.157947301864624, "learning_rate": 2.3799338675563766e-06, "loss": 0.0875, "step": 537950 }, { "epoch": 5.29, "grad_norm": 0.4545641541481018, "learning_rate": 2.379809745102128e-06, "loss": 0.0505, "step": 537975 }, { "epoch": 5.29, "grad_norm": 4.9380364418029785, "learning_rate": 2.3796856226478794e-06, "loss": 0.0909, "step": 538000 }, { "epoch": 5.29, "grad_norm": 13.593940734863281, "learning_rate": 2.379561500193631e-06, "loss": 0.0611, "step": 538025 }, { "epoch": 5.29, "grad_norm": 0.21730254590511322, "learning_rate": 2.3794373777393827e-06, "loss": 0.082, "step": 538050 }, { "epoch": 5.29, "grad_norm": 4.858811855316162, "learning_rate": 2.3793132552851343e-06, "loss": 0.0534, "step": 538075 }, { "epoch": 5.29, "grad_norm": 0.06414362043142319, "learning_rate": 2.379189132830886e-06, "loss": 0.1104, "step": 538100 }, { "epoch": 5.29, "grad_norm": 11.553296089172363, "learning_rate": 2.3790650103766376e-06, "loss": 0.0477, "step": 538125 }, { "epoch": 5.29, "grad_norm": 0.4862399697303772, "learning_rate": 2.378940887922389e-06, "loss": 0.0569, "step": 538150 }, { "epoch": 5.29, "grad_norm": 8.296401977539062, "learning_rate": 2.3788167654681404e-06, "loss": 0.0588, "step": 538175 }, { "epoch": 5.29, "grad_norm": 0.9973286390304565, "learning_rate": 2.378692643013892e-06, "loss": 0.1079, "step": 538200 }, { "epoch": 5.29, "grad_norm": 10.147945404052734, "learning_rate": 2.3785685205596433e-06, "loss": 0.0476, "step": 538225 }, { "epoch": 5.29, "grad_norm": 0.45199689269065857, "learning_rate": 2.378444398105395e-06, "loss": 0.0614, "step": 538250 }, { "epoch": 5.29, "grad_norm": 14.893335342407227, "learning_rate": 2.3783202756511465e-06, "loss": 0.049, "step": 538275 }, { "epoch": 5.29, "grad_norm": 1.4756150245666504, "learning_rate": 2.378196153196898e-06, "loss": 0.1016, "step": 538300 }, { "epoch": 5.29, "grad_norm": 5.871993064880371, "learning_rate": 2.37807203074265e-06, "loss": 0.038, "step": 538325 }, { "epoch": 5.29, "grad_norm": 0.2991887927055359, "learning_rate": 2.3779479082884014e-06, "loss": 0.0781, "step": 538350 }, { "epoch": 5.29, "grad_norm": 2.159743070602417, "learning_rate": 2.3778237858341527e-06, "loss": 0.0352, "step": 538375 }, { "epoch": 5.29, "grad_norm": 1.9296778440475464, "learning_rate": 2.3776996633799043e-06, "loss": 0.0814, "step": 538400 }, { "epoch": 5.29, "grad_norm": 25.26860809326172, "learning_rate": 2.3775755409256555e-06, "loss": 0.073, "step": 538425 }, { "epoch": 5.29, "grad_norm": 4.643678665161133, "learning_rate": 2.377451418471407e-06, "loss": 0.0799, "step": 538450 }, { "epoch": 5.29, "grad_norm": 7.410235404968262, "learning_rate": 2.3773272960171588e-06, "loss": 0.0597, "step": 538475 }, { "epoch": 5.29, "grad_norm": 0.5418553948402405, "learning_rate": 2.3772031735629104e-06, "loss": 0.0894, "step": 538500 }, { "epoch": 5.29, "grad_norm": 16.115514755249023, "learning_rate": 2.377079051108662e-06, "loss": 0.0652, "step": 538525 }, { "epoch": 5.3, "grad_norm": 1.5792328119277954, "learning_rate": 2.3769549286544137e-06, "loss": 0.1054, "step": 538550 }, { "epoch": 5.3, "grad_norm": 8.047307014465332, "learning_rate": 2.376830806200165e-06, "loss": 0.0511, "step": 538575 }, { "epoch": 5.3, "grad_norm": 2.494506359100342, "learning_rate": 2.3767066837459165e-06, "loss": 0.0876, "step": 538600 }, { "epoch": 5.3, "grad_norm": 13.992070198059082, "learning_rate": 2.376582561291668e-06, "loss": 0.0477, "step": 538625 }, { "epoch": 5.3, "grad_norm": 2.5781569480895996, "learning_rate": 2.3764584388374194e-06, "loss": 0.092, "step": 538650 }, { "epoch": 5.3, "grad_norm": 10.456779479980469, "learning_rate": 2.376334316383171e-06, "loss": 0.0383, "step": 538675 }, { "epoch": 5.3, "grad_norm": 1.6427267789840698, "learning_rate": 2.3762101939289226e-06, "loss": 0.0623, "step": 538700 }, { "epoch": 5.3, "grad_norm": 10.743257522583008, "learning_rate": 2.3760860714746743e-06, "loss": 0.0485, "step": 538725 }, { "epoch": 5.3, "grad_norm": 8.517696380615234, "learning_rate": 2.375961949020426e-06, "loss": 0.081, "step": 538750 }, { "epoch": 5.3, "grad_norm": 12.38203239440918, "learning_rate": 2.3758378265661775e-06, "loss": 0.0388, "step": 538775 }, { "epoch": 5.3, "grad_norm": 0.032765500247478485, "learning_rate": 2.3757137041119288e-06, "loss": 0.074, "step": 538800 }, { "epoch": 5.3, "grad_norm": 10.692934036254883, "learning_rate": 2.3755895816576804e-06, "loss": 0.041, "step": 538825 }, { "epoch": 5.3, "grad_norm": 1.5787394046783447, "learning_rate": 2.3754654592034316e-06, "loss": 0.1132, "step": 538850 }, { "epoch": 5.3, "grad_norm": 8.497370719909668, "learning_rate": 2.3753413367491832e-06, "loss": 0.0372, "step": 538875 }, { "epoch": 5.3, "grad_norm": 0.9914803504943848, "learning_rate": 2.375217214294935e-06, "loss": 0.1026, "step": 538900 }, { "epoch": 5.3, "grad_norm": 13.9608793258667, "learning_rate": 2.3750930918406865e-06, "loss": 0.0564, "step": 538925 }, { "epoch": 5.3, "grad_norm": 0.31452956795692444, "learning_rate": 2.374968969386438e-06, "loss": 0.0827, "step": 538950 }, { "epoch": 5.3, "grad_norm": 6.347405433654785, "learning_rate": 2.3748448469321898e-06, "loss": 0.0427, "step": 538975 }, { "epoch": 5.3, "grad_norm": 3.1354665756225586, "learning_rate": 2.3747256893761112e-06, "loss": 0.0789, "step": 539000 }, { "epoch": 5.3, "grad_norm": 17.411022186279297, "learning_rate": 2.3746015669218624e-06, "loss": 0.0472, "step": 539025 }, { "epoch": 5.3, "grad_norm": 1.4580734968185425, "learning_rate": 2.374477444467614e-06, "loss": 0.0884, "step": 539050 }, { "epoch": 5.3, "grad_norm": 7.233006477355957, "learning_rate": 2.3743533220133657e-06, "loss": 0.0253, "step": 539075 }, { "epoch": 5.3, "grad_norm": 6.115781784057617, "learning_rate": 2.374229199559117e-06, "loss": 0.0729, "step": 539100 }, { "epoch": 5.3, "grad_norm": 11.89685344696045, "learning_rate": 2.3741050771048686e-06, "loss": 0.0371, "step": 539125 }, { "epoch": 5.3, "grad_norm": 0.002787999575957656, "learning_rate": 2.37398095465062e-06, "loss": 0.0989, "step": 539150 }, { "epoch": 5.3, "grad_norm": 8.061416625976562, "learning_rate": 2.373856832196372e-06, "loss": 0.0651, "step": 539175 }, { "epoch": 5.3, "grad_norm": 1.6406655311584473, "learning_rate": 2.3737327097421235e-06, "loss": 0.0638, "step": 539200 }, { "epoch": 5.3, "grad_norm": 11.400683403015137, "learning_rate": 2.373608587287875e-06, "loss": 0.0613, "step": 539225 }, { "epoch": 5.3, "grad_norm": 1.7643380165100098, "learning_rate": 2.3734844648336263e-06, "loss": 0.0798, "step": 539250 }, { "epoch": 5.3, "grad_norm": 1.561301350593567, "learning_rate": 2.373360342379378e-06, "loss": 0.0649, "step": 539275 }, { "epoch": 5.3, "grad_norm": 5.580821990966797, "learning_rate": 2.3732362199251296e-06, "loss": 0.1004, "step": 539300 }, { "epoch": 5.3, "grad_norm": 7.619907855987549, "learning_rate": 2.3731120974708812e-06, "loss": 0.0594, "step": 539325 }, { "epoch": 5.3, "grad_norm": 5.542701244354248, "learning_rate": 2.372987975016633e-06, "loss": 0.084, "step": 539350 }, { "epoch": 5.3, "grad_norm": 1.2096667289733887, "learning_rate": 2.372863852562384e-06, "loss": 0.0427, "step": 539375 }, { "epoch": 5.3, "grad_norm": 3.2933707237243652, "learning_rate": 2.3727397301081357e-06, "loss": 0.0946, "step": 539400 }, { "epoch": 5.3, "grad_norm": 9.88182258605957, "learning_rate": 2.3726156076538873e-06, "loss": 0.0658, "step": 539425 }, { "epoch": 5.3, "grad_norm": 0.023538578301668167, "learning_rate": 2.3724914851996385e-06, "loss": 0.0842, "step": 539450 }, { "epoch": 5.3, "grad_norm": 11.308540344238281, "learning_rate": 2.37236736274539e-06, "loss": 0.0593, "step": 539475 }, { "epoch": 5.3, "grad_norm": 0.5331189036369324, "learning_rate": 2.372243240291142e-06, "loss": 0.1092, "step": 539500 }, { "epoch": 5.3, "grad_norm": 6.571035385131836, "learning_rate": 2.3721191178368934e-06, "loss": 0.0454, "step": 539525 }, { "epoch": 5.3, "grad_norm": 0.5858007669448853, "learning_rate": 2.371994995382645e-06, "loss": 0.0856, "step": 539550 }, { "epoch": 5.31, "grad_norm": 2.206186532974243, "learning_rate": 2.3718708729283967e-06, "loss": 0.036, "step": 539575 }, { "epoch": 5.31, "grad_norm": 27.429492950439453, "learning_rate": 2.371746750474148e-06, "loss": 0.0893, "step": 539600 }, { "epoch": 5.31, "grad_norm": 16.467931747436523, "learning_rate": 2.3716226280198996e-06, "loss": 0.0468, "step": 539625 }, { "epoch": 5.31, "grad_norm": 2.972494125366211, "learning_rate": 2.371498505565651e-06, "loss": 0.098, "step": 539650 }, { "epoch": 5.31, "grad_norm": 11.896100997924805, "learning_rate": 2.3713743831114024e-06, "loss": 0.0365, "step": 539675 }, { "epoch": 5.31, "grad_norm": 1.1455992460250854, "learning_rate": 2.371250260657154e-06, "loss": 0.0867, "step": 539700 }, { "epoch": 5.31, "grad_norm": 3.5522546768188477, "learning_rate": 2.3711261382029057e-06, "loss": 0.0363, "step": 539725 }, { "epoch": 5.31, "grad_norm": 3.186744451522827, "learning_rate": 2.3710020157486573e-06, "loss": 0.0806, "step": 539750 }, { "epoch": 5.31, "grad_norm": 8.843108177185059, "learning_rate": 2.370877893294409e-06, "loss": 0.0415, "step": 539775 }, { "epoch": 5.31, "grad_norm": 1.2591694593429565, "learning_rate": 2.3707537708401606e-06, "loss": 0.1251, "step": 539800 }, { "epoch": 5.31, "grad_norm": 10.686407089233398, "learning_rate": 2.370629648385912e-06, "loss": 0.036, "step": 539825 }, { "epoch": 5.31, "grad_norm": 0.03908088430762291, "learning_rate": 2.3705055259316634e-06, "loss": 0.0817, "step": 539850 }, { "epoch": 5.31, "grad_norm": 13.045026779174805, "learning_rate": 2.3703814034774146e-06, "loss": 0.0397, "step": 539875 }, { "epoch": 5.31, "grad_norm": 0.30036455392837524, "learning_rate": 2.3702572810231663e-06, "loss": 0.0767, "step": 539900 }, { "epoch": 5.31, "grad_norm": 6.345373630523682, "learning_rate": 2.370133158568918e-06, "loss": 0.0476, "step": 539925 }, { "epoch": 5.31, "grad_norm": 0.5498595237731934, "learning_rate": 2.3700090361146695e-06, "loss": 0.0922, "step": 539950 }, { "epoch": 5.31, "grad_norm": 9.613784790039062, "learning_rate": 2.369884913660421e-06, "loss": 0.0575, "step": 539975 }, { "epoch": 5.31, "grad_norm": 6.61207914352417, "learning_rate": 2.369760791206173e-06, "loss": 0.0804, "step": 540000 }, { "epoch": 5.31, "eval_loss": 0.7639896273612976, "eval_runtime": 6101.7018, "eval_samples_per_second": 1.552, "eval_steps_per_second": 0.194, "eval_wer": 0.11580184124122836, "step": 540000 }, { "epoch": 5.31, "grad_norm": 6.109635353088379, "learning_rate": 2.369636668751924e-06, "loss": 0.0585, "step": 540025 }, { "epoch": 5.31, "grad_norm": 0.14283892512321472, "learning_rate": 2.3695125462976757e-06, "loss": 0.0706, "step": 540050 }, { "epoch": 5.31, "grad_norm": 14.52751636505127, "learning_rate": 2.3693884238434273e-06, "loss": 0.0592, "step": 540075 }, { "epoch": 5.31, "grad_norm": 4.870349407196045, "learning_rate": 2.3692643013891785e-06, "loss": 0.067, "step": 540100 }, { "epoch": 5.31, "grad_norm": 13.994752883911133, "learning_rate": 2.36914017893493e-06, "loss": 0.0643, "step": 540125 }, { "epoch": 5.31, "grad_norm": 3.6066174507141113, "learning_rate": 2.3690160564806818e-06, "loss": 0.0836, "step": 540150 }, { "epoch": 5.31, "grad_norm": 14.962323188781738, "learning_rate": 2.3688919340264334e-06, "loss": 0.0576, "step": 540175 }, { "epoch": 5.31, "grad_norm": 1.331986665725708, "learning_rate": 2.368767811572185e-06, "loss": 0.0815, "step": 540200 }, { "epoch": 5.31, "grad_norm": 10.504961967468262, "learning_rate": 2.3686436891179367e-06, "loss": 0.0608, "step": 540225 }, { "epoch": 5.31, "grad_norm": 2.178668737411499, "learning_rate": 2.368519566663688e-06, "loss": 0.0754, "step": 540250 }, { "epoch": 5.31, "grad_norm": 8.534971237182617, "learning_rate": 2.3683954442094395e-06, "loss": 0.0472, "step": 540275 }, { "epoch": 5.31, "grad_norm": 0.16953368484973907, "learning_rate": 2.3682713217551907e-06, "loss": 0.0833, "step": 540300 }, { "epoch": 5.31, "grad_norm": 5.11976432800293, "learning_rate": 2.3681471993009424e-06, "loss": 0.0471, "step": 540325 }, { "epoch": 5.31, "grad_norm": 0.41501113772392273, "learning_rate": 2.368023076846694e-06, "loss": 0.0871, "step": 540350 }, { "epoch": 5.31, "grad_norm": 11.819955825805664, "learning_rate": 2.3678989543924456e-06, "loss": 0.0496, "step": 540375 }, { "epoch": 5.31, "grad_norm": 0.30638810992240906, "learning_rate": 2.3677748319381973e-06, "loss": 0.081, "step": 540400 }, { "epoch": 5.31, "grad_norm": 12.49555492401123, "learning_rate": 2.367650709483949e-06, "loss": 0.0453, "step": 540425 }, { "epoch": 5.31, "grad_norm": 2.3441171646118164, "learning_rate": 2.3675265870297e-06, "loss": 0.0878, "step": 540450 }, { "epoch": 5.31, "grad_norm": 9.348756790161133, "learning_rate": 2.3674024645754518e-06, "loss": 0.0381, "step": 540475 }, { "epoch": 5.31, "grad_norm": 0.5867544412612915, "learning_rate": 2.3672783421212034e-06, "loss": 0.0773, "step": 540500 }, { "epoch": 5.31, "grad_norm": 12.310863494873047, "learning_rate": 2.3671542196669546e-06, "loss": 0.0432, "step": 540525 }, { "epoch": 5.31, "grad_norm": 0.034489795565605164, "learning_rate": 2.3670300972127062e-06, "loss": 0.0763, "step": 540550 }, { "epoch": 5.32, "grad_norm": 8.949857711791992, "learning_rate": 2.366905974758458e-06, "loss": 0.0397, "step": 540575 }, { "epoch": 5.32, "grad_norm": 1.4469048976898193, "learning_rate": 2.3667818523042095e-06, "loss": 0.0738, "step": 540600 }, { "epoch": 5.32, "grad_norm": 14.941162109375, "learning_rate": 2.366657729849961e-06, "loss": 0.0811, "step": 540625 }, { "epoch": 5.32, "grad_norm": 1.1629457473754883, "learning_rate": 2.3665336073957128e-06, "loss": 0.0853, "step": 540650 }, { "epoch": 5.32, "grad_norm": 6.657078266143799, "learning_rate": 2.366409484941464e-06, "loss": 0.0552, "step": 540675 }, { "epoch": 5.32, "grad_norm": 7.669007778167725, "learning_rate": 2.3662853624872156e-06, "loss": 0.1276, "step": 540700 }, { "epoch": 5.32, "grad_norm": 13.277936935424805, "learning_rate": 2.366161240032967e-06, "loss": 0.0532, "step": 540725 }, { "epoch": 5.32, "grad_norm": 1.6436177492141724, "learning_rate": 2.3660371175787185e-06, "loss": 0.072, "step": 540750 }, { "epoch": 5.32, "grad_norm": 7.9210662841796875, "learning_rate": 2.36591299512447e-06, "loss": 0.0642, "step": 540775 }, { "epoch": 5.32, "grad_norm": 1.2449467182159424, "learning_rate": 2.3657888726702217e-06, "loss": 0.0734, "step": 540800 }, { "epoch": 5.32, "grad_norm": 8.452159881591797, "learning_rate": 2.3656647502159734e-06, "loss": 0.0481, "step": 540825 }, { "epoch": 5.32, "grad_norm": 3.7434959411621094, "learning_rate": 2.365540627761725e-06, "loss": 0.0835, "step": 540850 }, { "epoch": 5.32, "grad_norm": 14.388382911682129, "learning_rate": 2.3654165053074762e-06, "loss": 0.0437, "step": 540875 }, { "epoch": 5.32, "grad_norm": 2.4475927352905273, "learning_rate": 2.365292382853228e-06, "loss": 0.0539, "step": 540900 }, { "epoch": 5.32, "grad_norm": 5.475462436676025, "learning_rate": 2.3651682603989795e-06, "loss": 0.0512, "step": 540925 }, { "epoch": 5.32, "grad_norm": 1.9267114400863647, "learning_rate": 2.3650441379447307e-06, "loss": 0.0914, "step": 540950 }, { "epoch": 5.32, "grad_norm": 7.032668113708496, "learning_rate": 2.3649200154904823e-06, "loss": 0.0495, "step": 540975 }, { "epoch": 5.32, "grad_norm": 1.8356436491012573, "learning_rate": 2.364795893036234e-06, "loss": 0.0742, "step": 541000 }, { "epoch": 5.32, "grad_norm": 5.176436901092529, "learning_rate": 2.3646717705819856e-06, "loss": 0.0506, "step": 541025 }, { "epoch": 5.32, "grad_norm": 1.407670259475708, "learning_rate": 2.3645476481277373e-06, "loss": 0.0721, "step": 541050 }, { "epoch": 5.32, "grad_norm": 7.092195510864258, "learning_rate": 2.364423525673489e-06, "loss": 0.0517, "step": 541075 }, { "epoch": 5.32, "grad_norm": 1.5115293264389038, "learning_rate": 2.36429940321924e-06, "loss": 0.1021, "step": 541100 }, { "epoch": 5.32, "grad_norm": 10.583637237548828, "learning_rate": 2.3641752807649917e-06, "loss": 0.0513, "step": 541125 }, { "epoch": 5.32, "grad_norm": 2.958418607711792, "learning_rate": 2.364051158310743e-06, "loss": 0.0763, "step": 541150 }, { "epoch": 5.32, "grad_norm": 12.811722755432129, "learning_rate": 2.3639270358564946e-06, "loss": 0.0473, "step": 541175 }, { "epoch": 5.32, "grad_norm": 0.02412877231836319, "learning_rate": 2.3638029134022462e-06, "loss": 0.0956, "step": 541200 }, { "epoch": 5.32, "grad_norm": 11.028632164001465, "learning_rate": 2.363678790947998e-06, "loss": 0.0434, "step": 541225 }, { "epoch": 5.32, "grad_norm": 3.204101085662842, "learning_rate": 2.3635546684937495e-06, "loss": 0.0721, "step": 541250 }, { "epoch": 5.32, "grad_norm": 21.007478713989258, "learning_rate": 2.363430546039501e-06, "loss": 0.0543, "step": 541275 }, { "epoch": 5.32, "grad_norm": 0.4464569687843323, "learning_rate": 2.3633064235852523e-06, "loss": 0.1033, "step": 541300 }, { "epoch": 5.32, "grad_norm": 11.533790588378906, "learning_rate": 2.363182301131004e-06, "loss": 0.049, "step": 541325 }, { "epoch": 5.32, "grad_norm": 1.725053310394287, "learning_rate": 2.3630581786767556e-06, "loss": 0.078, "step": 541350 }, { "epoch": 5.32, "grad_norm": 11.748322486877441, "learning_rate": 2.362934056222507e-06, "loss": 0.092, "step": 541375 }, { "epoch": 5.32, "grad_norm": 1.4984304904937744, "learning_rate": 2.3628099337682584e-06, "loss": 0.085, "step": 541400 }, { "epoch": 5.32, "grad_norm": 7.112555027008057, "learning_rate": 2.36268581131401e-06, "loss": 0.045, "step": 541425 }, { "epoch": 5.32, "grad_norm": 5.097825527191162, "learning_rate": 2.3625616888597617e-06, "loss": 0.0788, "step": 541450 }, { "epoch": 5.32, "grad_norm": 4.965160846710205, "learning_rate": 2.3624375664055134e-06, "loss": 0.0619, "step": 541475 }, { "epoch": 5.32, "grad_norm": 0.12548723816871643, "learning_rate": 2.362313443951265e-06, "loss": 0.0701, "step": 541500 }, { "epoch": 5.32, "grad_norm": 10.84586238861084, "learning_rate": 2.362189321497016e-06, "loss": 0.0458, "step": 541525 }, { "epoch": 5.32, "grad_norm": 0.17102785408496857, "learning_rate": 2.362065199042768e-06, "loss": 0.0581, "step": 541550 }, { "epoch": 5.32, "grad_norm": 10.396543502807617, "learning_rate": 2.361941076588519e-06, "loss": 0.0466, "step": 541575 }, { "epoch": 5.33, "grad_norm": 0.9667901992797852, "learning_rate": 2.3618169541342707e-06, "loss": 0.0922, "step": 541600 }, { "epoch": 5.33, "grad_norm": 10.242274284362793, "learning_rate": 2.3616928316800223e-06, "loss": 0.0396, "step": 541625 }, { "epoch": 5.33, "grad_norm": 1.8791815042495728, "learning_rate": 2.361568709225774e-06, "loss": 0.114, "step": 541650 }, { "epoch": 5.33, "grad_norm": 8.544710159301758, "learning_rate": 2.3614445867715256e-06, "loss": 0.0479, "step": 541675 }, { "epoch": 5.33, "grad_norm": 5.493015289306641, "learning_rate": 2.3613204643172772e-06, "loss": 0.0572, "step": 541700 }, { "epoch": 5.33, "grad_norm": 9.078688621520996, "learning_rate": 2.3611963418630284e-06, "loss": 0.0397, "step": 541725 }, { "epoch": 5.33, "grad_norm": 7.437276363372803, "learning_rate": 2.36107221940878e-06, "loss": 0.0681, "step": 541750 }, { "epoch": 5.33, "grad_norm": 16.248870849609375, "learning_rate": 2.3609480969545317e-06, "loss": 0.0461, "step": 541775 }, { "epoch": 5.33, "grad_norm": 0.38633963465690613, "learning_rate": 2.360823974500283e-06, "loss": 0.0865, "step": 541800 }, { "epoch": 5.33, "grad_norm": 5.309274196624756, "learning_rate": 2.3606998520460345e-06, "loss": 0.0454, "step": 541825 }, { "epoch": 5.33, "grad_norm": 0.25489893555641174, "learning_rate": 2.360575729591786e-06, "loss": 0.0624, "step": 541850 }, { "epoch": 5.33, "grad_norm": 14.8026123046875, "learning_rate": 2.360451607137538e-06, "loss": 0.0472, "step": 541875 }, { "epoch": 5.33, "grad_norm": 3.5221099853515625, "learning_rate": 2.3603274846832895e-06, "loss": 0.0798, "step": 541900 }, { "epoch": 5.33, "grad_norm": 17.22162628173828, "learning_rate": 2.360203362229041e-06, "loss": 0.0475, "step": 541925 }, { "epoch": 5.33, "grad_norm": 1.1494745016098022, "learning_rate": 2.3600792397747923e-06, "loss": 0.0867, "step": 541950 }, { "epoch": 5.33, "grad_norm": 12.883461952209473, "learning_rate": 2.359955117320544e-06, "loss": 0.0325, "step": 541975 }, { "epoch": 5.33, "grad_norm": 1.5608301162719727, "learning_rate": 2.359830994866295e-06, "loss": 0.0959, "step": 542000 }, { "epoch": 5.33, "grad_norm": 16.475841522216797, "learning_rate": 2.3597068724120468e-06, "loss": 0.0467, "step": 542025 }, { "epoch": 5.33, "grad_norm": 1.4735291004180908, "learning_rate": 2.3595827499577984e-06, "loss": 0.0902, "step": 542050 }, { "epoch": 5.33, "grad_norm": 13.549893379211426, "learning_rate": 2.35945862750355e-06, "loss": 0.0604, "step": 542075 }, { "epoch": 5.33, "grad_norm": 6.0166096687316895, "learning_rate": 2.3593345050493017e-06, "loss": 0.0865, "step": 542100 }, { "epoch": 5.33, "grad_norm": 6.7172746658325195, "learning_rate": 2.3592103825950533e-06, "loss": 0.0333, "step": 542125 }, { "epoch": 5.33, "grad_norm": 6.792939186096191, "learning_rate": 2.3590862601408045e-06, "loss": 0.0941, "step": 542150 }, { "epoch": 5.33, "grad_norm": 10.328031539916992, "learning_rate": 2.358962137686556e-06, "loss": 0.0499, "step": 542175 }, { "epoch": 5.33, "grad_norm": 2.6754567623138428, "learning_rate": 2.358838015232308e-06, "loss": 0.0765, "step": 542200 }, { "epoch": 5.33, "grad_norm": 13.099247932434082, "learning_rate": 2.3587138927780594e-06, "loss": 0.0479, "step": 542225 }, { "epoch": 5.33, "grad_norm": 4.0282487869262695, "learning_rate": 2.358589770323811e-06, "loss": 0.0651, "step": 542250 }, { "epoch": 5.33, "grad_norm": 11.284915924072266, "learning_rate": 2.3584656478695627e-06, "loss": 0.0686, "step": 542275 }, { "epoch": 5.33, "grad_norm": 0.9508029222488403, "learning_rate": 2.358341525415314e-06, "loss": 0.0958, "step": 542300 }, { "epoch": 5.33, "grad_norm": 6.600462436676025, "learning_rate": 2.3582174029610656e-06, "loss": 0.0522, "step": 542325 }, { "epoch": 5.33, "grad_norm": 1.9971606731414795, "learning_rate": 2.358093280506817e-06, "loss": 0.0572, "step": 542350 }, { "epoch": 5.33, "grad_norm": 9.815642356872559, "learning_rate": 2.3579691580525684e-06, "loss": 0.0558, "step": 542375 }, { "epoch": 5.33, "grad_norm": 1.3419163227081299, "learning_rate": 2.35784503559832e-06, "loss": 0.0849, "step": 542400 }, { "epoch": 5.33, "grad_norm": 14.11896800994873, "learning_rate": 2.3577209131440717e-06, "loss": 0.0375, "step": 542425 }, { "epoch": 5.33, "grad_norm": 0.7249996662139893, "learning_rate": 2.3575967906898233e-06, "loss": 0.0762, "step": 542450 }, { "epoch": 5.33, "grad_norm": 11.773731231689453, "learning_rate": 2.357472668235575e-06, "loss": 0.0506, "step": 542475 }, { "epoch": 5.33, "grad_norm": 3.0637176036834717, "learning_rate": 2.357348545781326e-06, "loss": 0.0769, "step": 542500 }, { "epoch": 5.33, "grad_norm": 17.660785675048828, "learning_rate": 2.3572244233270778e-06, "loss": 0.0927, "step": 542525 }, { "epoch": 5.33, "grad_norm": 7.255184650421143, "learning_rate": 2.3571003008728294e-06, "loss": 0.0862, "step": 542550 }, { "epoch": 5.33, "grad_norm": 6.503281593322754, "learning_rate": 2.3569761784185806e-06, "loss": 0.0511, "step": 542575 }, { "epoch": 5.33, "grad_norm": 0.6294177174568176, "learning_rate": 2.3568520559643323e-06, "loss": 0.1115, "step": 542600 }, { "epoch": 5.34, "grad_norm": 13.746877670288086, "learning_rate": 2.356727933510084e-06, "loss": 0.0455, "step": 542625 }, { "epoch": 5.34, "grad_norm": 5.847716808319092, "learning_rate": 2.3566038110558355e-06, "loss": 0.0999, "step": 542650 }, { "epoch": 5.34, "grad_norm": 17.1519718170166, "learning_rate": 2.356479688601587e-06, "loss": 0.0565, "step": 542675 }, { "epoch": 5.34, "grad_norm": 6.015132904052734, "learning_rate": 2.356355566147339e-06, "loss": 0.0856, "step": 542700 }, { "epoch": 5.34, "grad_norm": 1.541660189628601, "learning_rate": 2.35623144369309e-06, "loss": 0.0424, "step": 542725 }, { "epoch": 5.34, "grad_norm": 5.9260358810424805, "learning_rate": 2.3561073212388417e-06, "loss": 0.0742, "step": 542750 }, { "epoch": 5.34, "grad_norm": 9.907187461853027, "learning_rate": 2.3559831987845933e-06, "loss": 0.0562, "step": 542775 }, { "epoch": 5.34, "grad_norm": 0.10949341207742691, "learning_rate": 2.3558590763303445e-06, "loss": 0.102, "step": 542800 }, { "epoch": 5.34, "grad_norm": 0.3880130350589752, "learning_rate": 2.355734953876096e-06, "loss": 0.0436, "step": 542825 }, { "epoch": 5.34, "grad_norm": 3.2517619132995605, "learning_rate": 2.3556108314218478e-06, "loss": 0.0914, "step": 542850 }, { "epoch": 5.34, "grad_norm": 8.310556411743164, "learning_rate": 2.3554867089675994e-06, "loss": 0.0574, "step": 542875 }, { "epoch": 5.34, "grad_norm": 3.7233262062072754, "learning_rate": 2.355367551411521e-06, "loss": 0.0957, "step": 542900 }, { "epoch": 5.34, "grad_norm": 14.887750625610352, "learning_rate": 2.3552434289572725e-06, "loss": 0.0531, "step": 542925 }, { "epoch": 5.34, "grad_norm": 0.42605334520339966, "learning_rate": 2.355119306503024e-06, "loss": 0.0865, "step": 542950 }, { "epoch": 5.34, "grad_norm": 13.704984664916992, "learning_rate": 2.3549951840487753e-06, "loss": 0.0474, "step": 542975 }, { "epoch": 5.34, "grad_norm": 0.06843230128288269, "learning_rate": 2.354871061594527e-06, "loss": 0.0936, "step": 543000 }, { "epoch": 5.34, "grad_norm": 11.48908519744873, "learning_rate": 2.354746939140278e-06, "loss": 0.0585, "step": 543025 }, { "epoch": 5.34, "grad_norm": 0.08497518301010132, "learning_rate": 2.35462281668603e-06, "loss": 0.0654, "step": 543050 }, { "epoch": 5.34, "grad_norm": 1.5031180381774902, "learning_rate": 2.3544986942317815e-06, "loss": 0.0409, "step": 543075 }, { "epoch": 5.34, "grad_norm": 0.052362874150276184, "learning_rate": 2.354374571777533e-06, "loss": 0.0846, "step": 543100 }, { "epoch": 5.34, "grad_norm": 4.158339977264404, "learning_rate": 2.3542504493232847e-06, "loss": 0.0469, "step": 543125 }, { "epoch": 5.34, "grad_norm": 0.15135402977466583, "learning_rate": 2.3541263268690364e-06, "loss": 0.0728, "step": 543150 }, { "epoch": 5.34, "grad_norm": 13.377166748046875, "learning_rate": 2.3540022044147876e-06, "loss": 0.0622, "step": 543175 }, { "epoch": 5.34, "grad_norm": 0.20487427711486816, "learning_rate": 2.353878081960539e-06, "loss": 0.0836, "step": 543200 }, { "epoch": 5.34, "grad_norm": 6.794551372528076, "learning_rate": 2.353753959506291e-06, "loss": 0.0524, "step": 543225 }, { "epoch": 5.34, "grad_norm": 1.7780267000198364, "learning_rate": 2.353629837052042e-06, "loss": 0.1043, "step": 543250 }, { "epoch": 5.34, "grad_norm": 17.156063079833984, "learning_rate": 2.3535057145977937e-06, "loss": 0.0452, "step": 543275 }, { "epoch": 5.34, "grad_norm": 4.991341590881348, "learning_rate": 2.3533815921435453e-06, "loss": 0.1116, "step": 543300 }, { "epoch": 5.34, "grad_norm": 9.144195556640625, "learning_rate": 2.353257469689297e-06, "loss": 0.0346, "step": 543325 }, { "epoch": 5.34, "grad_norm": 3.2743980884552, "learning_rate": 2.3531333472350486e-06, "loss": 0.0605, "step": 543350 }, { "epoch": 5.34, "grad_norm": 12.89100456237793, "learning_rate": 2.3530092247808002e-06, "loss": 0.0536, "step": 543375 }, { "epoch": 5.34, "grad_norm": 0.6460604071617126, "learning_rate": 2.3528851023265514e-06, "loss": 0.0823, "step": 543400 }, { "epoch": 5.34, "grad_norm": 19.10441017150879, "learning_rate": 2.352760979872303e-06, "loss": 0.0485, "step": 543425 }, { "epoch": 5.34, "grad_norm": 0.5951221585273743, "learning_rate": 2.3526368574180543e-06, "loss": 0.0945, "step": 543450 }, { "epoch": 5.34, "grad_norm": 9.998756408691406, "learning_rate": 2.352512734963806e-06, "loss": 0.0643, "step": 543475 }, { "epoch": 5.34, "grad_norm": 0.12425771355628967, "learning_rate": 2.3523886125095576e-06, "loss": 0.1003, "step": 543500 }, { "epoch": 5.34, "grad_norm": 5.69351863861084, "learning_rate": 2.352264490055309e-06, "loss": 0.0355, "step": 543525 }, { "epoch": 5.34, "grad_norm": 20.42766571044922, "learning_rate": 2.352140367601061e-06, "loss": 0.1063, "step": 543550 }, { "epoch": 5.34, "grad_norm": 13.856764793395996, "learning_rate": 2.3520162451468125e-06, "loss": 0.0393, "step": 543575 }, { "epoch": 5.34, "grad_norm": 2.2706990242004395, "learning_rate": 2.3518921226925637e-06, "loss": 0.0663, "step": 543600 }, { "epoch": 5.35, "grad_norm": 13.411526679992676, "learning_rate": 2.3517680002383153e-06, "loss": 0.0392, "step": 543625 }, { "epoch": 5.35, "grad_norm": 0.8573804497718811, "learning_rate": 2.351643877784067e-06, "loss": 0.0432, "step": 543650 }, { "epoch": 5.35, "grad_norm": 6.971464157104492, "learning_rate": 2.351519755329818e-06, "loss": 0.056, "step": 543675 }, { "epoch": 5.35, "grad_norm": 0.5653982758522034, "learning_rate": 2.3513956328755698e-06, "loss": 0.0835, "step": 543700 }, { "epoch": 5.35, "grad_norm": 9.757359504699707, "learning_rate": 2.3512715104213214e-06, "loss": 0.0449, "step": 543725 }, { "epoch": 5.35, "grad_norm": 6.464751243591309, "learning_rate": 2.351147387967073e-06, "loss": 0.0818, "step": 543750 }, { "epoch": 5.35, "grad_norm": 9.460275650024414, "learning_rate": 2.3510232655128247e-06, "loss": 0.042, "step": 543775 }, { "epoch": 5.35, "grad_norm": 1.1635502576828003, "learning_rate": 2.3508991430585763e-06, "loss": 0.0985, "step": 543800 }, { "epoch": 5.35, "grad_norm": 10.612277030944824, "learning_rate": 2.3507750206043275e-06, "loss": 0.0726, "step": 543825 }, { "epoch": 5.35, "grad_norm": 1.4553322792053223, "learning_rate": 2.350650898150079e-06, "loss": 0.0884, "step": 543850 }, { "epoch": 5.35, "grad_norm": 8.121038436889648, "learning_rate": 2.3505267756958304e-06, "loss": 0.047, "step": 543875 }, { "epoch": 5.35, "grad_norm": 1.7186509370803833, "learning_rate": 2.350402653241582e-06, "loss": 0.112, "step": 543900 }, { "epoch": 5.35, "grad_norm": 13.172192573547363, "learning_rate": 2.3502785307873337e-06, "loss": 0.066, "step": 543925 }, { "epoch": 5.35, "grad_norm": 0.8307215571403503, "learning_rate": 2.3501544083330853e-06, "loss": 0.089, "step": 543950 }, { "epoch": 5.35, "grad_norm": 9.005983352661133, "learning_rate": 2.350030285878837e-06, "loss": 0.0366, "step": 543975 }, { "epoch": 5.35, "grad_norm": 2.1467177867889404, "learning_rate": 2.3499061634245886e-06, "loss": 0.0713, "step": 544000 }, { "epoch": 5.35, "grad_norm": 0.7023928165435791, "learning_rate": 2.3497820409703398e-06, "loss": 0.053, "step": 544025 }, { "epoch": 5.35, "grad_norm": 0.6302984356880188, "learning_rate": 2.3496579185160914e-06, "loss": 0.0791, "step": 544050 }, { "epoch": 5.35, "grad_norm": 0.5981355905532837, "learning_rate": 2.349533796061843e-06, "loss": 0.0667, "step": 544075 }, { "epoch": 5.35, "grad_norm": 0.21023626625537872, "learning_rate": 2.3494096736075943e-06, "loss": 0.0624, "step": 544100 }, { "epoch": 5.35, "grad_norm": 5.5923638343811035, "learning_rate": 2.349285551153346e-06, "loss": 0.0447, "step": 544125 }, { "epoch": 5.35, "grad_norm": 1.4944607019424438, "learning_rate": 2.3491614286990975e-06, "loss": 0.0751, "step": 544150 }, { "epoch": 5.35, "grad_norm": 14.574214935302734, "learning_rate": 2.349037306244849e-06, "loss": 0.0569, "step": 544175 }, { "epoch": 5.35, "grad_norm": 4.835447311401367, "learning_rate": 2.348913183790601e-06, "loss": 0.1099, "step": 544200 }, { "epoch": 5.35, "grad_norm": 12.074933052062988, "learning_rate": 2.3487890613363524e-06, "loss": 0.0752, "step": 544225 }, { "epoch": 5.35, "grad_norm": 5.575271129608154, "learning_rate": 2.3486649388821036e-06, "loss": 0.1277, "step": 544250 }, { "epoch": 5.35, "grad_norm": 6.522902488708496, "learning_rate": 2.3485408164278553e-06, "loss": 0.0503, "step": 544275 }, { "epoch": 5.35, "grad_norm": 0.3065147399902344, "learning_rate": 2.3484166939736065e-06, "loss": 0.1116, "step": 544300 }, { "epoch": 5.35, "grad_norm": 4.086756229400635, "learning_rate": 2.348292571519358e-06, "loss": 0.0425, "step": 544325 }, { "epoch": 5.35, "grad_norm": 0.1486108899116516, "learning_rate": 2.3481684490651098e-06, "loss": 0.0739, "step": 544350 }, { "epoch": 5.35, "grad_norm": 13.87750244140625, "learning_rate": 2.3480443266108614e-06, "loss": 0.0509, "step": 544375 }, { "epoch": 5.35, "grad_norm": 0.4625931680202484, "learning_rate": 2.347920204156613e-06, "loss": 0.0663, "step": 544400 }, { "epoch": 5.35, "grad_norm": 11.506490707397461, "learning_rate": 2.3477960817023647e-06, "loss": 0.0638, "step": 544425 }, { "epoch": 5.35, "grad_norm": 1.0873831510543823, "learning_rate": 2.347671959248116e-06, "loss": 0.0946, "step": 544450 }, { "epoch": 5.35, "grad_norm": 11.823735237121582, "learning_rate": 2.3475478367938675e-06, "loss": 0.0458, "step": 544475 }, { "epoch": 5.35, "grad_norm": 1.9268403053283691, "learning_rate": 2.347423714339619e-06, "loss": 0.0815, "step": 544500 }, { "epoch": 5.35, "grad_norm": 7.001045227050781, "learning_rate": 2.3472995918853704e-06, "loss": 0.0645, "step": 544525 }, { "epoch": 5.35, "grad_norm": 0.0784808024764061, "learning_rate": 2.347175469431122e-06, "loss": 0.0815, "step": 544550 }, { "epoch": 5.35, "grad_norm": 6.869597434997559, "learning_rate": 2.3470513469768736e-06, "loss": 0.0467, "step": 544575 }, { "epoch": 5.35, "grad_norm": 0.10884302854537964, "learning_rate": 2.3469272245226253e-06, "loss": 0.0906, "step": 544600 }, { "epoch": 5.35, "grad_norm": 12.006632804870605, "learning_rate": 2.346803102068377e-06, "loss": 0.0532, "step": 544625 }, { "epoch": 5.36, "grad_norm": 0.528594434261322, "learning_rate": 2.3466789796141285e-06, "loss": 0.0551, "step": 544650 }, { "epoch": 5.36, "grad_norm": 11.41781997680664, "learning_rate": 2.3465548571598797e-06, "loss": 0.0633, "step": 544675 }, { "epoch": 5.36, "grad_norm": 0.33296146988868713, "learning_rate": 2.3464307347056314e-06, "loss": 0.0713, "step": 544700 }, { "epoch": 5.36, "grad_norm": 11.206155776977539, "learning_rate": 2.3463066122513826e-06, "loss": 0.0569, "step": 544725 }, { "epoch": 5.36, "grad_norm": 3.515761375427246, "learning_rate": 2.3461824897971342e-06, "loss": 0.0986, "step": 544750 }, { "epoch": 5.36, "grad_norm": 3.3487486839294434, "learning_rate": 2.346058367342886e-06, "loss": 0.0354, "step": 544775 }, { "epoch": 5.36, "grad_norm": 6.075984954833984, "learning_rate": 2.3459342448886375e-06, "loss": 0.1052, "step": 544800 }, { "epoch": 5.36, "grad_norm": 13.781476974487305, "learning_rate": 2.345810122434389e-06, "loss": 0.056, "step": 544825 }, { "epoch": 5.36, "grad_norm": 6.284210681915283, "learning_rate": 2.3456859999801408e-06, "loss": 0.0764, "step": 544850 }, { "epoch": 5.36, "grad_norm": 13.822347640991211, "learning_rate": 2.345561877525892e-06, "loss": 0.0448, "step": 544875 }, { "epoch": 5.36, "grad_norm": 1.7449861764907837, "learning_rate": 2.3454377550716436e-06, "loss": 0.0656, "step": 544900 }, { "epoch": 5.36, "grad_norm": 11.837261199951172, "learning_rate": 2.3453136326173952e-06, "loss": 0.0512, "step": 544925 }, { "epoch": 5.36, "grad_norm": 0.18466205894947052, "learning_rate": 2.3451895101631465e-06, "loss": 0.061, "step": 544950 }, { "epoch": 5.36, "grad_norm": 9.634770393371582, "learning_rate": 2.345065387708898e-06, "loss": 0.0495, "step": 544975 }, { "epoch": 5.36, "grad_norm": 4.5128278732299805, "learning_rate": 2.3449412652546497e-06, "loss": 0.0918, "step": 545000 }, { "epoch": 5.36, "grad_norm": 3.63388991355896, "learning_rate": 2.3448171428004014e-06, "loss": 0.0665, "step": 545025 }, { "epoch": 5.36, "grad_norm": 2.3222923278808594, "learning_rate": 2.344693020346153e-06, "loss": 0.1014, "step": 545050 }, { "epoch": 5.36, "grad_norm": 8.816636085510254, "learning_rate": 2.3445688978919046e-06, "loss": 0.0557, "step": 545075 }, { "epoch": 5.36, "grad_norm": 1.5233508348464966, "learning_rate": 2.344444775437656e-06, "loss": 0.0813, "step": 545100 }, { "epoch": 5.36, "grad_norm": 6.017764568328857, "learning_rate": 2.3443206529834075e-06, "loss": 0.046, "step": 545125 }, { "epoch": 5.36, "grad_norm": 1.002275824546814, "learning_rate": 2.344201495427329e-06, "loss": 0.0783, "step": 545150 }, { "epoch": 5.36, "grad_norm": 9.469840049743652, "learning_rate": 2.3440773729730806e-06, "loss": 0.0413, "step": 545175 }, { "epoch": 5.36, "grad_norm": 0.11540858447551727, "learning_rate": 2.343953250518832e-06, "loss": 0.0705, "step": 545200 }, { "epoch": 5.36, "grad_norm": 10.245406150817871, "learning_rate": 2.343829128064584e-06, "loss": 0.0372, "step": 545225 }, { "epoch": 5.36, "grad_norm": 3.124068021774292, "learning_rate": 2.3437050056103355e-06, "loss": 0.0895, "step": 545250 }, { "epoch": 5.36, "grad_norm": 3.2495851516723633, "learning_rate": 2.3435808831560867e-06, "loss": 0.0451, "step": 545275 }, { "epoch": 5.36, "grad_norm": 8.501265525817871, "learning_rate": 2.343461725600008e-06, "loss": 0.098, "step": 545300 }, { "epoch": 5.36, "grad_norm": 8.631730079650879, "learning_rate": 2.3433376031457598e-06, "loss": 0.0453, "step": 545325 }, { "epoch": 5.36, "grad_norm": 1.0149635076522827, "learning_rate": 2.3432134806915114e-06, "loss": 0.0671, "step": 545350 }, { "epoch": 5.36, "grad_norm": 13.922067642211914, "learning_rate": 2.3430893582372626e-06, "loss": 0.0419, "step": 545375 }, { "epoch": 5.36, "grad_norm": 1.187525749206543, "learning_rate": 2.3429652357830142e-06, "loss": 0.1122, "step": 545400 }, { "epoch": 5.36, "grad_norm": 18.245750427246094, "learning_rate": 2.342841113328766e-06, "loss": 0.0495, "step": 545425 }, { "epoch": 5.36, "grad_norm": 4.765649318695068, "learning_rate": 2.3427169908745175e-06, "loss": 0.0999, "step": 545450 }, { "epoch": 5.36, "grad_norm": 11.11484432220459, "learning_rate": 2.342592868420269e-06, "loss": 0.0554, "step": 545475 }, { "epoch": 5.36, "grad_norm": 0.47313588857650757, "learning_rate": 2.3424687459660204e-06, "loss": 0.0653, "step": 545500 }, { "epoch": 5.36, "grad_norm": 6.636096000671387, "learning_rate": 2.342344623511772e-06, "loss": 0.0515, "step": 545525 }, { "epoch": 5.36, "grad_norm": 4.401478290557861, "learning_rate": 2.3422205010575236e-06, "loss": 0.0897, "step": 545550 }, { "epoch": 5.36, "grad_norm": 2.8588685989379883, "learning_rate": 2.342096378603275e-06, "loss": 0.0415, "step": 545575 }, { "epoch": 5.36, "grad_norm": 0.6520808935165405, "learning_rate": 2.3419722561490265e-06, "loss": 0.0585, "step": 545600 }, { "epoch": 5.36, "grad_norm": 9.307197570800781, "learning_rate": 2.341848133694778e-06, "loss": 0.0397, "step": 545625 }, { "epoch": 5.36, "grad_norm": 1.2074451446533203, "learning_rate": 2.3417240112405297e-06, "loss": 0.0497, "step": 545650 }, { "epoch": 5.37, "grad_norm": 10.977704048156738, "learning_rate": 2.3415998887862814e-06, "loss": 0.0518, "step": 545675 }, { "epoch": 5.37, "grad_norm": 0.31467923521995544, "learning_rate": 2.341475766332033e-06, "loss": 0.0892, "step": 545700 }, { "epoch": 5.37, "grad_norm": 8.635778427124023, "learning_rate": 2.3413516438777842e-06, "loss": 0.0519, "step": 545725 }, { "epoch": 5.37, "grad_norm": 1.369370937347412, "learning_rate": 2.341227521423536e-06, "loss": 0.0892, "step": 545750 }, { "epoch": 5.37, "grad_norm": 8.72986888885498, "learning_rate": 2.3411033989692875e-06, "loss": 0.0687, "step": 545775 }, { "epoch": 5.37, "grad_norm": 1.9382073879241943, "learning_rate": 2.3409792765150387e-06, "loss": 0.0822, "step": 545800 }, { "epoch": 5.37, "grad_norm": 8.890556335449219, "learning_rate": 2.3408551540607903e-06, "loss": 0.063, "step": 545825 }, { "epoch": 5.37, "grad_norm": 7.536726951599121, "learning_rate": 2.340731031606542e-06, "loss": 0.065, "step": 545850 }, { "epoch": 5.37, "grad_norm": 14.827479362487793, "learning_rate": 2.3406069091522936e-06, "loss": 0.0498, "step": 545875 }, { "epoch": 5.37, "grad_norm": 0.05723247304558754, "learning_rate": 2.3404827866980453e-06, "loss": 0.0784, "step": 545900 }, { "epoch": 5.37, "grad_norm": 18.686174392700195, "learning_rate": 2.3403586642437965e-06, "loss": 0.0377, "step": 545925 }, { "epoch": 5.37, "grad_norm": 7.232578277587891, "learning_rate": 2.340234541789548e-06, "loss": 0.0949, "step": 545950 }, { "epoch": 5.37, "grad_norm": 12.194890022277832, "learning_rate": 2.3401104193352997e-06, "loss": 0.0506, "step": 545975 }, { "epoch": 5.37, "grad_norm": 0.13232922554016113, "learning_rate": 2.339986296881051e-06, "loss": 0.1016, "step": 546000 }, { "epoch": 5.37, "grad_norm": 2.8078205585479736, "learning_rate": 2.3398621744268026e-06, "loss": 0.0528, "step": 546025 }, { "epoch": 5.37, "grad_norm": 0.20874080061912537, "learning_rate": 2.3397380519725542e-06, "loss": 0.0715, "step": 546050 }, { "epoch": 5.37, "grad_norm": 14.439690589904785, "learning_rate": 2.339613929518306e-06, "loss": 0.0557, "step": 546075 }, { "epoch": 5.37, "grad_norm": 2.4527766704559326, "learning_rate": 2.3394898070640575e-06, "loss": 0.1025, "step": 546100 }, { "epoch": 5.37, "grad_norm": 4.5159711837768555, "learning_rate": 2.339365684609809e-06, "loss": 0.0569, "step": 546125 }, { "epoch": 5.37, "grad_norm": 0.08712780475616455, "learning_rate": 2.3392415621555603e-06, "loss": 0.0653, "step": 546150 }, { "epoch": 5.37, "grad_norm": 4.3264055252075195, "learning_rate": 2.339117439701312e-06, "loss": 0.0423, "step": 546175 }, { "epoch": 5.37, "grad_norm": 3.8167002201080322, "learning_rate": 2.3389933172470636e-06, "loss": 0.0916, "step": 546200 }, { "epoch": 5.37, "grad_norm": 1.0924018621444702, "learning_rate": 2.338869194792815e-06, "loss": 0.058, "step": 546225 }, { "epoch": 5.37, "grad_norm": 0.4672817885875702, "learning_rate": 2.3387450723385664e-06, "loss": 0.0558, "step": 546250 }, { "epoch": 5.37, "grad_norm": 4.5165276527404785, "learning_rate": 2.338620949884318e-06, "loss": 0.0415, "step": 546275 }, { "epoch": 5.37, "grad_norm": 0.28811997175216675, "learning_rate": 2.3384968274300697e-06, "loss": 0.0939, "step": 546300 }, { "epoch": 5.37, "grad_norm": 9.55251407623291, "learning_rate": 2.3383727049758214e-06, "loss": 0.0517, "step": 546325 }, { "epoch": 5.37, "grad_norm": 2.5916128158569336, "learning_rate": 2.3382485825215726e-06, "loss": 0.0897, "step": 546350 }, { "epoch": 5.37, "grad_norm": 12.34228229522705, "learning_rate": 2.338124460067324e-06, "loss": 0.0578, "step": 546375 }, { "epoch": 5.37, "grad_norm": 3.1172537803649902, "learning_rate": 2.338000337613076e-06, "loss": 0.0711, "step": 546400 }, { "epoch": 5.37, "grad_norm": 6.133243083953857, "learning_rate": 2.337876215158827e-06, "loss": 0.0495, "step": 546425 }, { "epoch": 5.37, "grad_norm": 4.362992763519287, "learning_rate": 2.3377520927045787e-06, "loss": 0.0884, "step": 546450 }, { "epoch": 5.37, "grad_norm": 7.534833908081055, "learning_rate": 2.3376279702503303e-06, "loss": 0.0536, "step": 546475 }, { "epoch": 5.37, "grad_norm": 0.024279452860355377, "learning_rate": 2.337503847796082e-06, "loss": 0.0922, "step": 546500 }, { "epoch": 5.37, "grad_norm": 14.771111488342285, "learning_rate": 2.3373797253418336e-06, "loss": 0.0406, "step": 546525 }, { "epoch": 5.37, "grad_norm": 0.15057063102722168, "learning_rate": 2.3372556028875852e-06, "loss": 0.0697, "step": 546550 }, { "epoch": 5.37, "grad_norm": 10.374651908874512, "learning_rate": 2.3371314804333364e-06, "loss": 0.0349, "step": 546575 }, { "epoch": 5.37, "grad_norm": 0.8771985173225403, "learning_rate": 2.337007357979088e-06, "loss": 0.1003, "step": 546600 }, { "epoch": 5.37, "grad_norm": 9.798650741577148, "learning_rate": 2.3368832355248397e-06, "loss": 0.0502, "step": 546625 }, { "epoch": 5.37, "grad_norm": 3.6704585552215576, "learning_rate": 2.336759113070591e-06, "loss": 0.0833, "step": 546650 }, { "epoch": 5.37, "grad_norm": 18.460094451904297, "learning_rate": 2.3366349906163425e-06, "loss": 0.0515, "step": 546675 }, { "epoch": 5.38, "grad_norm": 2.7277307510375977, "learning_rate": 2.336510868162094e-06, "loss": 0.0779, "step": 546700 }, { "epoch": 5.38, "grad_norm": 15.22480583190918, "learning_rate": 2.336386745707846e-06, "loss": 0.038, "step": 546725 }, { "epoch": 5.38, "grad_norm": 5.074147701263428, "learning_rate": 2.3362626232535975e-06, "loss": 0.0802, "step": 546750 }, { "epoch": 5.38, "grad_norm": 7.390774250030518, "learning_rate": 2.3361385007993487e-06, "loss": 0.0543, "step": 546775 }, { "epoch": 5.38, "grad_norm": 4.24985933303833, "learning_rate": 2.3360143783451003e-06, "loss": 0.0648, "step": 546800 }, { "epoch": 5.38, "grad_norm": 1.9122804403305054, "learning_rate": 2.335890255890852e-06, "loss": 0.053, "step": 546825 }, { "epoch": 5.38, "grad_norm": 1.770337462425232, "learning_rate": 2.335766133436603e-06, "loss": 0.0705, "step": 546850 }, { "epoch": 5.38, "grad_norm": 10.501012802124023, "learning_rate": 2.3356420109823548e-06, "loss": 0.0489, "step": 546875 }, { "epoch": 5.38, "grad_norm": 0.17247657477855682, "learning_rate": 2.3355178885281064e-06, "loss": 0.0801, "step": 546900 }, { "epoch": 5.38, "grad_norm": 7.374335765838623, "learning_rate": 2.335393766073858e-06, "loss": 0.0439, "step": 546925 }, { "epoch": 5.38, "grad_norm": 1.9160877466201782, "learning_rate": 2.3352696436196097e-06, "loss": 0.0716, "step": 546950 }, { "epoch": 5.38, "grad_norm": 11.9220609664917, "learning_rate": 2.3351455211653613e-06, "loss": 0.0426, "step": 546975 }, { "epoch": 5.38, "grad_norm": 4.1053032875061035, "learning_rate": 2.3350213987111125e-06, "loss": 0.0802, "step": 547000 }, { "epoch": 5.38, "grad_norm": 4.940043926239014, "learning_rate": 2.334897276256864e-06, "loss": 0.069, "step": 547025 }, { "epoch": 5.38, "grad_norm": 5.029728889465332, "learning_rate": 2.334773153802616e-06, "loss": 0.0778, "step": 547050 }, { "epoch": 5.38, "grad_norm": 10.2337064743042, "learning_rate": 2.334649031348367e-06, "loss": 0.0537, "step": 547075 }, { "epoch": 5.38, "grad_norm": 2.1876728534698486, "learning_rate": 2.3345249088941186e-06, "loss": 0.0711, "step": 547100 }, { "epoch": 5.38, "grad_norm": 11.793916702270508, "learning_rate": 2.3344007864398703e-06, "loss": 0.0463, "step": 547125 }, { "epoch": 5.38, "grad_norm": 0.04777942970395088, "learning_rate": 2.334276663985622e-06, "loss": 0.0642, "step": 547150 }, { "epoch": 5.38, "grad_norm": 9.734174728393555, "learning_rate": 2.3341525415313736e-06, "loss": 0.0413, "step": 547175 }, { "epoch": 5.38, "grad_norm": 1.1199835538864136, "learning_rate": 2.3340284190771248e-06, "loss": 0.094, "step": 547200 }, { "epoch": 5.38, "grad_norm": 8.710977554321289, "learning_rate": 2.3339042966228764e-06, "loss": 0.0417, "step": 547225 }, { "epoch": 5.38, "grad_norm": 1.9274623394012451, "learning_rate": 2.333780174168628e-06, "loss": 0.0875, "step": 547250 }, { "epoch": 5.38, "grad_norm": 9.769111633300781, "learning_rate": 2.3336560517143792e-06, "loss": 0.0702, "step": 547275 }, { "epoch": 5.38, "grad_norm": 8.42846393585205, "learning_rate": 2.333531929260131e-06, "loss": 0.0767, "step": 547300 }, { "epoch": 5.38, "grad_norm": 5.96288537979126, "learning_rate": 2.3334078068058825e-06, "loss": 0.0617, "step": 547325 }, { "epoch": 5.38, "grad_norm": 2.117155075073242, "learning_rate": 2.333283684351634e-06, "loss": 0.0732, "step": 547350 }, { "epoch": 5.38, "grad_norm": 11.411795616149902, "learning_rate": 2.3331595618973858e-06, "loss": 0.0693, "step": 547375 }, { "epoch": 5.38, "grad_norm": 1.7483493089675903, "learning_rate": 2.3330354394431374e-06, "loss": 0.0721, "step": 547400 }, { "epoch": 5.38, "grad_norm": 11.837509155273438, "learning_rate": 2.3329113169888886e-06, "loss": 0.0561, "step": 547425 }, { "epoch": 5.38, "grad_norm": 3.287804365158081, "learning_rate": 2.3327871945346403e-06, "loss": 0.073, "step": 547450 }, { "epoch": 5.38, "grad_norm": 9.048166275024414, "learning_rate": 2.332663072080392e-06, "loss": 0.0598, "step": 547475 }, { "epoch": 5.38, "grad_norm": 0.06907150894403458, "learning_rate": 2.332538949626143e-06, "loss": 0.0824, "step": 547500 }, { "epoch": 5.38, "grad_norm": 7.012850761413574, "learning_rate": 2.3324148271718947e-06, "loss": 0.0419, "step": 547525 }, { "epoch": 5.38, "grad_norm": 0.9157940149307251, "learning_rate": 2.3322907047176464e-06, "loss": 0.0743, "step": 547550 }, { "epoch": 5.38, "grad_norm": 15.739889144897461, "learning_rate": 2.332166582263398e-06, "loss": 0.0627, "step": 547575 }, { "epoch": 5.38, "grad_norm": 3.6615102291107178, "learning_rate": 2.3320424598091497e-06, "loss": 0.0879, "step": 547600 }, { "epoch": 5.38, "grad_norm": 9.457281112670898, "learning_rate": 2.331918337354901e-06, "loss": 0.0439, "step": 547625 }, { "epoch": 5.38, "grad_norm": 4.655602931976318, "learning_rate": 2.3317942149006525e-06, "loss": 0.0879, "step": 547650 }, { "epoch": 5.38, "grad_norm": 19.439022064208984, "learning_rate": 2.331670092446404e-06, "loss": 0.0457, "step": 547675 }, { "epoch": 5.39, "grad_norm": 2.340606212615967, "learning_rate": 2.3315459699921553e-06, "loss": 0.0786, "step": 547700 }, { "epoch": 5.39, "grad_norm": 5.976609230041504, "learning_rate": 2.331421847537907e-06, "loss": 0.028, "step": 547725 }, { "epoch": 5.39, "grad_norm": 0.9832989573478699, "learning_rate": 2.3312977250836586e-06, "loss": 0.0756, "step": 547750 }, { "epoch": 5.39, "grad_norm": 4.598147392272949, "learning_rate": 2.3311736026294103e-06, "loss": 0.059, "step": 547775 }, { "epoch": 5.39, "grad_norm": 0.37289106845855713, "learning_rate": 2.331049480175162e-06, "loss": 0.0772, "step": 547800 }, { "epoch": 5.39, "grad_norm": 10.314719200134277, "learning_rate": 2.3309253577209135e-06, "loss": 0.0704, "step": 547825 }, { "epoch": 5.39, "grad_norm": 2.3912134170532227, "learning_rate": 2.3308012352666647e-06, "loss": 0.0758, "step": 547850 }, { "epoch": 5.39, "grad_norm": 14.451354026794434, "learning_rate": 2.3306771128124164e-06, "loss": 0.0448, "step": 547875 }, { "epoch": 5.39, "grad_norm": 29.115137100219727, "learning_rate": 2.330552990358168e-06, "loss": 0.0919, "step": 547900 }, { "epoch": 5.39, "grad_norm": 12.649455070495605, "learning_rate": 2.3304288679039192e-06, "loss": 0.0426, "step": 547925 }, { "epoch": 5.39, "grad_norm": 0.21467255055904388, "learning_rate": 2.330304745449671e-06, "loss": 0.0631, "step": 547950 }, { "epoch": 5.39, "grad_norm": 8.398798942565918, "learning_rate": 2.3301806229954225e-06, "loss": 0.0616, "step": 547975 }, { "epoch": 5.39, "grad_norm": 5.942273139953613, "learning_rate": 2.330056500541174e-06, "loss": 0.0798, "step": 548000 }, { "epoch": 5.39, "grad_norm": 5.275056838989258, "learning_rate": 2.3299323780869258e-06, "loss": 0.0565, "step": 548025 }, { "epoch": 5.39, "grad_norm": 0.19252298772335052, "learning_rate": 2.329808255632677e-06, "loss": 0.0735, "step": 548050 }, { "epoch": 5.39, "grad_norm": 15.889158248901367, "learning_rate": 2.3296841331784286e-06, "loss": 0.059, "step": 548075 }, { "epoch": 5.39, "grad_norm": 2.265026569366455, "learning_rate": 2.3295600107241802e-06, "loss": 0.0586, "step": 548100 }, { "epoch": 5.39, "grad_norm": 10.634604454040527, "learning_rate": 2.329435888269932e-06, "loss": 0.0682, "step": 548125 }, { "epoch": 5.39, "grad_norm": 0.1336589753627777, "learning_rate": 2.3293117658156835e-06, "loss": 0.0808, "step": 548150 }, { "epoch": 5.39, "grad_norm": 22.51915740966797, "learning_rate": 2.329187643361435e-06, "loss": 0.0765, "step": 548175 }, { "epoch": 5.39, "grad_norm": 3.5102577209472656, "learning_rate": 2.3290635209071864e-06, "loss": 0.0899, "step": 548200 }, { "epoch": 5.39, "grad_norm": 2.155059814453125, "learning_rate": 2.328939398452938e-06, "loss": 0.0406, "step": 548225 }, { "epoch": 5.39, "grad_norm": 0.01901685819029808, "learning_rate": 2.3288152759986896e-06, "loss": 0.0884, "step": 548250 }, { "epoch": 5.39, "grad_norm": 5.999655246734619, "learning_rate": 2.328691153544441e-06, "loss": 0.0398, "step": 548275 }, { "epoch": 5.39, "grad_norm": 4.943421363830566, "learning_rate": 2.3285670310901925e-06, "loss": 0.0821, "step": 548300 }, { "epoch": 5.39, "grad_norm": 9.812077522277832, "learning_rate": 2.328442908635944e-06, "loss": 0.0623, "step": 548325 }, { "epoch": 5.39, "grad_norm": 1.61892831325531, "learning_rate": 2.3283237510798656e-06, "loss": 0.1094, "step": 548350 }, { "epoch": 5.39, "grad_norm": 12.647712707519531, "learning_rate": 2.328199628625617e-06, "loss": 0.0567, "step": 548375 }, { "epoch": 5.39, "grad_norm": 0.448385089635849, "learning_rate": 2.328075506171369e-06, "loss": 0.08, "step": 548400 }, { "epoch": 5.39, "grad_norm": 11.284944534301758, "learning_rate": 2.3279513837171205e-06, "loss": 0.0476, "step": 548425 }, { "epoch": 5.39, "grad_norm": 0.18132689595222473, "learning_rate": 2.3278272612628717e-06, "loss": 0.1055, "step": 548450 }, { "epoch": 5.39, "grad_norm": 5.578929901123047, "learning_rate": 2.3277031388086233e-06, "loss": 0.0442, "step": 548475 }, { "epoch": 5.39, "grad_norm": 0.4175269305706024, "learning_rate": 2.327579016354375e-06, "loss": 0.0745, "step": 548500 }, { "epoch": 5.39, "grad_norm": 10.854686737060547, "learning_rate": 2.327454893900126e-06, "loss": 0.0372, "step": 548525 }, { "epoch": 5.39, "grad_norm": 1.9621127843856812, "learning_rate": 2.3273307714458778e-06, "loss": 0.0831, "step": 548550 }, { "epoch": 5.39, "grad_norm": 16.60088348388672, "learning_rate": 2.3272066489916294e-06, "loss": 0.0651, "step": 548575 }, { "epoch": 5.39, "grad_norm": 2.2376201152801514, "learning_rate": 2.327082526537381e-06, "loss": 0.0777, "step": 548600 }, { "epoch": 5.39, "grad_norm": 9.927969932556152, "learning_rate": 2.3269584040831327e-06, "loss": 0.0658, "step": 548625 }, { "epoch": 5.39, "grad_norm": 4.751941204071045, "learning_rate": 2.326834281628884e-06, "loss": 0.0906, "step": 548650 }, { "epoch": 5.39, "grad_norm": 7.91402006149292, "learning_rate": 2.3267101591746355e-06, "loss": 0.0485, "step": 548675 }, { "epoch": 5.39, "grad_norm": 2.758894205093384, "learning_rate": 2.326586036720387e-06, "loss": 0.0548, "step": 548700 }, { "epoch": 5.4, "grad_norm": 2.590203285217285, "learning_rate": 2.3264619142661384e-06, "loss": 0.058, "step": 548725 }, { "epoch": 5.4, "grad_norm": 1.1791431903839111, "learning_rate": 2.32633779181189e-06, "loss": 0.073, "step": 548750 }, { "epoch": 5.4, "grad_norm": 4.130589008331299, "learning_rate": 2.3262136693576417e-06, "loss": 0.0597, "step": 548775 }, { "epoch": 5.4, "grad_norm": 3.8024041652679443, "learning_rate": 2.3260895469033933e-06, "loss": 0.0801, "step": 548800 }, { "epoch": 5.4, "grad_norm": 8.749032020568848, "learning_rate": 2.325965424449145e-06, "loss": 0.0396, "step": 548825 }, { "epoch": 5.4, "grad_norm": 8.452545166015625, "learning_rate": 2.3258413019948966e-06, "loss": 0.0908, "step": 548850 }, { "epoch": 5.4, "grad_norm": 13.295255661010742, "learning_rate": 2.3257171795406478e-06, "loss": 0.0487, "step": 548875 }, { "epoch": 5.4, "grad_norm": 7.382726192474365, "learning_rate": 2.3255930570863994e-06, "loss": 0.1012, "step": 548900 }, { "epoch": 5.4, "grad_norm": 9.35772705078125, "learning_rate": 2.325468934632151e-06, "loss": 0.0562, "step": 548925 }, { "epoch": 5.4, "grad_norm": 0.23889507353305817, "learning_rate": 2.3253448121779023e-06, "loss": 0.0614, "step": 548950 }, { "epoch": 5.4, "grad_norm": 12.500271797180176, "learning_rate": 2.325220689723654e-06, "loss": 0.0448, "step": 548975 }, { "epoch": 5.4, "grad_norm": 0.3028458058834076, "learning_rate": 2.3250965672694055e-06, "loss": 0.0771, "step": 549000 }, { "epoch": 5.4, "grad_norm": 18.72840118408203, "learning_rate": 2.324972444815157e-06, "loss": 0.0772, "step": 549025 }, { "epoch": 5.4, "grad_norm": 0.33668291568756104, "learning_rate": 2.324848322360909e-06, "loss": 0.0938, "step": 549050 }, { "epoch": 5.4, "grad_norm": 5.326418399810791, "learning_rate": 2.32472419990666e-06, "loss": 0.0586, "step": 549075 }, { "epoch": 5.4, "grad_norm": 0.0497869998216629, "learning_rate": 2.3246000774524116e-06, "loss": 0.0894, "step": 549100 }, { "epoch": 5.4, "grad_norm": 8.448098182678223, "learning_rate": 2.3244759549981633e-06, "loss": 0.0481, "step": 549125 }, { "epoch": 5.4, "grad_norm": 0.2601526379585266, "learning_rate": 2.3243518325439145e-06, "loss": 0.0911, "step": 549150 }, { "epoch": 5.4, "grad_norm": 4.6118245124816895, "learning_rate": 2.324227710089666e-06, "loss": 0.0423, "step": 549175 }, { "epoch": 5.4, "grad_norm": 9.803291320800781, "learning_rate": 2.3241035876354178e-06, "loss": 0.0799, "step": 549200 }, { "epoch": 5.4, "grad_norm": 11.819320678710938, "learning_rate": 2.3239794651811694e-06, "loss": 0.0556, "step": 549225 }, { "epoch": 5.4, "grad_norm": 0.6564477682113647, "learning_rate": 2.323855342726921e-06, "loss": 0.0924, "step": 549250 }, { "epoch": 5.4, "grad_norm": 11.214818954467773, "learning_rate": 2.3237312202726727e-06, "loss": 0.0425, "step": 549275 }, { "epoch": 5.4, "grad_norm": 2.8067800998687744, "learning_rate": 2.323607097818424e-06, "loss": 0.0613, "step": 549300 }, { "epoch": 5.4, "grad_norm": 9.516674041748047, "learning_rate": 2.3234829753641755e-06, "loss": 0.049, "step": 549325 }, { "epoch": 5.4, "grad_norm": 7.7284135818481445, "learning_rate": 2.323358852909927e-06, "loss": 0.0976, "step": 549350 }, { "epoch": 5.4, "grad_norm": 25.438419342041016, "learning_rate": 2.3232347304556784e-06, "loss": 0.0719, "step": 549375 }, { "epoch": 5.4, "grad_norm": 5.005142688751221, "learning_rate": 2.32311060800143e-06, "loss": 0.1082, "step": 549400 }, { "epoch": 5.4, "grad_norm": 14.743375778198242, "learning_rate": 2.3229864855471816e-06, "loss": 0.0491, "step": 549425 }, { "epoch": 5.4, "grad_norm": 1.1645066738128662, "learning_rate": 2.3228623630929333e-06, "loss": 0.0673, "step": 549450 }, { "epoch": 5.4, "grad_norm": 9.857606887817383, "learning_rate": 2.322738240638685e-06, "loss": 0.0361, "step": 549475 }, { "epoch": 5.4, "grad_norm": 2.1908986568450928, "learning_rate": 2.322614118184436e-06, "loss": 0.0875, "step": 549500 }, { "epoch": 5.4, "grad_norm": 8.011526107788086, "learning_rate": 2.3224899957301877e-06, "loss": 0.063, "step": 549525 }, { "epoch": 5.4, "grad_norm": 5.132257461547852, "learning_rate": 2.3223658732759394e-06, "loss": 0.0806, "step": 549550 }, { "epoch": 5.4, "grad_norm": 12.40185260772705, "learning_rate": 2.3222417508216906e-06, "loss": 0.0585, "step": 549575 }, { "epoch": 5.4, "grad_norm": 8.108564376831055, "learning_rate": 2.3221176283674422e-06, "loss": 0.0863, "step": 549600 }, { "epoch": 5.4, "grad_norm": 8.369696617126465, "learning_rate": 2.321993505913194e-06, "loss": 0.0655, "step": 549625 }, { "epoch": 5.4, "grad_norm": 10.777982711791992, "learning_rate": 2.3218693834589455e-06, "loss": 0.0767, "step": 549650 }, { "epoch": 5.4, "grad_norm": 12.798698425292969, "learning_rate": 2.321745261004697e-06, "loss": 0.0483, "step": 549675 }, { "epoch": 5.4, "grad_norm": 4.17791223526001, "learning_rate": 2.3216211385504488e-06, "loss": 0.0749, "step": 549700 }, { "epoch": 5.4, "grad_norm": 18.763673782348633, "learning_rate": 2.3214970160962e-06, "loss": 0.0504, "step": 549725 }, { "epoch": 5.41, "grad_norm": 2.9918580055236816, "learning_rate": 2.3213728936419516e-06, "loss": 0.083, "step": 549750 }, { "epoch": 5.41, "grad_norm": 12.575238227844238, "learning_rate": 2.3212487711877032e-06, "loss": 0.0393, "step": 549775 }, { "epoch": 5.41, "grad_norm": 0.004188499879091978, "learning_rate": 2.3211246487334545e-06, "loss": 0.0783, "step": 549800 }, { "epoch": 5.41, "grad_norm": 1.6104810237884521, "learning_rate": 2.321000526279206e-06, "loss": 0.043, "step": 549825 }, { "epoch": 5.41, "grad_norm": 0.6919455528259277, "learning_rate": 2.3208764038249577e-06, "loss": 0.0912, "step": 549850 }, { "epoch": 5.41, "grad_norm": 9.640363693237305, "learning_rate": 2.3207522813707094e-06, "loss": 0.0548, "step": 549875 }, { "epoch": 5.41, "grad_norm": 5.444222927093506, "learning_rate": 2.320628158916461e-06, "loss": 0.0851, "step": 549900 }, { "epoch": 5.41, "grad_norm": 9.781499862670898, "learning_rate": 2.320504036462212e-06, "loss": 0.0485, "step": 549925 }, { "epoch": 5.41, "grad_norm": 3.080782413482666, "learning_rate": 2.320379914007964e-06, "loss": 0.074, "step": 549950 }, { "epoch": 5.41, "grad_norm": 5.428316116333008, "learning_rate": 2.3202557915537155e-06, "loss": 0.0575, "step": 549975 }, { "epoch": 5.41, "grad_norm": 5.023244380950928, "learning_rate": 2.3201316690994667e-06, "loss": 0.087, "step": 550000 }, { "epoch": 5.41, "grad_norm": 7.106480598449707, "learning_rate": 2.3200075466452183e-06, "loss": 0.0529, "step": 550025 }, { "epoch": 5.41, "grad_norm": 0.468993216753006, "learning_rate": 2.31988342419097e-06, "loss": 0.0958, "step": 550050 }, { "epoch": 5.41, "grad_norm": 7.914013862609863, "learning_rate": 2.3197593017367216e-06, "loss": 0.0645, "step": 550075 }, { "epoch": 5.41, "grad_norm": 4.82473611831665, "learning_rate": 2.3196351792824732e-06, "loss": 0.0807, "step": 550100 }, { "epoch": 5.41, "grad_norm": 16.068004608154297, "learning_rate": 2.319511056828225e-06, "loss": 0.0425, "step": 550125 }, { "epoch": 5.41, "grad_norm": 0.11126495897769928, "learning_rate": 2.319386934373976e-06, "loss": 0.0704, "step": 550150 }, { "epoch": 5.41, "grad_norm": 8.0562162399292, "learning_rate": 2.3192628119197277e-06, "loss": 0.0415, "step": 550175 }, { "epoch": 5.41, "grad_norm": 8.292499542236328, "learning_rate": 2.3191386894654793e-06, "loss": 0.0701, "step": 550200 }, { "epoch": 5.41, "grad_norm": 9.057175636291504, "learning_rate": 2.3190145670112306e-06, "loss": 0.0659, "step": 550225 }, { "epoch": 5.41, "grad_norm": 0.07312113791704178, "learning_rate": 2.318890444556982e-06, "loss": 0.0764, "step": 550250 }, { "epoch": 5.41, "grad_norm": 1.1694514751434326, "learning_rate": 2.318766322102734e-06, "loss": 0.051, "step": 550275 }, { "epoch": 5.41, "grad_norm": 3.4354794025421143, "learning_rate": 2.3186421996484855e-06, "loss": 0.0901, "step": 550300 }, { "epoch": 5.41, "grad_norm": 3.9854748249053955, "learning_rate": 2.318518077194237e-06, "loss": 0.0499, "step": 550325 }, { "epoch": 5.41, "grad_norm": 0.3372722864151001, "learning_rate": 2.3183939547399883e-06, "loss": 0.0745, "step": 550350 }, { "epoch": 5.41, "grad_norm": 10.576367378234863, "learning_rate": 2.31826983228574e-06, "loss": 0.0409, "step": 550375 }, { "epoch": 5.41, "grad_norm": 0.7881100177764893, "learning_rate": 2.3181457098314916e-06, "loss": 0.1046, "step": 550400 }, { "epoch": 5.41, "grad_norm": 6.782917022705078, "learning_rate": 2.3180215873772428e-06, "loss": 0.0544, "step": 550425 }, { "epoch": 5.41, "grad_norm": 1.1841237545013428, "learning_rate": 2.3178974649229944e-06, "loss": 0.0829, "step": 550450 }, { "epoch": 5.41, "grad_norm": 14.211237907409668, "learning_rate": 2.317773342468746e-06, "loss": 0.0655, "step": 550475 }, { "epoch": 5.41, "grad_norm": 4.610701560974121, "learning_rate": 2.3176492200144977e-06, "loss": 0.0731, "step": 550500 }, { "epoch": 5.41, "grad_norm": 9.72806167602539, "learning_rate": 2.3175250975602493e-06, "loss": 0.0651, "step": 550525 }, { "epoch": 5.41, "grad_norm": 0.29184234142303467, "learning_rate": 2.317400975106001e-06, "loss": 0.0645, "step": 550550 }, { "epoch": 5.41, "grad_norm": 3.7454733848571777, "learning_rate": 2.317276852651752e-06, "loss": 0.0361, "step": 550575 }, { "epoch": 5.41, "grad_norm": 4.067235469818115, "learning_rate": 2.317152730197504e-06, "loss": 0.0803, "step": 550600 }, { "epoch": 5.41, "grad_norm": 22.976116180419922, "learning_rate": 2.3170286077432554e-06, "loss": 0.0594, "step": 550625 }, { "epoch": 5.41, "grad_norm": 0.050876833498477936, "learning_rate": 2.3169044852890067e-06, "loss": 0.0871, "step": 550650 }, { "epoch": 5.41, "grad_norm": 12.72413158416748, "learning_rate": 2.3167803628347583e-06, "loss": 0.0546, "step": 550675 }, { "epoch": 5.41, "grad_norm": 5.490860462188721, "learning_rate": 2.31665624038051e-06, "loss": 0.0603, "step": 550700 }, { "epoch": 5.41, "grad_norm": 3.81463885307312, "learning_rate": 2.3165321179262616e-06, "loss": 0.0575, "step": 550725 }, { "epoch": 5.42, "grad_norm": 1.9359855651855469, "learning_rate": 2.316407995472013e-06, "loss": 0.0748, "step": 550750 }, { "epoch": 5.42, "grad_norm": 5.637910842895508, "learning_rate": 2.3162838730177644e-06, "loss": 0.0435, "step": 550775 }, { "epoch": 5.42, "grad_norm": 1.7937352657318115, "learning_rate": 2.316159750563516e-06, "loss": 0.0855, "step": 550800 }, { "epoch": 5.42, "grad_norm": 6.678713798522949, "learning_rate": 2.3160356281092677e-06, "loss": 0.059, "step": 550825 }, { "epoch": 5.42, "grad_norm": 13.7421875, "learning_rate": 2.3159115056550193e-06, "loss": 0.1009, "step": 550850 }, { "epoch": 5.42, "grad_norm": 9.11542797088623, "learning_rate": 2.315787383200771e-06, "loss": 0.0456, "step": 550875 }, { "epoch": 5.42, "grad_norm": 1.1955397129058838, "learning_rate": 2.315663260746522e-06, "loss": 0.0867, "step": 550900 }, { "epoch": 5.42, "grad_norm": 14.085325241088867, "learning_rate": 2.315539138292274e-06, "loss": 0.0524, "step": 550925 }, { "epoch": 5.42, "grad_norm": 2.955066442489624, "learning_rate": 2.3154150158380254e-06, "loss": 0.088, "step": 550950 }, { "epoch": 5.42, "grad_norm": 6.6178178787231445, "learning_rate": 2.315290893383777e-06, "loss": 0.0623, "step": 550975 }, { "epoch": 5.42, "grad_norm": 0.13263952732086182, "learning_rate": 2.3151667709295283e-06, "loss": 0.0933, "step": 551000 }, { "epoch": 5.42, "grad_norm": 10.447306632995605, "learning_rate": 2.31504264847528e-06, "loss": 0.048, "step": 551025 }, { "epoch": 5.42, "grad_norm": 7.081062316894531, "learning_rate": 2.3149185260210315e-06, "loss": 0.0671, "step": 551050 }, { "epoch": 5.42, "grad_norm": 13.174866676330566, "learning_rate": 2.314794403566783e-06, "loss": 0.042, "step": 551075 }, { "epoch": 5.42, "grad_norm": 1.0234647989273071, "learning_rate": 2.314670281112535e-06, "loss": 0.0994, "step": 551100 }, { "epoch": 5.42, "grad_norm": 10.062638282775879, "learning_rate": 2.3145461586582864e-06, "loss": 0.0648, "step": 551125 }, { "epoch": 5.42, "grad_norm": 0.07579086720943451, "learning_rate": 2.3144220362040377e-06, "loss": 0.0771, "step": 551150 }, { "epoch": 5.42, "grad_norm": 11.84941577911377, "learning_rate": 2.3142979137497893e-06, "loss": 0.0495, "step": 551175 }, { "epoch": 5.42, "grad_norm": 2.055694341659546, "learning_rate": 2.3141737912955405e-06, "loss": 0.0761, "step": 551200 }, { "epoch": 5.42, "grad_norm": 14.372201919555664, "learning_rate": 2.314049668841292e-06, "loss": 0.0524, "step": 551225 }, { "epoch": 5.42, "grad_norm": 6.007203102111816, "learning_rate": 2.3139255463870438e-06, "loss": 0.0938, "step": 551250 }, { "epoch": 5.42, "grad_norm": 11.12437915802002, "learning_rate": 2.3138014239327954e-06, "loss": 0.0527, "step": 551275 }, { "epoch": 5.42, "grad_norm": 0.05309814214706421, "learning_rate": 2.313677301478547e-06, "loss": 0.0707, "step": 551300 }, { "epoch": 5.42, "grad_norm": 8.530644416809082, "learning_rate": 2.3135531790242987e-06, "loss": 0.0588, "step": 551325 }, { "epoch": 5.42, "grad_norm": 7.71519660949707, "learning_rate": 2.31342905657005e-06, "loss": 0.0865, "step": 551350 }, { "epoch": 5.42, "grad_norm": 4.662526607513428, "learning_rate": 2.3133049341158015e-06, "loss": 0.0366, "step": 551375 }, { "epoch": 5.42, "grad_norm": 5.79945707321167, "learning_rate": 2.313180811661553e-06, "loss": 0.0808, "step": 551400 }, { "epoch": 5.42, "grad_norm": 5.855172634124756, "learning_rate": 2.3130566892073044e-06, "loss": 0.0376, "step": 551425 }, { "epoch": 5.42, "grad_norm": 0.34246155619621277, "learning_rate": 2.312932566753056e-06, "loss": 0.0957, "step": 551450 }, { "epoch": 5.42, "grad_norm": 6.675780773162842, "learning_rate": 2.3128084442988076e-06, "loss": 0.047, "step": 551475 }, { "epoch": 5.42, "grad_norm": 4.222568511962891, "learning_rate": 2.3126843218445593e-06, "loss": 0.1081, "step": 551500 }, { "epoch": 5.42, "grad_norm": 5.545970916748047, "learning_rate": 2.312560199390311e-06, "loss": 0.0518, "step": 551525 }, { "epoch": 5.42, "grad_norm": 10.57812786102295, "learning_rate": 2.3124360769360625e-06, "loss": 0.1074, "step": 551550 }, { "epoch": 5.42, "grad_norm": 11.11873722076416, "learning_rate": 2.3123119544818138e-06, "loss": 0.055, "step": 551575 }, { "epoch": 5.42, "grad_norm": 0.29363128542900085, "learning_rate": 2.3121878320275654e-06, "loss": 0.0878, "step": 551600 }, { "epoch": 5.42, "grad_norm": 7.251360893249512, "learning_rate": 2.3120637095733166e-06, "loss": 0.0543, "step": 551625 }, { "epoch": 5.42, "grad_norm": 3.788282632827759, "learning_rate": 2.3119395871190682e-06, "loss": 0.0845, "step": 551650 }, { "epoch": 5.42, "grad_norm": 13.699421882629395, "learning_rate": 2.31181546466482e-06, "loss": 0.0503, "step": 551675 }, { "epoch": 5.42, "grad_norm": 1.2435834407806396, "learning_rate": 2.3116913422105715e-06, "loss": 0.0696, "step": 551700 }, { "epoch": 5.42, "grad_norm": 8.930569648742676, "learning_rate": 2.311567219756323e-06, "loss": 0.0524, "step": 551725 }, { "epoch": 5.42, "grad_norm": 0.9839969873428345, "learning_rate": 2.3114430973020748e-06, "loss": 0.1025, "step": 551750 }, { "epoch": 5.43, "grad_norm": 6.499867916107178, "learning_rate": 2.311318974847826e-06, "loss": 0.0582, "step": 551775 }, { "epoch": 5.43, "grad_norm": 2.6525332927703857, "learning_rate": 2.3111948523935776e-06, "loss": 0.081, "step": 551800 }, { "epoch": 5.43, "grad_norm": 10.647648811340332, "learning_rate": 2.3110707299393293e-06, "loss": 0.0463, "step": 551825 }, { "epoch": 5.43, "grad_norm": 5.5031609535217285, "learning_rate": 2.3109466074850805e-06, "loss": 0.089, "step": 551850 }, { "epoch": 5.43, "grad_norm": 7.762625694274902, "learning_rate": 2.310822485030832e-06, "loss": 0.0413, "step": 551875 }, { "epoch": 5.43, "grad_norm": 1.2657157182693481, "learning_rate": 2.3106983625765837e-06, "loss": 0.0686, "step": 551900 }, { "epoch": 5.43, "grad_norm": 14.273609161376953, "learning_rate": 2.3105742401223354e-06, "loss": 0.055, "step": 551925 }, { "epoch": 5.43, "grad_norm": 2.2749557495117188, "learning_rate": 2.310450117668087e-06, "loss": 0.089, "step": 551950 }, { "epoch": 5.43, "grad_norm": 11.807040214538574, "learning_rate": 2.3103259952138386e-06, "loss": 0.0431, "step": 551975 }, { "epoch": 5.43, "grad_norm": 2.1535918712615967, "learning_rate": 2.31020187275959e-06, "loss": 0.0962, "step": 552000 }, { "epoch": 5.43, "grad_norm": 8.890453338623047, "learning_rate": 2.3100777503053415e-06, "loss": 0.0418, "step": 552025 }, { "epoch": 5.43, "grad_norm": 0.12903353571891785, "learning_rate": 2.3099536278510927e-06, "loss": 0.0848, "step": 552050 }, { "epoch": 5.43, "grad_norm": 10.803181648254395, "learning_rate": 2.3098295053968443e-06, "loss": 0.0505, "step": 552075 }, { "epoch": 5.43, "grad_norm": 3.925694704055786, "learning_rate": 2.309705382942596e-06, "loss": 0.0901, "step": 552100 }, { "epoch": 5.43, "grad_norm": 11.549315452575684, "learning_rate": 2.3095812604883476e-06, "loss": 0.051, "step": 552125 }, { "epoch": 5.43, "grad_norm": 0.3013891577720642, "learning_rate": 2.3094571380340992e-06, "loss": 0.0744, "step": 552150 }, { "epoch": 5.43, "grad_norm": 10.986260414123535, "learning_rate": 2.309333015579851e-06, "loss": 0.0598, "step": 552175 }, { "epoch": 5.43, "grad_norm": 0.718889057636261, "learning_rate": 2.309208893125602e-06, "loss": 0.1075, "step": 552200 }, { "epoch": 5.43, "grad_norm": 11.701465606689453, "learning_rate": 2.3090847706713537e-06, "loss": 0.0728, "step": 552225 }, { "epoch": 5.43, "grad_norm": 12.688136100769043, "learning_rate": 2.3089606482171054e-06, "loss": 0.1061, "step": 552250 }, { "epoch": 5.43, "grad_norm": 2.6413378715515137, "learning_rate": 2.3088365257628566e-06, "loss": 0.0464, "step": 552275 }, { "epoch": 5.43, "grad_norm": 2.165239095687866, "learning_rate": 2.308712403308608e-06, "loss": 0.0843, "step": 552300 }, { "epoch": 5.43, "grad_norm": 12.586745262145996, "learning_rate": 2.30858828085436e-06, "loss": 0.0407, "step": 552325 }, { "epoch": 5.43, "grad_norm": 0.5942385196685791, "learning_rate": 2.3084641584001115e-06, "loss": 0.0898, "step": 552350 }, { "epoch": 5.43, "grad_norm": 8.555453300476074, "learning_rate": 2.308340035945863e-06, "loss": 0.0574, "step": 552375 }, { "epoch": 5.43, "grad_norm": 0.3235742449760437, "learning_rate": 2.3082159134916147e-06, "loss": 0.095, "step": 552400 }, { "epoch": 5.43, "grad_norm": 7.759584426879883, "learning_rate": 2.308091791037366e-06, "loss": 0.0358, "step": 552425 }, { "epoch": 5.43, "grad_norm": 1.4455406665802002, "learning_rate": 2.3079726334812874e-06, "loss": 0.0918, "step": 552450 }, { "epoch": 5.43, "grad_norm": 6.134008407592773, "learning_rate": 2.307848511027039e-06, "loss": 0.0586, "step": 552475 }, { "epoch": 5.43, "grad_norm": 0.32925546169281006, "learning_rate": 2.3077243885727907e-06, "loss": 0.0849, "step": 552500 }, { "epoch": 5.43, "grad_norm": 0.7897787094116211, "learning_rate": 2.307600266118542e-06, "loss": 0.0447, "step": 552525 }, { "epoch": 5.43, "grad_norm": 0.07228608429431915, "learning_rate": 2.3074761436642935e-06, "loss": 0.0786, "step": 552550 }, { "epoch": 5.43, "grad_norm": 3.9781553745269775, "learning_rate": 2.307352021210045e-06, "loss": 0.0404, "step": 552575 }, { "epoch": 5.43, "grad_norm": 3.3657960891723633, "learning_rate": 2.307227898755797e-06, "loss": 0.0956, "step": 552600 }, { "epoch": 5.43, "grad_norm": 7.132869243621826, "learning_rate": 2.3071037763015484e-06, "loss": 0.0348, "step": 552625 }, { "epoch": 5.43, "grad_norm": 8.111883163452148, "learning_rate": 2.3069796538472996e-06, "loss": 0.084, "step": 552650 }, { "epoch": 5.43, "grad_norm": 3.3467864990234375, "learning_rate": 2.3068555313930513e-06, "loss": 0.0576, "step": 552675 }, { "epoch": 5.43, "grad_norm": 0.3277525007724762, "learning_rate": 2.306731408938803e-06, "loss": 0.085, "step": 552700 }, { "epoch": 5.43, "grad_norm": 10.061603546142578, "learning_rate": 2.306607286484554e-06, "loss": 0.0483, "step": 552725 }, { "epoch": 5.43, "grad_norm": 5.6124043464660645, "learning_rate": 2.3064831640303058e-06, "loss": 0.0656, "step": 552750 }, { "epoch": 5.43, "grad_norm": 12.12291431427002, "learning_rate": 2.3063590415760574e-06, "loss": 0.0447, "step": 552775 }, { "epoch": 5.44, "grad_norm": 2.598320245742798, "learning_rate": 2.306234919121809e-06, "loss": 0.0938, "step": 552800 }, { "epoch": 5.44, "grad_norm": 10.333812713623047, "learning_rate": 2.3061107966675607e-06, "loss": 0.0547, "step": 552825 }, { "epoch": 5.44, "grad_norm": 9.465880393981934, "learning_rate": 2.3059866742133123e-06, "loss": 0.0778, "step": 552850 }, { "epoch": 5.44, "grad_norm": 2.4803593158721924, "learning_rate": 2.3058625517590635e-06, "loss": 0.0535, "step": 552875 }, { "epoch": 5.44, "grad_norm": 4.119710445404053, "learning_rate": 2.305738429304815e-06, "loss": 0.0985, "step": 552900 }, { "epoch": 5.44, "grad_norm": 11.19135570526123, "learning_rate": 2.3056143068505668e-06, "loss": 0.0438, "step": 552925 }, { "epoch": 5.44, "grad_norm": 2.653571605682373, "learning_rate": 2.305490184396318e-06, "loss": 0.0841, "step": 552950 }, { "epoch": 5.44, "grad_norm": 17.376846313476562, "learning_rate": 2.3053660619420696e-06, "loss": 0.0438, "step": 552975 }, { "epoch": 5.44, "grad_norm": 0.56424480676651, "learning_rate": 2.3052419394878213e-06, "loss": 0.0749, "step": 553000 }, { "epoch": 5.44, "grad_norm": 10.514669418334961, "learning_rate": 2.305117817033573e-06, "loss": 0.0378, "step": 553025 }, { "epoch": 5.44, "grad_norm": 1.4147043228149414, "learning_rate": 2.3049936945793245e-06, "loss": 0.0794, "step": 553050 }, { "epoch": 5.44, "grad_norm": 7.038449764251709, "learning_rate": 2.3048695721250757e-06, "loss": 0.0584, "step": 553075 }, { "epoch": 5.44, "grad_norm": 7.062272548675537, "learning_rate": 2.3047454496708274e-06, "loss": 0.065, "step": 553100 }, { "epoch": 5.44, "grad_norm": 4.37657356262207, "learning_rate": 2.304621327216579e-06, "loss": 0.0427, "step": 553125 }, { "epoch": 5.44, "grad_norm": 12.094050407409668, "learning_rate": 2.3044972047623302e-06, "loss": 0.0862, "step": 553150 }, { "epoch": 5.44, "grad_norm": 7.249613285064697, "learning_rate": 2.304373082308082e-06, "loss": 0.056, "step": 553175 }, { "epoch": 5.44, "grad_norm": 0.017563000321388245, "learning_rate": 2.3042489598538335e-06, "loss": 0.096, "step": 553200 }, { "epoch": 5.44, "grad_norm": 8.917016983032227, "learning_rate": 2.304124837399585e-06, "loss": 0.0436, "step": 553225 }, { "epoch": 5.44, "grad_norm": 1.2942848205566406, "learning_rate": 2.3040007149453368e-06, "loss": 0.0749, "step": 553250 }, { "epoch": 5.44, "grad_norm": 12.444562911987305, "learning_rate": 2.3038765924910884e-06, "loss": 0.0507, "step": 553275 }, { "epoch": 5.44, "grad_norm": 4.063724517822266, "learning_rate": 2.3037524700368396e-06, "loss": 0.0768, "step": 553300 }, { "epoch": 5.44, "grad_norm": 10.015168190002441, "learning_rate": 2.3036283475825912e-06, "loss": 0.0577, "step": 553325 }, { "epoch": 5.44, "grad_norm": 0.7711217999458313, "learning_rate": 2.303504225128343e-06, "loss": 0.0917, "step": 553350 }, { "epoch": 5.44, "grad_norm": 5.677758693695068, "learning_rate": 2.303380102674094e-06, "loss": 0.0739, "step": 553375 }, { "epoch": 5.44, "grad_norm": 0.21067315340042114, "learning_rate": 2.3032559802198457e-06, "loss": 0.0797, "step": 553400 }, { "epoch": 5.44, "grad_norm": 16.473459243774414, "learning_rate": 2.3031318577655974e-06, "loss": 0.0517, "step": 553425 }, { "epoch": 5.44, "grad_norm": 0.28627467155456543, "learning_rate": 2.303007735311349e-06, "loss": 0.0848, "step": 553450 }, { "epoch": 5.44, "grad_norm": 8.994488716125488, "learning_rate": 2.3028836128571006e-06, "loss": 0.0532, "step": 553475 }, { "epoch": 5.44, "grad_norm": 3.356351375579834, "learning_rate": 2.302759490402852e-06, "loss": 0.076, "step": 553500 }, { "epoch": 5.44, "grad_norm": 18.79749298095703, "learning_rate": 2.3026353679486035e-06, "loss": 0.0478, "step": 553525 }, { "epoch": 5.44, "grad_norm": 3.2138116359710693, "learning_rate": 2.302511245494355e-06, "loss": 0.1034, "step": 553550 }, { "epoch": 5.44, "grad_norm": 7.056569576263428, "learning_rate": 2.3023871230401063e-06, "loss": 0.0582, "step": 553575 }, { "epoch": 5.44, "grad_norm": 0.007051641121506691, "learning_rate": 2.302263000585858e-06, "loss": 0.0569, "step": 553600 }, { "epoch": 5.44, "grad_norm": 9.3860502243042, "learning_rate": 2.3021388781316096e-06, "loss": 0.0591, "step": 553625 }, { "epoch": 5.44, "grad_norm": 5.704294681549072, "learning_rate": 2.3020147556773612e-06, "loss": 0.0666, "step": 553650 }, { "epoch": 5.44, "grad_norm": 11.869447708129883, "learning_rate": 2.301890633223113e-06, "loss": 0.0394, "step": 553675 }, { "epoch": 5.44, "grad_norm": 3.697049617767334, "learning_rate": 2.3017665107688645e-06, "loss": 0.0764, "step": 553700 }, { "epoch": 5.44, "grad_norm": 12.390012741088867, "learning_rate": 2.3016423883146157e-06, "loss": 0.051, "step": 553725 }, { "epoch": 5.44, "grad_norm": 5.382594585418701, "learning_rate": 2.3015182658603673e-06, "loss": 0.1004, "step": 553750 }, { "epoch": 5.44, "grad_norm": 8.361332893371582, "learning_rate": 2.301394143406119e-06, "loss": 0.0467, "step": 553775 }, { "epoch": 5.45, "grad_norm": 0.1167927086353302, "learning_rate": 2.3012700209518706e-06, "loss": 0.0647, "step": 553800 }, { "epoch": 5.45, "grad_norm": 5.7628984451293945, "learning_rate": 2.3011458984976223e-06, "loss": 0.0335, "step": 553825 }, { "epoch": 5.45, "grad_norm": 7.456045627593994, "learning_rate": 2.3010217760433735e-06, "loss": 0.0726, "step": 553850 }, { "epoch": 5.45, "grad_norm": 13.538456916809082, "learning_rate": 2.300897653589125e-06, "loss": 0.0592, "step": 553875 }, { "epoch": 5.45, "grad_norm": 1.5362049341201782, "learning_rate": 2.3007735311348767e-06, "loss": 0.0934, "step": 553900 }, { "epoch": 5.45, "grad_norm": 12.117208480834961, "learning_rate": 2.300649408680628e-06, "loss": 0.0406, "step": 553925 }, { "epoch": 5.45, "grad_norm": 4.253238201141357, "learning_rate": 2.3005252862263796e-06, "loss": 0.0898, "step": 553950 }, { "epoch": 5.45, "grad_norm": 12.859371185302734, "learning_rate": 2.3004011637721312e-06, "loss": 0.0651, "step": 553975 }, { "epoch": 5.45, "grad_norm": 5.786479949951172, "learning_rate": 2.300277041317883e-06, "loss": 0.0636, "step": 554000 }, { "epoch": 5.45, "grad_norm": 11.559409141540527, "learning_rate": 2.3001529188636345e-06, "loss": 0.0541, "step": 554025 }, { "epoch": 5.45, "grad_norm": 0.0516495481133461, "learning_rate": 2.300028796409386e-06, "loss": 0.0761, "step": 554050 }, { "epoch": 5.45, "grad_norm": 11.33336353302002, "learning_rate": 2.2999046739551373e-06, "loss": 0.0503, "step": 554075 }, { "epoch": 5.45, "grad_norm": 7.584184646606445, "learning_rate": 2.299780551500889e-06, "loss": 0.0894, "step": 554100 }, { "epoch": 5.45, "grad_norm": 8.868191719055176, "learning_rate": 2.2996564290466406e-06, "loss": 0.052, "step": 554125 }, { "epoch": 5.45, "grad_norm": 5.628467559814453, "learning_rate": 2.299532306592392e-06, "loss": 0.06, "step": 554150 }, { "epoch": 5.45, "grad_norm": 4.497402191162109, "learning_rate": 2.2994081841381434e-06, "loss": 0.0496, "step": 554175 }, { "epoch": 5.45, "grad_norm": 4.429352760314941, "learning_rate": 2.299284061683895e-06, "loss": 0.0657, "step": 554200 }, { "epoch": 5.45, "grad_norm": 1.2492033243179321, "learning_rate": 2.2991599392296467e-06, "loss": 0.0422, "step": 554225 }, { "epoch": 5.45, "grad_norm": 0.08312416821718216, "learning_rate": 2.2990358167753984e-06, "loss": 0.1053, "step": 554250 }, { "epoch": 5.45, "grad_norm": 6.480466365814209, "learning_rate": 2.2989116943211496e-06, "loss": 0.0642, "step": 554275 }, { "epoch": 5.45, "grad_norm": 0.8550930023193359, "learning_rate": 2.298787571866901e-06, "loss": 0.0911, "step": 554300 }, { "epoch": 5.45, "grad_norm": 3.621823787689209, "learning_rate": 2.298663449412653e-06, "loss": 0.0508, "step": 554325 }, { "epoch": 5.45, "grad_norm": 1.0960698127746582, "learning_rate": 2.298539326958404e-06, "loss": 0.0766, "step": 554350 }, { "epoch": 5.45, "grad_norm": 5.944955348968506, "learning_rate": 2.2984152045041557e-06, "loss": 0.0598, "step": 554375 }, { "epoch": 5.45, "grad_norm": 0.007469496689736843, "learning_rate": 2.2982910820499073e-06, "loss": 0.0839, "step": 554400 }, { "epoch": 5.45, "grad_norm": 14.734353065490723, "learning_rate": 2.298166959595659e-06, "loss": 0.0859, "step": 554425 }, { "epoch": 5.45, "grad_norm": 2.5934600830078125, "learning_rate": 2.2980428371414106e-06, "loss": 0.0804, "step": 554450 }, { "epoch": 5.45, "grad_norm": 7.81258487701416, "learning_rate": 2.2979187146871622e-06, "loss": 0.0633, "step": 554475 }, { "epoch": 5.45, "grad_norm": 7.445236682891846, "learning_rate": 2.2977945922329134e-06, "loss": 0.0639, "step": 554500 }, { "epoch": 5.45, "grad_norm": 8.533895492553711, "learning_rate": 2.297670469778665e-06, "loss": 0.0375, "step": 554525 }, { "epoch": 5.45, "grad_norm": 4.320311546325684, "learning_rate": 2.2975463473244167e-06, "loss": 0.0805, "step": 554550 }, { "epoch": 5.45, "grad_norm": 9.590702056884766, "learning_rate": 2.297422224870168e-06, "loss": 0.0587, "step": 554575 }, { "epoch": 5.45, "grad_norm": 0.2817603647708893, "learning_rate": 2.2972981024159195e-06, "loss": 0.0748, "step": 554600 }, { "epoch": 5.45, "grad_norm": 16.253599166870117, "learning_rate": 2.297173979961671e-06, "loss": 0.0341, "step": 554625 }, { "epoch": 5.45, "grad_norm": 1.7262965440750122, "learning_rate": 2.297049857507423e-06, "loss": 0.0928, "step": 554650 }, { "epoch": 5.45, "grad_norm": 19.561147689819336, "learning_rate": 2.2969257350531745e-06, "loss": 0.0615, "step": 554675 }, { "epoch": 5.45, "grad_norm": 6.0202436447143555, "learning_rate": 2.296801612598926e-06, "loss": 0.0577, "step": 554700 }, { "epoch": 5.45, "grad_norm": 14.588812828063965, "learning_rate": 2.2966774901446773e-06, "loss": 0.0567, "step": 554725 }, { "epoch": 5.45, "grad_norm": 1.4340893030166626, "learning_rate": 2.296553367690429e-06, "loss": 0.1019, "step": 554750 }, { "epoch": 5.45, "grad_norm": 12.49295711517334, "learning_rate": 2.29642924523618e-06, "loss": 0.0635, "step": 554775 }, { "epoch": 5.45, "grad_norm": 6.848259449005127, "learning_rate": 2.2963051227819318e-06, "loss": 0.1018, "step": 554800 }, { "epoch": 5.46, "grad_norm": 10.330544471740723, "learning_rate": 2.2961810003276834e-06, "loss": 0.0413, "step": 554825 }, { "epoch": 5.46, "grad_norm": 0.4471493065357208, "learning_rate": 2.296056877873435e-06, "loss": 0.0905, "step": 554850 }, { "epoch": 5.46, "grad_norm": 9.784457206726074, "learning_rate": 2.2959327554191867e-06, "loss": 0.0659, "step": 554875 }, { "epoch": 5.46, "grad_norm": 0.026347119361162186, "learning_rate": 2.295813597863108e-06, "loss": 0.0759, "step": 554900 }, { "epoch": 5.46, "grad_norm": 7.565000534057617, "learning_rate": 2.2956894754088598e-06, "loss": 0.05, "step": 554925 }, { "epoch": 5.46, "grad_norm": 0.017124461010098457, "learning_rate": 2.295565352954611e-06, "loss": 0.0809, "step": 554950 }, { "epoch": 5.46, "grad_norm": 14.850417137145996, "learning_rate": 2.2954412305003626e-06, "loss": 0.0467, "step": 554975 }, { "epoch": 5.46, "grad_norm": 1.0442436933517456, "learning_rate": 2.2953171080461143e-06, "loss": 0.0756, "step": 555000 }, { "epoch": 5.46, "grad_norm": 1.9339280128479004, "learning_rate": 2.2951929855918655e-06, "loss": 0.0343, "step": 555025 }, { "epoch": 5.46, "grad_norm": 0.47770726680755615, "learning_rate": 2.295068863137617e-06, "loss": 0.0682, "step": 555050 }, { "epoch": 5.46, "grad_norm": 8.519561767578125, "learning_rate": 2.2949447406833687e-06, "loss": 0.0527, "step": 555075 }, { "epoch": 5.46, "grad_norm": 5.992860794067383, "learning_rate": 2.2948206182291204e-06, "loss": 0.0947, "step": 555100 }, { "epoch": 5.46, "grad_norm": 8.745931625366211, "learning_rate": 2.294696495774872e-06, "loss": 0.0324, "step": 555125 }, { "epoch": 5.46, "grad_norm": 0.005414098035544157, "learning_rate": 2.2945723733206236e-06, "loss": 0.0574, "step": 555150 }, { "epoch": 5.46, "grad_norm": 11.438125610351562, "learning_rate": 2.294448250866375e-06, "loss": 0.0474, "step": 555175 }, { "epoch": 5.46, "grad_norm": 0.5871058106422424, "learning_rate": 2.2943241284121265e-06, "loss": 0.0908, "step": 555200 }, { "epoch": 5.46, "grad_norm": 9.118029594421387, "learning_rate": 2.294200005957878e-06, "loss": 0.0401, "step": 555225 }, { "epoch": 5.46, "grad_norm": 3.6364386081695557, "learning_rate": 2.2940758835036293e-06, "loss": 0.0974, "step": 555250 }, { "epoch": 5.46, "grad_norm": 10.321684837341309, "learning_rate": 2.293951761049381e-06, "loss": 0.0637, "step": 555275 }, { "epoch": 5.46, "grad_norm": 0.6180789470672607, "learning_rate": 2.2938276385951326e-06, "loss": 0.0709, "step": 555300 }, { "epoch": 5.46, "grad_norm": 10.202478408813477, "learning_rate": 2.2937035161408842e-06, "loss": 0.0461, "step": 555325 }, { "epoch": 5.46, "grad_norm": 3.198599338531494, "learning_rate": 2.293579393686636e-06, "loss": 0.1162, "step": 555350 }, { "epoch": 5.46, "grad_norm": 13.745823860168457, "learning_rate": 2.293455271232387e-06, "loss": 0.0434, "step": 555375 }, { "epoch": 5.46, "grad_norm": 1.3151934146881104, "learning_rate": 2.2933311487781387e-06, "loss": 0.0789, "step": 555400 }, { "epoch": 5.46, "grad_norm": 4.714084625244141, "learning_rate": 2.2932070263238904e-06, "loss": 0.0524, "step": 555425 }, { "epoch": 5.46, "grad_norm": 2.7629175186157227, "learning_rate": 2.2930829038696416e-06, "loss": 0.0852, "step": 555450 }, { "epoch": 5.46, "grad_norm": 20.82301902770996, "learning_rate": 2.292958781415393e-06, "loss": 0.065, "step": 555475 }, { "epoch": 5.46, "grad_norm": 0.1942315250635147, "learning_rate": 2.292834658961145e-06, "loss": 0.0852, "step": 555500 }, { "epoch": 5.46, "grad_norm": 6.699972629547119, "learning_rate": 2.2927105365068965e-06, "loss": 0.0789, "step": 555525 }, { "epoch": 5.46, "grad_norm": 1.4103471040725708, "learning_rate": 2.292586414052648e-06, "loss": 0.0765, "step": 555550 }, { "epoch": 5.46, "grad_norm": 5.679024696350098, "learning_rate": 2.2924622915983997e-06, "loss": 0.0326, "step": 555575 }, { "epoch": 5.46, "grad_norm": 0.2547333538532257, "learning_rate": 2.292338169144151e-06, "loss": 0.0734, "step": 555600 }, { "epoch": 5.46, "grad_norm": 16.53875160217285, "learning_rate": 2.2922140466899026e-06, "loss": 0.0374, "step": 555625 }, { "epoch": 5.46, "grad_norm": 2.7120673656463623, "learning_rate": 2.2920899242356542e-06, "loss": 0.0625, "step": 555650 }, { "epoch": 5.46, "grad_norm": 8.982427597045898, "learning_rate": 2.2919658017814054e-06, "loss": 0.0473, "step": 555675 }, { "epoch": 5.46, "grad_norm": 9.464524269104004, "learning_rate": 2.291841679327157e-06, "loss": 0.1094, "step": 555700 }, { "epoch": 5.46, "grad_norm": 5.5266947746276855, "learning_rate": 2.2917175568729087e-06, "loss": 0.0585, "step": 555725 }, { "epoch": 5.46, "grad_norm": 3.52934193611145, "learning_rate": 2.2915934344186603e-06, "loss": 0.0775, "step": 555750 }, { "epoch": 5.46, "grad_norm": 14.961148262023926, "learning_rate": 2.291469311964412e-06, "loss": 0.0636, "step": 555775 }, { "epoch": 5.46, "grad_norm": 1.4551753997802734, "learning_rate": 2.291345189510163e-06, "loss": 0.0715, "step": 555800 }, { "epoch": 5.46, "grad_norm": 2.166172981262207, "learning_rate": 2.291221067055915e-06, "loss": 0.0421, "step": 555825 }, { "epoch": 5.47, "grad_norm": 0.4543420672416687, "learning_rate": 2.2910969446016665e-06, "loss": 0.0562, "step": 555850 }, { "epoch": 5.47, "grad_norm": 19.357526779174805, "learning_rate": 2.2909728221474177e-06, "loss": 0.0793, "step": 555875 }, { "epoch": 5.47, "grad_norm": 3.8382930755615234, "learning_rate": 2.2908486996931693e-06, "loss": 0.0787, "step": 555900 }, { "epoch": 5.47, "grad_norm": 9.512044906616211, "learning_rate": 2.290724577238921e-06, "loss": 0.0571, "step": 555925 }, { "epoch": 5.47, "grad_norm": 4.427781581878662, "learning_rate": 2.2906004547846726e-06, "loss": 0.0849, "step": 555950 }, { "epoch": 5.47, "grad_norm": 5.7978010177612305, "learning_rate": 2.290476332330424e-06, "loss": 0.0585, "step": 555975 }, { "epoch": 5.47, "grad_norm": 0.22101753950119019, "learning_rate": 2.290352209876176e-06, "loss": 0.1134, "step": 556000 }, { "epoch": 5.47, "grad_norm": 5.89842414855957, "learning_rate": 2.290228087421927e-06, "loss": 0.0436, "step": 556025 }, { "epoch": 5.47, "grad_norm": 2.4882543087005615, "learning_rate": 2.2901039649676787e-06, "loss": 0.0667, "step": 556050 }, { "epoch": 5.47, "grad_norm": 7.099672317504883, "learning_rate": 2.2899798425134303e-06, "loss": 0.0403, "step": 556075 }, { "epoch": 5.47, "grad_norm": 3.0561695098876953, "learning_rate": 2.2898557200591815e-06, "loss": 0.0741, "step": 556100 }, { "epoch": 5.47, "grad_norm": 16.2537784576416, "learning_rate": 2.289731597604933e-06, "loss": 0.0573, "step": 556125 }, { "epoch": 5.47, "grad_norm": 0.23542776703834534, "learning_rate": 2.289607475150685e-06, "loss": 0.0787, "step": 556150 }, { "epoch": 5.47, "grad_norm": 16.089561462402344, "learning_rate": 2.2894833526964364e-06, "loss": 0.0536, "step": 556175 }, { "epoch": 5.47, "grad_norm": 4.968388557434082, "learning_rate": 2.289359230242188e-06, "loss": 0.0872, "step": 556200 }, { "epoch": 5.47, "grad_norm": 8.513317108154297, "learning_rate": 2.2892351077879393e-06, "loss": 0.048, "step": 556225 }, { "epoch": 5.47, "grad_norm": 1.4178117513656616, "learning_rate": 2.289110985333691e-06, "loss": 0.0687, "step": 556250 }, { "epoch": 5.47, "grad_norm": 6.2649149894714355, "learning_rate": 2.2889868628794426e-06, "loss": 0.0381, "step": 556275 }, { "epoch": 5.47, "grad_norm": 5.4266133308410645, "learning_rate": 2.2888627404251938e-06, "loss": 0.0697, "step": 556300 }, { "epoch": 5.47, "grad_norm": 4.926638126373291, "learning_rate": 2.2887386179709454e-06, "loss": 0.041, "step": 556325 }, { "epoch": 5.47, "grad_norm": 5.3021016120910645, "learning_rate": 2.288614495516697e-06, "loss": 0.1112, "step": 556350 }, { "epoch": 5.47, "grad_norm": 12.744074821472168, "learning_rate": 2.2884903730624487e-06, "loss": 0.0469, "step": 556375 }, { "epoch": 5.47, "grad_norm": 4.270826816558838, "learning_rate": 2.2883662506082003e-06, "loss": 0.0852, "step": 556400 }, { "epoch": 5.47, "grad_norm": 4.892338752746582, "learning_rate": 2.288242128153952e-06, "loss": 0.0366, "step": 556425 }, { "epoch": 5.47, "grad_norm": 1.8648805618286133, "learning_rate": 2.288118005699703e-06, "loss": 0.0702, "step": 556450 }, { "epoch": 5.47, "grad_norm": 13.450180053710938, "learning_rate": 2.2879938832454548e-06, "loss": 0.0468, "step": 556475 }, { "epoch": 5.47, "grad_norm": 0.04564831405878067, "learning_rate": 2.2878697607912064e-06, "loss": 0.0761, "step": 556500 }, { "epoch": 5.47, "grad_norm": 9.346415519714355, "learning_rate": 2.2877456383369576e-06, "loss": 0.0484, "step": 556525 }, { "epoch": 5.47, "grad_norm": 1.8396941423416138, "learning_rate": 2.2876215158827093e-06, "loss": 0.1006, "step": 556550 }, { "epoch": 5.47, "grad_norm": 3.797498941421509, "learning_rate": 2.287497393428461e-06, "loss": 0.0702, "step": 556575 }, { "epoch": 5.47, "grad_norm": 5.902445316314697, "learning_rate": 2.2873732709742125e-06, "loss": 0.0841, "step": 556600 }, { "epoch": 5.47, "grad_norm": 10.322344779968262, "learning_rate": 2.287249148519964e-06, "loss": 0.0509, "step": 556625 }, { "epoch": 5.47, "grad_norm": 7.0843119621276855, "learning_rate": 2.2871250260657154e-06, "loss": 0.0815, "step": 556650 }, { "epoch": 5.47, "grad_norm": 12.021787643432617, "learning_rate": 2.287000903611467e-06, "loss": 0.0635, "step": 556675 }, { "epoch": 5.47, "grad_norm": 0.1333490014076233, "learning_rate": 2.2868767811572187e-06, "loss": 0.0918, "step": 556700 }, { "epoch": 5.47, "grad_norm": 7.1953444480896, "learning_rate": 2.2867526587029703e-06, "loss": 0.0389, "step": 556725 }, { "epoch": 5.47, "grad_norm": 0.41721397638320923, "learning_rate": 2.286628536248722e-06, "loss": 0.1049, "step": 556750 }, { "epoch": 5.47, "grad_norm": 25.895732879638672, "learning_rate": 2.2865044137944736e-06, "loss": 0.0495, "step": 556775 }, { "epoch": 5.47, "grad_norm": 2.578716993331909, "learning_rate": 2.2863802913402248e-06, "loss": 0.1123, "step": 556800 }, { "epoch": 5.47, "grad_norm": 12.440814018249512, "learning_rate": 2.2862561688859764e-06, "loss": 0.0521, "step": 556825 }, { "epoch": 5.48, "grad_norm": 2.6886980533599854, "learning_rate": 2.286132046431728e-06, "loss": 0.1134, "step": 556850 }, { "epoch": 5.48, "grad_norm": 9.819201469421387, "learning_rate": 2.2860079239774793e-06, "loss": 0.0484, "step": 556875 }, { "epoch": 5.48, "grad_norm": 1.0659643411636353, "learning_rate": 2.285883801523231e-06, "loss": 0.059, "step": 556900 }, { "epoch": 5.48, "grad_norm": 20.10331153869629, "learning_rate": 2.2857596790689825e-06, "loss": 0.0639, "step": 556925 }, { "epoch": 5.48, "grad_norm": 0.13350152969360352, "learning_rate": 2.285635556614734e-06, "loss": 0.0755, "step": 556950 }, { "epoch": 5.48, "grad_norm": 11.15888500213623, "learning_rate": 2.285511434160486e-06, "loss": 0.0645, "step": 556975 }, { "epoch": 5.48, "grad_norm": 0.01777813211083412, "learning_rate": 2.285387311706237e-06, "loss": 0.0707, "step": 557000 }, { "epoch": 5.48, "grad_norm": 20.309236526489258, "learning_rate": 2.2852631892519886e-06, "loss": 0.0472, "step": 557025 }, { "epoch": 5.48, "grad_norm": 0.5189229249954224, "learning_rate": 2.2851390667977403e-06, "loss": 0.0667, "step": 557050 }, { "epoch": 5.48, "grad_norm": 9.349560737609863, "learning_rate": 2.2850149443434915e-06, "loss": 0.0458, "step": 557075 }, { "epoch": 5.48, "grad_norm": 4.928231716156006, "learning_rate": 2.284890821889243e-06, "loss": 0.0688, "step": 557100 }, { "epoch": 5.48, "grad_norm": 6.130425930023193, "learning_rate": 2.2847666994349948e-06, "loss": 0.0426, "step": 557125 }, { "epoch": 5.48, "grad_norm": 4.239280700683594, "learning_rate": 2.2846425769807464e-06, "loss": 0.0922, "step": 557150 }, { "epoch": 5.48, "grad_norm": 12.930228233337402, "learning_rate": 2.284518454526498e-06, "loss": 0.0619, "step": 557175 }, { "epoch": 5.48, "grad_norm": 0.045153383165597916, "learning_rate": 2.2843943320722497e-06, "loss": 0.0683, "step": 557200 }, { "epoch": 5.48, "grad_norm": 10.576355934143066, "learning_rate": 2.284270209618001e-06, "loss": 0.0439, "step": 557225 }, { "epoch": 5.48, "grad_norm": 0.0742923840880394, "learning_rate": 2.2841460871637525e-06, "loss": 0.0754, "step": 557250 }, { "epoch": 5.48, "grad_norm": 13.957002639770508, "learning_rate": 2.284021964709504e-06, "loss": 0.0421, "step": 557275 }, { "epoch": 5.48, "grad_norm": 9.761736869812012, "learning_rate": 2.2838978422552554e-06, "loss": 0.0811, "step": 557300 }, { "epoch": 5.48, "grad_norm": 14.444089889526367, "learning_rate": 2.283773719801007e-06, "loss": 0.0518, "step": 557325 }, { "epoch": 5.48, "grad_norm": 2.413076639175415, "learning_rate": 2.2836495973467586e-06, "loss": 0.0859, "step": 557350 }, { "epoch": 5.48, "grad_norm": 15.18664264678955, "learning_rate": 2.2835254748925103e-06, "loss": 0.0549, "step": 557375 }, { "epoch": 5.48, "grad_norm": 0.9589723944664001, "learning_rate": 2.283401352438262e-06, "loss": 0.0767, "step": 557400 }, { "epoch": 5.48, "grad_norm": 9.813549995422363, "learning_rate": 2.283277229984013e-06, "loss": 0.0452, "step": 557425 }, { "epoch": 5.48, "grad_norm": 2.9196269512176514, "learning_rate": 2.2831531075297647e-06, "loss": 0.0636, "step": 557450 }, { "epoch": 5.48, "grad_norm": 6.749583721160889, "learning_rate": 2.2830289850755164e-06, "loss": 0.0496, "step": 557475 }, { "epoch": 5.48, "grad_norm": 6.325811386108398, "learning_rate": 2.2829048626212676e-06, "loss": 0.0886, "step": 557500 }, { "epoch": 5.48, "grad_norm": 8.86213493347168, "learning_rate": 2.2827807401670192e-06, "loss": 0.0407, "step": 557525 }, { "epoch": 5.48, "grad_norm": 2.996696949005127, "learning_rate": 2.282656617712771e-06, "loss": 0.0825, "step": 557550 }, { "epoch": 5.48, "grad_norm": 9.242919921875, "learning_rate": 2.2825324952585225e-06, "loss": 0.0748, "step": 557575 }, { "epoch": 5.48, "grad_norm": 5.7553791999816895, "learning_rate": 2.282408372804274e-06, "loss": 0.1061, "step": 557600 }, { "epoch": 5.48, "grad_norm": 13.469170570373535, "learning_rate": 2.2822842503500258e-06, "loss": 0.0795, "step": 557625 }, { "epoch": 5.48, "grad_norm": 0.5933290719985962, "learning_rate": 2.282160127895777e-06, "loss": 0.0813, "step": 557650 }, { "epoch": 5.48, "grad_norm": 20.42399024963379, "learning_rate": 2.2820360054415286e-06, "loss": 0.0602, "step": 557675 }, { "epoch": 5.48, "grad_norm": 5.241450786590576, "learning_rate": 2.2819118829872802e-06, "loss": 0.0618, "step": 557700 }, { "epoch": 5.48, "grad_norm": 10.083463668823242, "learning_rate": 2.2817877605330315e-06, "loss": 0.0505, "step": 557725 }, { "epoch": 5.48, "grad_norm": 1.3363068103790283, "learning_rate": 2.281663638078783e-06, "loss": 0.0814, "step": 557750 }, { "epoch": 5.48, "grad_norm": 14.42351245880127, "learning_rate": 2.2815395156245347e-06, "loss": 0.0515, "step": 557775 }, { "epoch": 5.48, "grad_norm": 3.806405544281006, "learning_rate": 2.281420358068456e-06, "loss": 0.0973, "step": 557800 }, { "epoch": 5.48, "grad_norm": 20.928783416748047, "learning_rate": 2.281296235614208e-06, "loss": 0.0332, "step": 557825 }, { "epoch": 5.48, "grad_norm": 0.01684335246682167, "learning_rate": 2.2811721131599594e-06, "loss": 0.0814, "step": 557850 }, { "epoch": 5.49, "grad_norm": 1.1208508014678955, "learning_rate": 2.281047990705711e-06, "loss": 0.0435, "step": 557875 }, { "epoch": 5.49, "grad_norm": 2.2397782802581787, "learning_rate": 2.2809238682514623e-06, "loss": 0.0742, "step": 557900 }, { "epoch": 5.49, "grad_norm": 16.818241119384766, "learning_rate": 2.280799745797214e-06, "loss": 0.0472, "step": 557925 }, { "epoch": 5.49, "grad_norm": 1.6410917043685913, "learning_rate": 2.2806756233429656e-06, "loss": 0.0819, "step": 557950 }, { "epoch": 5.49, "grad_norm": 8.551397323608398, "learning_rate": 2.2805515008887168e-06, "loss": 0.074, "step": 557975 }, { "epoch": 5.49, "grad_norm": 2.556887626647949, "learning_rate": 2.2804273784344684e-06, "loss": 0.0726, "step": 558000 }, { "epoch": 5.49, "grad_norm": 12.003061294555664, "learning_rate": 2.28030325598022e-06, "loss": 0.0485, "step": 558025 }, { "epoch": 5.49, "grad_norm": 9.506329536437988, "learning_rate": 2.2801791335259717e-06, "loss": 0.0632, "step": 558050 }, { "epoch": 5.49, "grad_norm": 4.0448408126831055, "learning_rate": 2.2800550110717233e-06, "loss": 0.0443, "step": 558075 }, { "epoch": 5.49, "grad_norm": 1.530718207359314, "learning_rate": 2.2799308886174745e-06, "loss": 0.0669, "step": 558100 }, { "epoch": 5.49, "grad_norm": 14.082072257995605, "learning_rate": 2.279806766163226e-06, "loss": 0.0495, "step": 558125 }, { "epoch": 5.49, "grad_norm": 5.414853096008301, "learning_rate": 2.279682643708978e-06, "loss": 0.0895, "step": 558150 }, { "epoch": 5.49, "grad_norm": 11.064483642578125, "learning_rate": 2.279558521254729e-06, "loss": 0.0646, "step": 558175 }, { "epoch": 5.49, "grad_norm": 0.7136454582214355, "learning_rate": 2.2794343988004806e-06, "loss": 0.1197, "step": 558200 }, { "epoch": 5.49, "grad_norm": 3.4878900051116943, "learning_rate": 2.2793102763462323e-06, "loss": 0.049, "step": 558225 }, { "epoch": 5.49, "grad_norm": 4.213468551635742, "learning_rate": 2.279186153891984e-06, "loss": 0.0764, "step": 558250 }, { "epoch": 5.49, "grad_norm": 10.429990768432617, "learning_rate": 2.2790620314377355e-06, "loss": 0.0597, "step": 558275 }, { "epoch": 5.49, "grad_norm": 40.13264083862305, "learning_rate": 2.278937908983487e-06, "loss": 0.1154, "step": 558300 }, { "epoch": 5.49, "grad_norm": 3.8571457862854004, "learning_rate": 2.2788137865292384e-06, "loss": 0.0456, "step": 558325 }, { "epoch": 5.49, "grad_norm": 1.3475736379623413, "learning_rate": 2.27868966407499e-06, "loss": 0.0802, "step": 558350 }, { "epoch": 5.49, "grad_norm": 8.59029769897461, "learning_rate": 2.2785655416207417e-06, "loss": 0.0461, "step": 558375 }, { "epoch": 5.49, "grad_norm": 0.3751082122325897, "learning_rate": 2.278441419166493e-06, "loss": 0.0774, "step": 558400 }, { "epoch": 5.49, "grad_norm": 17.920387268066406, "learning_rate": 2.2783172967122445e-06, "loss": 0.0428, "step": 558425 }, { "epoch": 5.49, "grad_norm": 0.3588126301765442, "learning_rate": 2.278193174257996e-06, "loss": 0.1035, "step": 558450 }, { "epoch": 5.49, "grad_norm": 14.712653160095215, "learning_rate": 2.2780690518037478e-06, "loss": 0.0474, "step": 558475 }, { "epoch": 5.49, "grad_norm": 0.04578183963894844, "learning_rate": 2.2779449293494994e-06, "loss": 0.0708, "step": 558500 }, { "epoch": 5.49, "grad_norm": 12.600252151489258, "learning_rate": 2.2778208068952506e-06, "loss": 0.0318, "step": 558525 }, { "epoch": 5.49, "grad_norm": 3.532215118408203, "learning_rate": 2.2776966844410023e-06, "loss": 0.0758, "step": 558550 }, { "epoch": 5.49, "grad_norm": 8.991350173950195, "learning_rate": 2.277572561986754e-06, "loss": 0.0468, "step": 558575 }, { "epoch": 5.49, "grad_norm": 4.3112688064575195, "learning_rate": 2.277448439532505e-06, "loss": 0.0847, "step": 558600 }, { "epoch": 5.49, "grad_norm": 16.776277542114258, "learning_rate": 2.2773243170782567e-06, "loss": 0.0636, "step": 558625 }, { "epoch": 5.49, "grad_norm": 0.03814195469021797, "learning_rate": 2.2772001946240084e-06, "loss": 0.0779, "step": 558650 }, { "epoch": 5.49, "grad_norm": 13.40699577331543, "learning_rate": 2.27707607216976e-06, "loss": 0.0394, "step": 558675 }, { "epoch": 5.49, "grad_norm": 5.263749122619629, "learning_rate": 2.2769519497155116e-06, "loss": 0.0969, "step": 558700 }, { "epoch": 5.49, "grad_norm": 8.62038516998291, "learning_rate": 2.2768278272612633e-06, "loss": 0.0563, "step": 558725 }, { "epoch": 5.49, "grad_norm": 3.2764434814453125, "learning_rate": 2.2767037048070145e-06, "loss": 0.0788, "step": 558750 }, { "epoch": 5.49, "grad_norm": 11.558778762817383, "learning_rate": 2.276579582352766e-06, "loss": 0.04, "step": 558775 }, { "epoch": 5.49, "grad_norm": 1.9542053937911987, "learning_rate": 2.2764554598985178e-06, "loss": 0.0965, "step": 558800 }, { "epoch": 5.49, "grad_norm": 10.588908195495605, "learning_rate": 2.276331337444269e-06, "loss": 0.0559, "step": 558825 }, { "epoch": 5.49, "grad_norm": 1.4299166202545166, "learning_rate": 2.2762072149900206e-06, "loss": 0.0837, "step": 558850 }, { "epoch": 5.49, "grad_norm": 13.018866539001465, "learning_rate": 2.2760830925357722e-06, "loss": 0.0741, "step": 558875 }, { "epoch": 5.5, "grad_norm": 3.6491103172302246, "learning_rate": 2.275958970081524e-06, "loss": 0.0694, "step": 558900 }, { "epoch": 5.5, "grad_norm": 10.422082901000977, "learning_rate": 2.2758348476272755e-06, "loss": 0.0464, "step": 558925 }, { "epoch": 5.5, "grad_norm": 0.04182844236493111, "learning_rate": 2.2757107251730267e-06, "loss": 0.0538, "step": 558950 }, { "epoch": 5.5, "grad_norm": 7.581455230712891, "learning_rate": 2.2755866027187784e-06, "loss": 0.0421, "step": 558975 }, { "epoch": 5.5, "grad_norm": 0.2461751103401184, "learning_rate": 2.27546248026453e-06, "loss": 0.0679, "step": 559000 }, { "epoch": 5.5, "grad_norm": 8.213617324829102, "learning_rate": 2.275338357810281e-06, "loss": 0.0528, "step": 559025 }, { "epoch": 5.5, "grad_norm": 0.6500251293182373, "learning_rate": 2.275214235356033e-06, "loss": 0.0623, "step": 559050 }, { "epoch": 5.5, "grad_norm": 7.9503631591796875, "learning_rate": 2.2750901129017845e-06, "loss": 0.0409, "step": 559075 }, { "epoch": 5.5, "grad_norm": 2.666675567626953, "learning_rate": 2.274965990447536e-06, "loss": 0.0601, "step": 559100 }, { "epoch": 5.5, "grad_norm": 14.10339069366455, "learning_rate": 2.2748418679932877e-06, "loss": 0.0616, "step": 559125 }, { "epoch": 5.5, "grad_norm": 0.6857892274856567, "learning_rate": 2.2747177455390394e-06, "loss": 0.1003, "step": 559150 }, { "epoch": 5.5, "grad_norm": 5.850159645080566, "learning_rate": 2.2745936230847906e-06, "loss": 0.05, "step": 559175 }, { "epoch": 5.5, "grad_norm": 2.0373144149780273, "learning_rate": 2.2744695006305422e-06, "loss": 0.1013, "step": 559200 }, { "epoch": 5.5, "grad_norm": 5.807002067565918, "learning_rate": 2.274345378176294e-06, "loss": 0.0383, "step": 559225 }, { "epoch": 5.5, "grad_norm": 0.6159390807151794, "learning_rate": 2.274221255722045e-06, "loss": 0.0859, "step": 559250 }, { "epoch": 5.5, "grad_norm": 7.952756404876709, "learning_rate": 2.2740971332677967e-06, "loss": 0.0718, "step": 559275 }, { "epoch": 5.5, "grad_norm": 2.1344192028045654, "learning_rate": 2.2739730108135483e-06, "loss": 0.0667, "step": 559300 }, { "epoch": 5.5, "grad_norm": 11.818041801452637, "learning_rate": 2.2738488883593e-06, "loss": 0.0656, "step": 559325 }, { "epoch": 5.5, "grad_norm": 3.5760016441345215, "learning_rate": 2.2737247659050516e-06, "loss": 0.0929, "step": 559350 }, { "epoch": 5.5, "grad_norm": 11.901744842529297, "learning_rate": 2.273600643450803e-06, "loss": 0.0658, "step": 559375 }, { "epoch": 5.5, "grad_norm": 4.2869720458984375, "learning_rate": 2.2734765209965545e-06, "loss": 0.0913, "step": 559400 }, { "epoch": 5.5, "grad_norm": 10.998351097106934, "learning_rate": 2.273352398542306e-06, "loss": 0.0538, "step": 559425 }, { "epoch": 5.5, "grad_norm": 1.4159644842147827, "learning_rate": 2.2732282760880573e-06, "loss": 0.0799, "step": 559450 }, { "epoch": 5.5, "grad_norm": 8.210613250732422, "learning_rate": 2.273104153633809e-06, "loss": 0.0653, "step": 559475 }, { "epoch": 5.5, "grad_norm": 1.524701476097107, "learning_rate": 2.2729800311795606e-06, "loss": 0.0714, "step": 559500 }, { "epoch": 5.5, "grad_norm": 13.802842140197754, "learning_rate": 2.2728559087253122e-06, "loss": 0.0568, "step": 559525 }, { "epoch": 5.5, "grad_norm": 7.363801002502441, "learning_rate": 2.272731786271064e-06, "loss": 0.0751, "step": 559550 }, { "epoch": 5.5, "grad_norm": 5.951483249664307, "learning_rate": 2.2726076638168155e-06, "loss": 0.0507, "step": 559575 }, { "epoch": 5.5, "grad_norm": 0.6791064143180847, "learning_rate": 2.2724835413625667e-06, "loss": 0.1038, "step": 559600 }, { "epoch": 5.5, "grad_norm": 14.519061088562012, "learning_rate": 2.2723594189083183e-06, "loss": 0.0659, "step": 559625 }, { "epoch": 5.5, "grad_norm": 0.14167623221874237, "learning_rate": 2.27223529645407e-06, "loss": 0.0796, "step": 559650 }, { "epoch": 5.5, "grad_norm": 9.473336219787598, "learning_rate": 2.2721111739998216e-06, "loss": 0.0583, "step": 559675 }, { "epoch": 5.5, "grad_norm": 1.801247239112854, "learning_rate": 2.2719870515455732e-06, "loss": 0.1002, "step": 559700 }, { "epoch": 5.5, "grad_norm": 19.33139419555664, "learning_rate": 2.2718629290913244e-06, "loss": 0.0367, "step": 559725 }, { "epoch": 5.5, "grad_norm": 0.50077885389328, "learning_rate": 2.271738806637076e-06, "loss": 0.0728, "step": 559750 }, { "epoch": 5.5, "grad_norm": 14.173636436462402, "learning_rate": 2.2716146841828277e-06, "loss": 0.0541, "step": 559775 }, { "epoch": 5.5, "grad_norm": 3.53104829788208, "learning_rate": 2.271490561728579e-06, "loss": 0.0846, "step": 559800 }, { "epoch": 5.5, "grad_norm": 14.324981689453125, "learning_rate": 2.2713664392743306e-06, "loss": 0.0635, "step": 559825 }, { "epoch": 5.5, "grad_norm": 1.7340672016143799, "learning_rate": 2.271242316820082e-06, "loss": 0.0678, "step": 559850 }, { "epoch": 5.5, "grad_norm": 17.3398494720459, "learning_rate": 2.271118194365834e-06, "loss": 0.0522, "step": 559875 }, { "epoch": 5.51, "grad_norm": 1.5906906127929688, "learning_rate": 2.2709940719115855e-06, "loss": 0.0635, "step": 559900 }, { "epoch": 5.51, "grad_norm": 13.622804641723633, "learning_rate": 2.270869949457337e-06, "loss": 0.0461, "step": 559925 }, { "epoch": 5.51, "grad_norm": 0.2946966588497162, "learning_rate": 2.2707458270030883e-06, "loss": 0.0763, "step": 559950 }, { "epoch": 5.51, "grad_norm": 13.450350761413574, "learning_rate": 2.27062170454884e-06, "loss": 0.0537, "step": 559975 }, { "epoch": 5.51, "grad_norm": 4.460964202880859, "learning_rate": 2.2704975820945916e-06, "loss": 0.0963, "step": 560000 }, { "epoch": 5.51, "eval_loss": 0.7847653031349182, "eval_runtime": 6131.1025, "eval_samples_per_second": 1.544, "eval_steps_per_second": 0.193, "eval_wer": 0.11573746217729994, "step": 560000 }, { "epoch": 5.51, "grad_norm": 10.904921531677246, "learning_rate": 2.270373459640343e-06, "loss": 0.0499, "step": 560025 }, { "epoch": 5.51, "grad_norm": 3.9054338932037354, "learning_rate": 2.2702493371860944e-06, "loss": 0.0926, "step": 560050 }, { "epoch": 5.51, "grad_norm": 9.796503067016602, "learning_rate": 2.270125214731846e-06, "loss": 0.0697, "step": 560075 }, { "epoch": 5.51, "grad_norm": 4.898519039154053, "learning_rate": 2.2700010922775977e-06, "loss": 0.0963, "step": 560100 }, { "epoch": 5.51, "grad_norm": 5.6550068855285645, "learning_rate": 2.2698769698233493e-06, "loss": 0.0445, "step": 560125 }, { "epoch": 5.51, "grad_norm": 1.2533421516418457, "learning_rate": 2.2697528473691005e-06, "loss": 0.1132, "step": 560150 }, { "epoch": 5.51, "grad_norm": 10.206337928771973, "learning_rate": 2.269628724914852e-06, "loss": 0.0357, "step": 560175 }, { "epoch": 5.51, "grad_norm": 5.4417338371276855, "learning_rate": 2.2695095673587736e-06, "loss": 0.1217, "step": 560200 }, { "epoch": 5.51, "grad_norm": 6.52938175201416, "learning_rate": 2.2693854449045253e-06, "loss": 0.0456, "step": 560225 }, { "epoch": 5.51, "grad_norm": 4.502805709838867, "learning_rate": 2.269261322450277e-06, "loss": 0.0562, "step": 560250 }, { "epoch": 5.51, "grad_norm": 6.5247063636779785, "learning_rate": 2.269137199996028e-06, "loss": 0.0772, "step": 560275 }, { "epoch": 5.51, "grad_norm": 0.5584306716918945, "learning_rate": 2.2690130775417797e-06, "loss": 0.0712, "step": 560300 }, { "epoch": 5.51, "grad_norm": 10.328300476074219, "learning_rate": 2.2688889550875314e-06, "loss": 0.0651, "step": 560325 }, { "epoch": 5.51, "grad_norm": 0.026139216497540474, "learning_rate": 2.268764832633283e-06, "loss": 0.0713, "step": 560350 }, { "epoch": 5.51, "grad_norm": 3.123450517654419, "learning_rate": 2.2686407101790347e-06, "loss": 0.0518, "step": 560375 }, { "epoch": 5.51, "grad_norm": 0.10053666681051254, "learning_rate": 2.268516587724786e-06, "loss": 0.0783, "step": 560400 }, { "epoch": 5.51, "grad_norm": 6.681497097015381, "learning_rate": 2.2683924652705375e-06, "loss": 0.065, "step": 560425 }, { "epoch": 5.51, "grad_norm": 0.140524759888649, "learning_rate": 2.268268342816289e-06, "loss": 0.0914, "step": 560450 }, { "epoch": 5.51, "grad_norm": 10.243101119995117, "learning_rate": 2.2681442203620403e-06, "loss": 0.0733, "step": 560475 }, { "epoch": 5.51, "grad_norm": 0.24169579148292542, "learning_rate": 2.268020097907792e-06, "loss": 0.0771, "step": 560500 }, { "epoch": 5.51, "grad_norm": 7.129365921020508, "learning_rate": 2.2678959754535436e-06, "loss": 0.0431, "step": 560525 }, { "epoch": 5.51, "grad_norm": 1.851527214050293, "learning_rate": 2.2677718529992953e-06, "loss": 0.0658, "step": 560550 }, { "epoch": 5.51, "grad_norm": 4.809375762939453, "learning_rate": 2.267647730545047e-06, "loss": 0.054, "step": 560575 }, { "epoch": 5.51, "grad_norm": 0.05280286446213722, "learning_rate": 2.2675236080907985e-06, "loss": 0.0786, "step": 560600 }, { "epoch": 5.51, "grad_norm": 6.711270809173584, "learning_rate": 2.2673994856365497e-06, "loss": 0.0477, "step": 560625 }, { "epoch": 5.51, "grad_norm": 4.013094425201416, "learning_rate": 2.2672753631823014e-06, "loss": 0.0959, "step": 560650 }, { "epoch": 5.51, "grad_norm": 7.78214168548584, "learning_rate": 2.267151240728053e-06, "loss": 0.0663, "step": 560675 }, { "epoch": 5.51, "grad_norm": 3.4772703647613525, "learning_rate": 2.2670271182738042e-06, "loss": 0.0661, "step": 560700 }, { "epoch": 5.51, "grad_norm": 12.23577880859375, "learning_rate": 2.266902995819556e-06, "loss": 0.0438, "step": 560725 }, { "epoch": 5.51, "grad_norm": 0.898360550403595, "learning_rate": 2.2667788733653075e-06, "loss": 0.0784, "step": 560750 }, { "epoch": 5.51, "grad_norm": 8.551321029663086, "learning_rate": 2.266654750911059e-06, "loss": 0.0681, "step": 560775 }, { "epoch": 5.51, "grad_norm": 37.47381591796875, "learning_rate": 2.2665306284568108e-06, "loss": 0.093, "step": 560800 }, { "epoch": 5.51, "grad_norm": 6.341025352478027, "learning_rate": 2.266406506002562e-06, "loss": 0.0583, "step": 560825 }, { "epoch": 5.51, "grad_norm": 5.167139053344727, "learning_rate": 2.2662823835483136e-06, "loss": 0.0888, "step": 560850 }, { "epoch": 5.51, "grad_norm": 11.07844066619873, "learning_rate": 2.2661582610940652e-06, "loss": 0.0529, "step": 560875 }, { "epoch": 5.51, "grad_norm": 2.7989110946655273, "learning_rate": 2.2660341386398164e-06, "loss": 0.074, "step": 560900 }, { "epoch": 5.52, "grad_norm": 7.355566024780273, "learning_rate": 2.265910016185568e-06, "loss": 0.0407, "step": 560925 }, { "epoch": 5.52, "grad_norm": 43.16305923461914, "learning_rate": 2.2657858937313197e-06, "loss": 0.0753, "step": 560950 }, { "epoch": 5.52, "grad_norm": 12.360092163085938, "learning_rate": 2.2656617712770714e-06, "loss": 0.0499, "step": 560975 }, { "epoch": 5.52, "grad_norm": 6.773451805114746, "learning_rate": 2.265537648822823e-06, "loss": 0.0807, "step": 561000 }, { "epoch": 5.52, "grad_norm": 8.714288711547852, "learning_rate": 2.2654135263685746e-06, "loss": 0.0517, "step": 561025 }, { "epoch": 5.52, "grad_norm": 7.9127984046936035, "learning_rate": 2.265289403914326e-06, "loss": 0.0731, "step": 561050 }, { "epoch": 5.52, "grad_norm": 17.726896286010742, "learning_rate": 2.2651652814600775e-06, "loss": 0.0538, "step": 561075 }, { "epoch": 5.52, "grad_norm": 3.1354544162750244, "learning_rate": 2.265041159005829e-06, "loss": 0.0737, "step": 561100 }, { "epoch": 5.52, "grad_norm": 10.54277229309082, "learning_rate": 2.2649170365515803e-06, "loss": 0.0432, "step": 561125 }, { "epoch": 5.52, "grad_norm": 12.004859924316406, "learning_rate": 2.264792914097332e-06, "loss": 0.0891, "step": 561150 }, { "epoch": 5.52, "grad_norm": 15.290194511413574, "learning_rate": 2.2646687916430836e-06, "loss": 0.0661, "step": 561175 }, { "epoch": 5.52, "grad_norm": 0.03440745547413826, "learning_rate": 2.2645446691888352e-06, "loss": 0.0863, "step": 561200 }, { "epoch": 5.52, "grad_norm": 16.848468780517578, "learning_rate": 2.264420546734587e-06, "loss": 0.0394, "step": 561225 }, { "epoch": 5.52, "grad_norm": 0.8584825992584229, "learning_rate": 2.264296424280338e-06, "loss": 0.0796, "step": 561250 }, { "epoch": 5.52, "grad_norm": 17.76150894165039, "learning_rate": 2.2641723018260897e-06, "loss": 0.0473, "step": 561275 }, { "epoch": 5.52, "grad_norm": 0.14726151525974274, "learning_rate": 2.2640481793718413e-06, "loss": 0.0541, "step": 561300 }, { "epoch": 5.52, "grad_norm": 12.100648880004883, "learning_rate": 2.2639240569175925e-06, "loss": 0.0377, "step": 561325 }, { "epoch": 5.52, "grad_norm": 2.9453842639923096, "learning_rate": 2.263799934463344e-06, "loss": 0.0798, "step": 561350 }, { "epoch": 5.52, "grad_norm": 10.000086784362793, "learning_rate": 2.263675812009096e-06, "loss": 0.0366, "step": 561375 }, { "epoch": 5.52, "grad_norm": 2.76674747467041, "learning_rate": 2.2635516895548475e-06, "loss": 0.0868, "step": 561400 }, { "epoch": 5.52, "grad_norm": 10.101680755615234, "learning_rate": 2.263427567100599e-06, "loss": 0.0709, "step": 561425 }, { "epoch": 5.52, "grad_norm": 1.796608805656433, "learning_rate": 2.2633034446463507e-06, "loss": 0.0646, "step": 561450 }, { "epoch": 5.52, "grad_norm": 16.741085052490234, "learning_rate": 2.263179322192102e-06, "loss": 0.0441, "step": 561475 }, { "epoch": 5.52, "grad_norm": 0.5369470715522766, "learning_rate": 2.2630551997378536e-06, "loss": 0.0684, "step": 561500 }, { "epoch": 5.52, "grad_norm": 5.63537073135376, "learning_rate": 2.262931077283605e-06, "loss": 0.0396, "step": 561525 }, { "epoch": 5.52, "grad_norm": 1.7547836303710938, "learning_rate": 2.2628069548293564e-06, "loss": 0.0998, "step": 561550 }, { "epoch": 5.52, "grad_norm": 7.947826862335205, "learning_rate": 2.262682832375108e-06, "loss": 0.0607, "step": 561575 }, { "epoch": 5.52, "grad_norm": 4.79409646987915, "learning_rate": 2.2625587099208597e-06, "loss": 0.0743, "step": 561600 }, { "epoch": 5.52, "grad_norm": 10.463824272155762, "learning_rate": 2.2624345874666113e-06, "loss": 0.0524, "step": 561625 }, { "epoch": 5.52, "grad_norm": 6.942837238311768, "learning_rate": 2.262310465012363e-06, "loss": 0.0726, "step": 561650 }, { "epoch": 5.52, "grad_norm": 12.55427360534668, "learning_rate": 2.262186342558114e-06, "loss": 0.0573, "step": 561675 }, { "epoch": 5.52, "grad_norm": 0.5432572960853577, "learning_rate": 2.262062220103866e-06, "loss": 0.0731, "step": 561700 }, { "epoch": 5.52, "grad_norm": 4.648562431335449, "learning_rate": 2.2619380976496174e-06, "loss": 0.0437, "step": 561725 }, { "epoch": 5.52, "grad_norm": 0.5916716456413269, "learning_rate": 2.2618139751953686e-06, "loss": 0.0614, "step": 561750 }, { "epoch": 5.52, "grad_norm": 13.882696151733398, "learning_rate": 2.2616898527411203e-06, "loss": 0.0617, "step": 561775 }, { "epoch": 5.52, "grad_norm": 0.06801516562700272, "learning_rate": 2.261565730286872e-06, "loss": 0.0942, "step": 561800 }, { "epoch": 5.52, "grad_norm": 9.303658485412598, "learning_rate": 2.2614416078326236e-06, "loss": 0.0416, "step": 561825 }, { "epoch": 5.52, "grad_norm": 6.347496509552002, "learning_rate": 2.261317485378375e-06, "loss": 0.0805, "step": 561850 }, { "epoch": 5.52, "grad_norm": 8.215450286865234, "learning_rate": 2.261193362924127e-06, "loss": 0.0731, "step": 561875 }, { "epoch": 5.52, "grad_norm": 5.900129318237305, "learning_rate": 2.261069240469878e-06, "loss": 0.1211, "step": 561900 }, { "epoch": 5.52, "grad_norm": 6.841125965118408, "learning_rate": 2.2609451180156297e-06, "loss": 0.0393, "step": 561925 }, { "epoch": 5.53, "grad_norm": 0.31637871265411377, "learning_rate": 2.2608209955613813e-06, "loss": 0.0858, "step": 561950 }, { "epoch": 5.53, "grad_norm": 8.633883476257324, "learning_rate": 2.2606968731071325e-06, "loss": 0.0429, "step": 561975 }, { "epoch": 5.53, "grad_norm": 6.65751838684082, "learning_rate": 2.260572750652884e-06, "loss": 0.0683, "step": 562000 }, { "epoch": 5.53, "grad_norm": 19.43075942993164, "learning_rate": 2.2604486281986358e-06, "loss": 0.0609, "step": 562025 }, { "epoch": 5.53, "grad_norm": 4.145364284515381, "learning_rate": 2.2603245057443874e-06, "loss": 0.0952, "step": 562050 }, { "epoch": 5.53, "grad_norm": 16.416349411010742, "learning_rate": 2.260200383290139e-06, "loss": 0.0491, "step": 562075 }, { "epoch": 5.53, "grad_norm": 18.551830291748047, "learning_rate": 2.2600762608358903e-06, "loss": 0.073, "step": 562100 }, { "epoch": 5.53, "grad_norm": 6.536586761474609, "learning_rate": 2.259952138381642e-06, "loss": 0.0659, "step": 562125 }, { "epoch": 5.53, "grad_norm": 0.48807796835899353, "learning_rate": 2.2598280159273935e-06, "loss": 0.0973, "step": 562150 }, { "epoch": 5.53, "grad_norm": 12.76976490020752, "learning_rate": 2.2597038934731447e-06, "loss": 0.0625, "step": 562175 }, { "epoch": 5.53, "grad_norm": 6.545858383178711, "learning_rate": 2.2595797710188964e-06, "loss": 0.0887, "step": 562200 }, { "epoch": 5.53, "grad_norm": 14.920260429382324, "learning_rate": 2.259455648564648e-06, "loss": 0.0611, "step": 562225 }, { "epoch": 5.53, "grad_norm": 3.2427868843078613, "learning_rate": 2.2593315261103997e-06, "loss": 0.0685, "step": 562250 }, { "epoch": 5.53, "grad_norm": 11.430695533752441, "learning_rate": 2.2592074036561513e-06, "loss": 0.0426, "step": 562275 }, { "epoch": 5.53, "grad_norm": 2.940047025680542, "learning_rate": 2.259083281201903e-06, "loss": 0.0624, "step": 562300 }, { "epoch": 5.53, "grad_norm": 12.388772964477539, "learning_rate": 2.258959158747654e-06, "loss": 0.0608, "step": 562325 }, { "epoch": 5.53, "grad_norm": 1.7192219495773315, "learning_rate": 2.2588350362934058e-06, "loss": 0.1024, "step": 562350 }, { "epoch": 5.53, "grad_norm": 5.8416948318481445, "learning_rate": 2.2587109138391574e-06, "loss": 0.0545, "step": 562375 }, { "epoch": 5.53, "grad_norm": 1.0481622219085693, "learning_rate": 2.258586791384909e-06, "loss": 0.0868, "step": 562400 }, { "epoch": 5.53, "grad_norm": 8.873424530029297, "learning_rate": 2.2584626689306603e-06, "loss": 0.0478, "step": 562425 }, { "epoch": 5.53, "grad_norm": 1.50017249584198, "learning_rate": 2.258338546476412e-06, "loss": 0.0909, "step": 562450 }, { "epoch": 5.53, "grad_norm": 6.542482376098633, "learning_rate": 2.2582144240221635e-06, "loss": 0.0445, "step": 562475 }, { "epoch": 5.53, "grad_norm": 5.451110363006592, "learning_rate": 2.258090301567915e-06, "loss": 0.0939, "step": 562500 }, { "epoch": 5.53, "grad_norm": 7.0787529945373535, "learning_rate": 2.2579661791136664e-06, "loss": 0.0509, "step": 562525 }, { "epoch": 5.53, "grad_norm": 0.2752423584461212, "learning_rate": 2.257842056659418e-06, "loss": 0.0954, "step": 562550 }, { "epoch": 5.53, "grad_norm": 12.198101997375488, "learning_rate": 2.2577179342051696e-06, "loss": 0.0464, "step": 562575 }, { "epoch": 5.53, "grad_norm": 1.4866007566452026, "learning_rate": 2.2575938117509213e-06, "loss": 0.0901, "step": 562600 }, { "epoch": 5.53, "grad_norm": 9.905811309814453, "learning_rate": 2.257469689296673e-06, "loss": 0.0565, "step": 562625 }, { "epoch": 5.53, "grad_norm": 4.784850120544434, "learning_rate": 2.2573455668424245e-06, "loss": 0.0737, "step": 562650 }, { "epoch": 5.53, "grad_norm": 3.0023064613342285, "learning_rate": 2.2572214443881758e-06, "loss": 0.0504, "step": 562675 }, { "epoch": 5.53, "grad_norm": 0.3260493576526642, "learning_rate": 2.2570973219339274e-06, "loss": 0.0661, "step": 562700 }, { "epoch": 5.53, "grad_norm": 8.368227005004883, "learning_rate": 2.256973199479679e-06, "loss": 0.0674, "step": 562725 }, { "epoch": 5.53, "grad_norm": 0.9179393649101257, "learning_rate": 2.2568540419236005e-06, "loss": 0.0913, "step": 562750 }, { "epoch": 5.53, "grad_norm": 3.516834259033203, "learning_rate": 2.2567299194693517e-06, "loss": 0.0621, "step": 562775 }, { "epoch": 5.53, "grad_norm": 1.962911605834961, "learning_rate": 2.2566057970151033e-06, "loss": 0.0748, "step": 562800 }, { "epoch": 5.53, "grad_norm": 11.42750358581543, "learning_rate": 2.256481674560855e-06, "loss": 0.0524, "step": 562825 }, { "epoch": 5.53, "grad_norm": 9.36510181427002, "learning_rate": 2.2563575521066066e-06, "loss": 0.1031, "step": 562850 }, { "epoch": 5.53, "grad_norm": 7.870849132537842, "learning_rate": 2.2562334296523582e-06, "loss": 0.0499, "step": 562875 }, { "epoch": 5.53, "grad_norm": 5.586836814880371, "learning_rate": 2.25610930719811e-06, "loss": 0.0711, "step": 562900 }, { "epoch": 5.53, "grad_norm": 7.259192943572998, "learning_rate": 2.255985184743861e-06, "loss": 0.0561, "step": 562925 }, { "epoch": 5.54, "grad_norm": 1.5526401996612549, "learning_rate": 2.2558610622896127e-06, "loss": 0.0792, "step": 562950 }, { "epoch": 5.54, "grad_norm": 10.415852546691895, "learning_rate": 2.2557369398353643e-06, "loss": 0.0304, "step": 562975 }, { "epoch": 5.54, "grad_norm": 3.9492125511169434, "learning_rate": 2.2556128173811156e-06, "loss": 0.09, "step": 563000 }, { "epoch": 5.54, "grad_norm": 5.623780727386475, "learning_rate": 2.255488694926867e-06, "loss": 0.0398, "step": 563025 }, { "epoch": 5.54, "grad_norm": 1.2503571510314941, "learning_rate": 2.255364572472619e-06, "loss": 0.1019, "step": 563050 }, { "epoch": 5.54, "grad_norm": 5.207643508911133, "learning_rate": 2.2552404500183705e-06, "loss": 0.03, "step": 563075 }, { "epoch": 5.54, "grad_norm": 2.7885026931762695, "learning_rate": 2.255116327564122e-06, "loss": 0.1104, "step": 563100 }, { "epoch": 5.54, "grad_norm": 10.875645637512207, "learning_rate": 2.2549922051098733e-06, "loss": 0.0525, "step": 563125 }, { "epoch": 5.54, "grad_norm": 6.2962751388549805, "learning_rate": 2.254868082655625e-06, "loss": 0.0873, "step": 563150 }, { "epoch": 5.54, "grad_norm": 12.486471176147461, "learning_rate": 2.2547439602013766e-06, "loss": 0.0504, "step": 563175 }, { "epoch": 5.54, "grad_norm": 0.16584375500679016, "learning_rate": 2.2546198377471278e-06, "loss": 0.104, "step": 563200 }, { "epoch": 5.54, "grad_norm": 2.337651014328003, "learning_rate": 2.2544957152928794e-06, "loss": 0.0518, "step": 563225 }, { "epoch": 5.54, "grad_norm": 1.0957551002502441, "learning_rate": 2.254371592838631e-06, "loss": 0.0892, "step": 563250 }, { "epoch": 5.54, "grad_norm": 10.448020935058594, "learning_rate": 2.2542474703843827e-06, "loss": 0.0551, "step": 563275 }, { "epoch": 5.54, "grad_norm": 6.037144660949707, "learning_rate": 2.2541233479301343e-06, "loss": 0.0876, "step": 563300 }, { "epoch": 5.54, "grad_norm": 2.2043066024780273, "learning_rate": 2.253999225475886e-06, "loss": 0.0652, "step": 563325 }, { "epoch": 5.54, "grad_norm": 1.3914294242858887, "learning_rate": 2.253875103021637e-06, "loss": 0.0801, "step": 563350 }, { "epoch": 5.54, "grad_norm": 9.466754913330078, "learning_rate": 2.253750980567389e-06, "loss": 0.0541, "step": 563375 }, { "epoch": 5.54, "grad_norm": 1.311716079711914, "learning_rate": 2.2536268581131404e-06, "loss": 0.087, "step": 563400 }, { "epoch": 5.54, "grad_norm": 16.736175537109375, "learning_rate": 2.2535027356588917e-06, "loss": 0.0528, "step": 563425 }, { "epoch": 5.54, "grad_norm": 8.040536880493164, "learning_rate": 2.2533786132046433e-06, "loss": 0.1015, "step": 563450 }, { "epoch": 5.54, "grad_norm": 8.868186950683594, "learning_rate": 2.253254490750395e-06, "loss": 0.0609, "step": 563475 }, { "epoch": 5.54, "grad_norm": 0.39324870705604553, "learning_rate": 2.2531303682961466e-06, "loss": 0.1057, "step": 563500 }, { "epoch": 5.54, "grad_norm": 16.2302303314209, "learning_rate": 2.253006245841898e-06, "loss": 0.0759, "step": 563525 }, { "epoch": 5.54, "grad_norm": 4.32024621963501, "learning_rate": 2.2528821233876494e-06, "loss": 0.1012, "step": 563550 }, { "epoch": 5.54, "grad_norm": 13.994118690490723, "learning_rate": 2.252758000933401e-06, "loss": 0.0552, "step": 563575 }, { "epoch": 5.54, "grad_norm": 0.4754157066345215, "learning_rate": 2.2526338784791527e-06, "loss": 0.076, "step": 563600 }, { "epoch": 5.54, "grad_norm": 11.43466854095459, "learning_rate": 2.252509756024904e-06, "loss": 0.0507, "step": 563625 }, { "epoch": 5.54, "grad_norm": 1.1670095920562744, "learning_rate": 2.2523856335706555e-06, "loss": 0.0843, "step": 563650 }, { "epoch": 5.54, "grad_norm": 4.6252031326293945, "learning_rate": 2.252261511116407e-06, "loss": 0.0355, "step": 563675 }, { "epoch": 5.54, "grad_norm": 0.6327881217002869, "learning_rate": 2.252137388662159e-06, "loss": 0.1119, "step": 563700 }, { "epoch": 5.54, "grad_norm": 15.829794883728027, "learning_rate": 2.2520132662079104e-06, "loss": 0.061, "step": 563725 }, { "epoch": 5.54, "grad_norm": 0.4311290979385376, "learning_rate": 2.251889143753662e-06, "loss": 0.0765, "step": 563750 }, { "epoch": 5.54, "grad_norm": 5.561509132385254, "learning_rate": 2.2517650212994133e-06, "loss": 0.0405, "step": 563775 }, { "epoch": 5.54, "grad_norm": 3.538607597351074, "learning_rate": 2.251640898845165e-06, "loss": 0.1003, "step": 563800 }, { "epoch": 5.54, "grad_norm": 8.942222595214844, "learning_rate": 2.2515167763909165e-06, "loss": 0.061, "step": 563825 }, { "epoch": 5.54, "grad_norm": 0.02230742573738098, "learning_rate": 2.2513926539366678e-06, "loss": 0.0536, "step": 563850 }, { "epoch": 5.54, "grad_norm": 12.35419750213623, "learning_rate": 2.2512685314824194e-06, "loss": 0.053, "step": 563875 }, { "epoch": 5.54, "grad_norm": 0.382297158241272, "learning_rate": 2.251144409028171e-06, "loss": 0.096, "step": 563900 }, { "epoch": 5.54, "grad_norm": 2.3984732627868652, "learning_rate": 2.2510202865739227e-06, "loss": 0.0447, "step": 563925 }, { "epoch": 5.54, "grad_norm": 9.012277603149414, "learning_rate": 2.2508961641196743e-06, "loss": 0.0823, "step": 563950 }, { "epoch": 5.55, "grad_norm": 11.758251190185547, "learning_rate": 2.2507720416654255e-06, "loss": 0.0533, "step": 563975 }, { "epoch": 5.55, "grad_norm": 2.9760231971740723, "learning_rate": 2.250647919211177e-06, "loss": 0.0814, "step": 564000 }, { "epoch": 5.55, "grad_norm": 0.4388998746871948, "learning_rate": 2.2505237967569288e-06, "loss": 0.0363, "step": 564025 }, { "epoch": 5.55, "grad_norm": 3.9268124103546143, "learning_rate": 2.25039967430268e-06, "loss": 0.0767, "step": 564050 }, { "epoch": 5.55, "grad_norm": 8.089261054992676, "learning_rate": 2.2502755518484316e-06, "loss": 0.0496, "step": 564075 }, { "epoch": 5.55, "grad_norm": 0.1941232979297638, "learning_rate": 2.2501514293941833e-06, "loss": 0.081, "step": 564100 }, { "epoch": 5.55, "grad_norm": 15.97840690612793, "learning_rate": 2.250027306939935e-06, "loss": 0.0474, "step": 564125 }, { "epoch": 5.55, "grad_norm": 0.08666666597127914, "learning_rate": 2.2499031844856865e-06, "loss": 0.0799, "step": 564150 }, { "epoch": 5.55, "grad_norm": 1.2459475994110107, "learning_rate": 2.249779062031438e-06, "loss": 0.0577, "step": 564175 }, { "epoch": 5.55, "grad_norm": 8.826072692871094, "learning_rate": 2.2496549395771894e-06, "loss": 0.0837, "step": 564200 }, { "epoch": 5.55, "grad_norm": 12.564715385437012, "learning_rate": 2.249530817122941e-06, "loss": 0.0606, "step": 564225 }, { "epoch": 5.55, "grad_norm": 9.888723373413086, "learning_rate": 2.2494066946686926e-06, "loss": 0.0982, "step": 564250 }, { "epoch": 5.55, "grad_norm": 9.898591995239258, "learning_rate": 2.249282572214444e-06, "loss": 0.0433, "step": 564275 }, { "epoch": 5.55, "grad_norm": 0.15831901133060455, "learning_rate": 2.2491584497601955e-06, "loss": 0.0741, "step": 564300 }, { "epoch": 5.55, "grad_norm": 17.37645721435547, "learning_rate": 2.249034327305947e-06, "loss": 0.0416, "step": 564325 }, { "epoch": 5.55, "grad_norm": 1.2873878479003906, "learning_rate": 2.2489102048516988e-06, "loss": 0.0849, "step": 564350 }, { "epoch": 5.55, "grad_norm": 11.067401885986328, "learning_rate": 2.2487860823974504e-06, "loss": 0.0484, "step": 564375 }, { "epoch": 5.55, "grad_norm": 5.067242622375488, "learning_rate": 2.2486619599432016e-06, "loss": 0.0635, "step": 564400 }, { "epoch": 5.55, "grad_norm": 10.54462718963623, "learning_rate": 2.2485378374889532e-06, "loss": 0.053, "step": 564425 }, { "epoch": 5.55, "grad_norm": 6.618411540985107, "learning_rate": 2.248413715034705e-06, "loss": 0.081, "step": 564450 }, { "epoch": 5.55, "grad_norm": 14.308606147766113, "learning_rate": 2.248289592580456e-06, "loss": 0.051, "step": 564475 }, { "epoch": 5.55, "grad_norm": 151.23924255371094, "learning_rate": 2.2481654701262077e-06, "loss": 0.1044, "step": 564500 }, { "epoch": 5.55, "grad_norm": 9.392547607421875, "learning_rate": 2.2480413476719594e-06, "loss": 0.0548, "step": 564525 }, { "epoch": 5.55, "grad_norm": 0.9243641495704651, "learning_rate": 2.247917225217711e-06, "loss": 0.0734, "step": 564550 }, { "epoch": 5.55, "grad_norm": 15.546062469482422, "learning_rate": 2.2477931027634626e-06, "loss": 0.049, "step": 564575 }, { "epoch": 5.55, "grad_norm": 0.5781640410423279, "learning_rate": 2.2476689803092143e-06, "loss": 0.0761, "step": 564600 }, { "epoch": 5.55, "grad_norm": 7.092000961303711, "learning_rate": 2.2475448578549655e-06, "loss": 0.0477, "step": 564625 }, { "epoch": 5.55, "grad_norm": 0.16110558807849884, "learning_rate": 2.247420735400717e-06, "loss": 0.0886, "step": 564650 }, { "epoch": 5.55, "grad_norm": 8.772695541381836, "learning_rate": 2.2472966129464687e-06, "loss": 0.0649, "step": 564675 }, { "epoch": 5.55, "grad_norm": 0.021183602511882782, "learning_rate": 2.24717249049222e-06, "loss": 0.1098, "step": 564700 }, { "epoch": 5.55, "grad_norm": 12.571481704711914, "learning_rate": 2.2470483680379716e-06, "loss": 0.0433, "step": 564725 }, { "epoch": 5.55, "grad_norm": 0.23612143099308014, "learning_rate": 2.2469242455837232e-06, "loss": 0.0715, "step": 564750 }, { "epoch": 5.55, "grad_norm": 12.119372367858887, "learning_rate": 2.246800123129475e-06, "loss": 0.0625, "step": 564775 }, { "epoch": 5.55, "grad_norm": 2.042113780975342, "learning_rate": 2.2466760006752265e-06, "loss": 0.0781, "step": 564800 }, { "epoch": 5.55, "grad_norm": 7.460128307342529, "learning_rate": 2.2465518782209777e-06, "loss": 0.0365, "step": 564825 }, { "epoch": 5.55, "grad_norm": 3.0873301029205322, "learning_rate": 2.2464277557667293e-06, "loss": 0.0752, "step": 564850 }, { "epoch": 5.55, "grad_norm": 13.990117073059082, "learning_rate": 2.246303633312481e-06, "loss": 0.0433, "step": 564875 }, { "epoch": 5.55, "grad_norm": 3.1319925785064697, "learning_rate": 2.2461844757564024e-06, "loss": 0.0946, "step": 564900 }, { "epoch": 5.55, "grad_norm": 9.861111640930176, "learning_rate": 2.2460603533021536e-06, "loss": 0.0557, "step": 564925 }, { "epoch": 5.55, "grad_norm": 8.345887184143066, "learning_rate": 2.2459362308479053e-06, "loss": 0.0749, "step": 564950 }, { "epoch": 5.55, "grad_norm": 3.8602335453033447, "learning_rate": 2.245812108393657e-06, "loss": 0.0525, "step": 564975 }, { "epoch": 5.56, "grad_norm": 0.8071508407592773, "learning_rate": 2.2456879859394085e-06, "loss": 0.0791, "step": 565000 }, { "epoch": 5.56, "grad_norm": 11.589740753173828, "learning_rate": 2.24556386348516e-06, "loss": 0.0517, "step": 565025 }, { "epoch": 5.56, "grad_norm": 4.002706050872803, "learning_rate": 2.245439741030912e-06, "loss": 0.0695, "step": 565050 }, { "epoch": 5.56, "grad_norm": 21.826810836791992, "learning_rate": 2.245315618576663e-06, "loss": 0.0576, "step": 565075 }, { "epoch": 5.56, "grad_norm": 4.210087776184082, "learning_rate": 2.2451914961224147e-06, "loss": 0.0998, "step": 565100 }, { "epoch": 5.56, "grad_norm": 8.349876403808594, "learning_rate": 2.2450673736681663e-06, "loss": 0.0523, "step": 565125 }, { "epoch": 5.56, "grad_norm": 4.704690456390381, "learning_rate": 2.2449432512139175e-06, "loss": 0.0688, "step": 565150 }, { "epoch": 5.56, "grad_norm": 5.531768798828125, "learning_rate": 2.244819128759669e-06, "loss": 0.0477, "step": 565175 }, { "epoch": 5.56, "grad_norm": 5.716031074523926, "learning_rate": 2.2446950063054208e-06, "loss": 0.072, "step": 565200 }, { "epoch": 5.56, "grad_norm": 13.810951232910156, "learning_rate": 2.2445708838511724e-06, "loss": 0.0506, "step": 565225 }, { "epoch": 5.56, "grad_norm": 9.919452667236328, "learning_rate": 2.244446761396924e-06, "loss": 0.1019, "step": 565250 }, { "epoch": 5.56, "grad_norm": 10.4664306640625, "learning_rate": 2.2443226389426757e-06, "loss": 0.0356, "step": 565275 }, { "epoch": 5.56, "grad_norm": 8.126338005065918, "learning_rate": 2.244198516488427e-06, "loss": 0.0565, "step": 565300 }, { "epoch": 5.56, "grad_norm": 17.284515380859375, "learning_rate": 2.2440743940341785e-06, "loss": 0.0463, "step": 565325 }, { "epoch": 5.56, "grad_norm": 0.817518413066864, "learning_rate": 2.2439502715799297e-06, "loss": 0.0893, "step": 565350 }, { "epoch": 5.56, "grad_norm": 11.482110023498535, "learning_rate": 2.2438261491256814e-06, "loss": 0.0479, "step": 565375 }, { "epoch": 5.56, "grad_norm": 3.424478769302368, "learning_rate": 2.243702026671433e-06, "loss": 0.0816, "step": 565400 }, { "epoch": 5.56, "grad_norm": 13.391840934753418, "learning_rate": 2.2435779042171846e-06, "loss": 0.0356, "step": 565425 }, { "epoch": 5.56, "grad_norm": 0.44989392161369324, "learning_rate": 2.2434537817629363e-06, "loss": 0.0615, "step": 565450 }, { "epoch": 5.56, "grad_norm": 9.79871654510498, "learning_rate": 2.243329659308688e-06, "loss": 0.041, "step": 565475 }, { "epoch": 5.56, "grad_norm": 2.4899752140045166, "learning_rate": 2.243205536854439e-06, "loss": 0.0833, "step": 565500 }, { "epoch": 5.56, "grad_norm": 9.015246391296387, "learning_rate": 2.2430814144001908e-06, "loss": 0.0491, "step": 565525 }, { "epoch": 5.56, "grad_norm": 0.7102185487747192, "learning_rate": 2.2429572919459424e-06, "loss": 0.0813, "step": 565550 }, { "epoch": 5.56, "grad_norm": 8.632237434387207, "learning_rate": 2.242833169491694e-06, "loss": 0.047, "step": 565575 }, { "epoch": 5.56, "grad_norm": 4.142697334289551, "learning_rate": 2.2427090470374457e-06, "loss": 0.1003, "step": 565600 }, { "epoch": 5.56, "grad_norm": 1.6756762266159058, "learning_rate": 2.2425849245831973e-06, "loss": 0.0551, "step": 565625 }, { "epoch": 5.56, "grad_norm": 1.7230573892593384, "learning_rate": 2.2424608021289485e-06, "loss": 0.0872, "step": 565650 }, { "epoch": 5.56, "grad_norm": 14.617136001586914, "learning_rate": 2.2423366796747e-06, "loss": 0.0466, "step": 565675 }, { "epoch": 5.56, "grad_norm": 0.46997156739234924, "learning_rate": 2.2422125572204518e-06, "loss": 0.0722, "step": 565700 }, { "epoch": 5.56, "grad_norm": 7.718843936920166, "learning_rate": 2.242088434766203e-06, "loss": 0.0437, "step": 565725 }, { "epoch": 5.56, "grad_norm": 0.22996041178703308, "learning_rate": 2.2419643123119546e-06, "loss": 0.0679, "step": 565750 }, { "epoch": 5.56, "grad_norm": 8.34186840057373, "learning_rate": 2.2418401898577063e-06, "loss": 0.065, "step": 565775 }, { "epoch": 5.56, "grad_norm": 1.6133924722671509, "learning_rate": 2.241716067403458e-06, "loss": 0.0556, "step": 565800 }, { "epoch": 5.56, "grad_norm": 8.129605293273926, "learning_rate": 2.2415919449492095e-06, "loss": 0.0528, "step": 565825 }, { "epoch": 5.56, "grad_norm": 0.11753673851490021, "learning_rate": 2.2414678224949607e-06, "loss": 0.0847, "step": 565850 }, { "epoch": 5.56, "grad_norm": 16.072002410888672, "learning_rate": 2.2413437000407124e-06, "loss": 0.0392, "step": 565875 }, { "epoch": 5.56, "grad_norm": 3.073172092437744, "learning_rate": 2.241219577586464e-06, "loss": 0.0755, "step": 565900 }, { "epoch": 5.56, "grad_norm": 9.297318458557129, "learning_rate": 2.2410954551322152e-06, "loss": 0.0465, "step": 565925 }, { "epoch": 5.56, "grad_norm": 0.7164002656936646, "learning_rate": 2.240971332677967e-06, "loss": 0.0836, "step": 565950 }, { "epoch": 5.56, "grad_norm": 7.065037250518799, "learning_rate": 2.2408472102237185e-06, "loss": 0.0377, "step": 565975 }, { "epoch": 5.57, "grad_norm": 2.082179069519043, "learning_rate": 2.24072308776947e-06, "loss": 0.0815, "step": 566000 }, { "epoch": 5.57, "grad_norm": 15.29854965209961, "learning_rate": 2.2405989653152218e-06, "loss": 0.0464, "step": 566025 }, { "epoch": 5.57, "grad_norm": 1.0900073051452637, "learning_rate": 2.2404748428609734e-06, "loss": 0.0742, "step": 566050 }, { "epoch": 5.57, "grad_norm": 11.498869895935059, "learning_rate": 2.2403507204067246e-06, "loss": 0.0392, "step": 566075 }, { "epoch": 5.57, "grad_norm": 3.4149084091186523, "learning_rate": 2.2402265979524762e-06, "loss": 0.0851, "step": 566100 }, { "epoch": 5.57, "grad_norm": 10.27001953125, "learning_rate": 2.240102475498228e-06, "loss": 0.0349, "step": 566125 }, { "epoch": 5.57, "grad_norm": 0.6387959122657776, "learning_rate": 2.239978353043979e-06, "loss": 0.1173, "step": 566150 }, { "epoch": 5.57, "grad_norm": 9.32557487487793, "learning_rate": 2.2398542305897307e-06, "loss": 0.0441, "step": 566175 }, { "epoch": 5.57, "grad_norm": 3.3480846881866455, "learning_rate": 2.2397301081354824e-06, "loss": 0.0735, "step": 566200 }, { "epoch": 5.57, "grad_norm": 8.38469409942627, "learning_rate": 2.239605985681234e-06, "loss": 0.0487, "step": 566225 }, { "epoch": 5.57, "grad_norm": 2.1800014972686768, "learning_rate": 2.2394818632269856e-06, "loss": 0.0568, "step": 566250 }, { "epoch": 5.57, "grad_norm": 5.477644443511963, "learning_rate": 2.239357740772737e-06, "loss": 0.0676, "step": 566275 }, { "epoch": 5.57, "grad_norm": 6.714944839477539, "learning_rate": 2.2392336183184885e-06, "loss": 0.086, "step": 566300 }, { "epoch": 5.57, "grad_norm": 5.516708850860596, "learning_rate": 2.23910949586424e-06, "loss": 0.0498, "step": 566325 }, { "epoch": 5.57, "grad_norm": 1.0869888067245483, "learning_rate": 2.2389853734099913e-06, "loss": 0.0727, "step": 566350 }, { "epoch": 5.57, "grad_norm": 11.522767066955566, "learning_rate": 2.238861250955743e-06, "loss": 0.0337, "step": 566375 }, { "epoch": 5.57, "grad_norm": 0.6348318457603455, "learning_rate": 2.2387371285014946e-06, "loss": 0.0828, "step": 566400 }, { "epoch": 5.57, "grad_norm": 14.59333324432373, "learning_rate": 2.2386130060472462e-06, "loss": 0.052, "step": 566425 }, { "epoch": 5.57, "grad_norm": 1.2035505771636963, "learning_rate": 2.238488883592998e-06, "loss": 0.0727, "step": 566450 }, { "epoch": 5.57, "grad_norm": 6.900845527648926, "learning_rate": 2.2383647611387495e-06, "loss": 0.0436, "step": 566475 }, { "epoch": 5.57, "grad_norm": 0.4003031849861145, "learning_rate": 2.2382406386845007e-06, "loss": 0.0878, "step": 566500 }, { "epoch": 5.57, "grad_norm": 4.778422832489014, "learning_rate": 2.2381165162302523e-06, "loss": 0.0447, "step": 566525 }, { "epoch": 5.57, "grad_norm": 0.3062683641910553, "learning_rate": 2.237992393776004e-06, "loss": 0.0932, "step": 566550 }, { "epoch": 5.57, "grad_norm": 4.553188323974609, "learning_rate": 2.237868271321755e-06, "loss": 0.0567, "step": 566575 }, { "epoch": 5.57, "grad_norm": 0.763974130153656, "learning_rate": 2.237744148867507e-06, "loss": 0.0788, "step": 566600 }, { "epoch": 5.57, "grad_norm": 15.832503318786621, "learning_rate": 2.2376200264132585e-06, "loss": 0.0322, "step": 566625 }, { "epoch": 5.57, "grad_norm": 0.5939611792564392, "learning_rate": 2.23749590395901e-06, "loss": 0.0752, "step": 566650 }, { "epoch": 5.57, "grad_norm": 13.790064811706543, "learning_rate": 2.2373717815047617e-06, "loss": 0.0515, "step": 566675 }, { "epoch": 5.57, "grad_norm": 4.6308393478393555, "learning_rate": 2.237247659050513e-06, "loss": 0.0817, "step": 566700 }, { "epoch": 5.57, "grad_norm": 16.806026458740234, "learning_rate": 2.2371235365962646e-06, "loss": 0.0658, "step": 566725 }, { "epoch": 5.57, "grad_norm": 3.1972033977508545, "learning_rate": 2.2369994141420162e-06, "loss": 0.0763, "step": 566750 }, { "epoch": 5.57, "grad_norm": 11.121651649475098, "learning_rate": 2.2368752916877674e-06, "loss": 0.0652, "step": 566775 }, { "epoch": 5.57, "grad_norm": 2.401116371154785, "learning_rate": 2.236751169233519e-06, "loss": 0.0863, "step": 566800 }, { "epoch": 5.57, "grad_norm": 6.971864700317383, "learning_rate": 2.2366270467792707e-06, "loss": 0.0548, "step": 566825 }, { "epoch": 5.57, "grad_norm": 7.65577507019043, "learning_rate": 2.2365029243250223e-06, "loss": 0.1017, "step": 566850 }, { "epoch": 5.57, "grad_norm": 8.688310623168945, "learning_rate": 2.236378801870774e-06, "loss": 0.0561, "step": 566875 }, { "epoch": 5.57, "grad_norm": 0.14047978818416595, "learning_rate": 2.2362546794165256e-06, "loss": 0.089, "step": 566900 }, { "epoch": 5.57, "grad_norm": 18.832176208496094, "learning_rate": 2.236130556962277e-06, "loss": 0.0598, "step": 566925 }, { "epoch": 5.57, "grad_norm": 4.498093128204346, "learning_rate": 2.2360113994061983e-06, "loss": 0.0967, "step": 566950 }, { "epoch": 5.57, "grad_norm": 9.84597396850586, "learning_rate": 2.23588727695195e-06, "loss": 0.0396, "step": 566975 }, { "epoch": 5.57, "grad_norm": 2.9713926315307617, "learning_rate": 2.2357631544977015e-06, "loss": 0.0945, "step": 567000 }, { "epoch": 5.58, "grad_norm": 12.302408218383789, "learning_rate": 2.2356390320434527e-06, "loss": 0.0639, "step": 567025 }, { "epoch": 5.58, "grad_norm": 9.789414405822754, "learning_rate": 2.2355149095892044e-06, "loss": 0.0931, "step": 567050 }, { "epoch": 5.58, "grad_norm": 6.430967330932617, "learning_rate": 2.235390787134956e-06, "loss": 0.0446, "step": 567075 }, { "epoch": 5.58, "grad_norm": 1.5914336442947388, "learning_rate": 2.2352666646807077e-06, "loss": 0.1146, "step": 567100 }, { "epoch": 5.58, "grad_norm": 17.170976638793945, "learning_rate": 2.2351425422264593e-06, "loss": 0.0495, "step": 567125 }, { "epoch": 5.58, "grad_norm": 3.2777597904205322, "learning_rate": 2.235018419772211e-06, "loss": 0.0839, "step": 567150 }, { "epoch": 5.58, "grad_norm": 8.551946640014648, "learning_rate": 2.234894297317962e-06, "loss": 0.0521, "step": 567175 }, { "epoch": 5.58, "grad_norm": 0.3003314733505249, "learning_rate": 2.2347701748637138e-06, "loss": 0.0906, "step": 567200 }, { "epoch": 5.58, "grad_norm": 10.817867279052734, "learning_rate": 2.234646052409465e-06, "loss": 0.0486, "step": 567225 }, { "epoch": 5.58, "grad_norm": 0.13943591713905334, "learning_rate": 2.2345219299552166e-06, "loss": 0.0926, "step": 567250 }, { "epoch": 5.58, "grad_norm": 6.659384727478027, "learning_rate": 2.2343978075009682e-06, "loss": 0.0516, "step": 567275 }, { "epoch": 5.58, "grad_norm": 8.262838363647461, "learning_rate": 2.23427368504672e-06, "loss": 0.0569, "step": 567300 }, { "epoch": 5.58, "grad_norm": 7.867619037628174, "learning_rate": 2.2341495625924715e-06, "loss": 0.031, "step": 567325 }, { "epoch": 5.58, "grad_norm": 0.5978577136993408, "learning_rate": 2.234025440138223e-06, "loss": 0.0853, "step": 567350 }, { "epoch": 5.58, "grad_norm": 4.137357711791992, "learning_rate": 2.2339013176839744e-06, "loss": 0.0319, "step": 567375 }, { "epoch": 5.58, "grad_norm": 0.49182137846946716, "learning_rate": 2.233777195229726e-06, "loss": 0.0968, "step": 567400 }, { "epoch": 5.58, "grad_norm": 14.18752384185791, "learning_rate": 2.2336530727754776e-06, "loss": 0.0621, "step": 567425 }, { "epoch": 5.58, "grad_norm": 1.2810245752334595, "learning_rate": 2.233528950321229e-06, "loss": 0.1161, "step": 567450 }, { "epoch": 5.58, "grad_norm": 10.406075477600098, "learning_rate": 2.2334048278669805e-06, "loss": 0.052, "step": 567475 }, { "epoch": 5.58, "grad_norm": 2.692812442779541, "learning_rate": 2.233280705412732e-06, "loss": 0.096, "step": 567500 }, { "epoch": 5.58, "grad_norm": 8.20284366607666, "learning_rate": 2.2331565829584838e-06, "loss": 0.0405, "step": 567525 }, { "epoch": 5.58, "grad_norm": 0.022934596985578537, "learning_rate": 2.2330324605042354e-06, "loss": 0.0823, "step": 567550 }, { "epoch": 5.58, "grad_norm": 13.098342895507812, "learning_rate": 2.232908338049987e-06, "loss": 0.0582, "step": 567575 }, { "epoch": 5.58, "grad_norm": 1.2960535287857056, "learning_rate": 2.2327842155957382e-06, "loss": 0.0769, "step": 567600 }, { "epoch": 5.58, "grad_norm": 11.058643341064453, "learning_rate": 2.23266009314149e-06, "loss": 0.0538, "step": 567625 }, { "epoch": 5.58, "grad_norm": 0.2108399122953415, "learning_rate": 2.232535970687241e-06, "loss": 0.0873, "step": 567650 }, { "epoch": 5.58, "grad_norm": 7.504179000854492, "learning_rate": 2.2324118482329927e-06, "loss": 0.0387, "step": 567675 }, { "epoch": 5.58, "grad_norm": 1.9734739065170288, "learning_rate": 2.2322877257787443e-06, "loss": 0.0962, "step": 567700 }, { "epoch": 5.58, "grad_norm": 9.023482322692871, "learning_rate": 2.232163603324496e-06, "loss": 0.0539, "step": 567725 }, { "epoch": 5.58, "grad_norm": 3.5915510654449463, "learning_rate": 2.2320394808702476e-06, "loss": 0.0837, "step": 567750 }, { "epoch": 5.58, "grad_norm": 13.091064453125, "learning_rate": 2.2319153584159993e-06, "loss": 0.0727, "step": 567775 }, { "epoch": 5.58, "grad_norm": 5.862116813659668, "learning_rate": 2.2317912359617505e-06, "loss": 0.0916, "step": 567800 }, { "epoch": 5.58, "grad_norm": 6.670660495758057, "learning_rate": 2.231667113507502e-06, "loss": 0.0495, "step": 567825 }, { "epoch": 5.58, "grad_norm": 5.571667671203613, "learning_rate": 2.2315429910532537e-06, "loss": 0.1256, "step": 567850 }, { "epoch": 5.58, "grad_norm": 9.29139518737793, "learning_rate": 2.231418868599005e-06, "loss": 0.0634, "step": 567875 }, { "epoch": 5.58, "grad_norm": 6.375314235687256, "learning_rate": 2.2312947461447566e-06, "loss": 0.0759, "step": 567900 }, { "epoch": 5.58, "grad_norm": 11.751632690429688, "learning_rate": 2.2311706236905082e-06, "loss": 0.0597, "step": 567925 }, { "epoch": 5.58, "grad_norm": 1.8750548362731934, "learning_rate": 2.23104650123626e-06, "loss": 0.1311, "step": 567950 }, { "epoch": 5.58, "grad_norm": 7.0295281410217285, "learning_rate": 2.2309223787820115e-06, "loss": 0.0629, "step": 567975 }, { "epoch": 5.58, "grad_norm": 4.635251045227051, "learning_rate": 2.230798256327763e-06, "loss": 0.0822, "step": 568000 }, { "epoch": 5.58, "grad_norm": 12.537022590637207, "learning_rate": 2.2306741338735143e-06, "loss": 0.0494, "step": 568025 }, { "epoch": 5.59, "grad_norm": 0.1415763795375824, "learning_rate": 2.230550011419266e-06, "loss": 0.0938, "step": 568050 }, { "epoch": 5.59, "grad_norm": 15.768081665039062, "learning_rate": 2.230425888965017e-06, "loss": 0.0641, "step": 568075 }, { "epoch": 5.59, "grad_norm": 0.7474467754364014, "learning_rate": 2.230301766510769e-06, "loss": 0.0659, "step": 568100 }, { "epoch": 5.59, "grad_norm": 5.684939861297607, "learning_rate": 2.2301776440565204e-06, "loss": 0.0679, "step": 568125 }, { "epoch": 5.59, "grad_norm": 0.11391105502843857, "learning_rate": 2.230053521602272e-06, "loss": 0.0681, "step": 568150 }, { "epoch": 5.59, "grad_norm": 5.222065448760986, "learning_rate": 2.2299293991480237e-06, "loss": 0.0608, "step": 568175 }, { "epoch": 5.59, "grad_norm": 3.050701379776001, "learning_rate": 2.2298052766937754e-06, "loss": 0.0909, "step": 568200 }, { "epoch": 5.59, "grad_norm": 9.405020713806152, "learning_rate": 2.2296811542395266e-06, "loss": 0.0517, "step": 568225 }, { "epoch": 5.59, "grad_norm": 0.22363540530204773, "learning_rate": 2.229557031785278e-06, "loss": 0.1061, "step": 568250 }, { "epoch": 5.59, "grad_norm": 7.216924667358398, "learning_rate": 2.22943290933103e-06, "loss": 0.0593, "step": 568275 }, { "epoch": 5.59, "grad_norm": 1.0723525285720825, "learning_rate": 2.2293087868767815e-06, "loss": 0.0669, "step": 568300 }, { "epoch": 5.59, "grad_norm": 16.622844696044922, "learning_rate": 2.229184664422533e-06, "loss": 0.051, "step": 568325 }, { "epoch": 5.59, "grad_norm": 3.6941373348236084, "learning_rate": 2.2290605419682847e-06, "loss": 0.0905, "step": 568350 }, { "epoch": 5.59, "grad_norm": 8.862171173095703, "learning_rate": 2.228936419514036e-06, "loss": 0.0419, "step": 568375 }, { "epoch": 5.59, "grad_norm": 3.5639145374298096, "learning_rate": 2.2288122970597876e-06, "loss": 0.1061, "step": 568400 }, { "epoch": 5.59, "grad_norm": 11.975970268249512, "learning_rate": 2.2286881746055392e-06, "loss": 0.0445, "step": 568425 }, { "epoch": 5.59, "grad_norm": 5.52266263961792, "learning_rate": 2.2285640521512904e-06, "loss": 0.0918, "step": 568450 }, { "epoch": 5.59, "grad_norm": 9.513230323791504, "learning_rate": 2.228439929697042e-06, "loss": 0.0285, "step": 568475 }, { "epoch": 5.59, "grad_norm": 0.04832417890429497, "learning_rate": 2.2283158072427937e-06, "loss": 0.0767, "step": 568500 }, { "epoch": 5.59, "grad_norm": 13.240571975708008, "learning_rate": 2.2281916847885453e-06, "loss": 0.0595, "step": 568525 }, { "epoch": 5.59, "grad_norm": 0.8914379477500916, "learning_rate": 2.228067562334297e-06, "loss": 0.073, "step": 568550 }, { "epoch": 5.59, "grad_norm": 13.4782133102417, "learning_rate": 2.227943439880048e-06, "loss": 0.0485, "step": 568575 }, { "epoch": 5.59, "grad_norm": 6.062019348144531, "learning_rate": 2.2278193174258e-06, "loss": 0.0819, "step": 568600 }, { "epoch": 5.59, "grad_norm": 8.327600479125977, "learning_rate": 2.2276951949715515e-06, "loss": 0.0555, "step": 568625 }, { "epoch": 5.59, "grad_norm": 0.23920516669750214, "learning_rate": 2.2275710725173027e-06, "loss": 0.0794, "step": 568650 }, { "epoch": 5.59, "grad_norm": 3.869783639907837, "learning_rate": 2.2274469500630543e-06, "loss": 0.056, "step": 568675 }, { "epoch": 5.59, "grad_norm": 12.839900970458984, "learning_rate": 2.227322827608806e-06, "loss": 0.0587, "step": 568700 }, { "epoch": 5.59, "grad_norm": 7.879628658294678, "learning_rate": 2.2271987051545576e-06, "loss": 0.0567, "step": 568725 }, { "epoch": 5.59, "grad_norm": 0.07937205582857132, "learning_rate": 2.227074582700309e-06, "loss": 0.0831, "step": 568750 }, { "epoch": 5.59, "grad_norm": 9.824588775634766, "learning_rate": 2.226950460246061e-06, "loss": 0.0413, "step": 568775 }, { "epoch": 5.59, "grad_norm": 0.43527454137802124, "learning_rate": 2.226826337791812e-06, "loss": 0.1042, "step": 568800 }, { "epoch": 5.59, "grad_norm": 9.157164573669434, "learning_rate": 2.2267022153375637e-06, "loss": 0.045, "step": 568825 }, { "epoch": 5.59, "grad_norm": 0.7380460500717163, "learning_rate": 2.2265780928833153e-06, "loss": 0.0907, "step": 568850 }, { "epoch": 5.59, "grad_norm": 12.802407264709473, "learning_rate": 2.2264539704290665e-06, "loss": 0.0648, "step": 568875 }, { "epoch": 5.59, "grad_norm": 0.6851546168327332, "learning_rate": 2.226329847974818e-06, "loss": 0.084, "step": 568900 }, { "epoch": 5.59, "grad_norm": 8.265327453613281, "learning_rate": 2.22620572552057e-06, "loss": 0.0506, "step": 568925 }, { "epoch": 5.59, "grad_norm": 0.5731889009475708, "learning_rate": 2.2260816030663214e-06, "loss": 0.0865, "step": 568950 }, { "epoch": 5.59, "grad_norm": 9.20032024383545, "learning_rate": 2.225957480612073e-06, "loss": 0.0418, "step": 568975 }, { "epoch": 5.59, "grad_norm": 1.1145604848861694, "learning_rate": 2.2258333581578243e-06, "loss": 0.0918, "step": 569000 }, { "epoch": 5.59, "grad_norm": 11.550980567932129, "learning_rate": 2.225709235703576e-06, "loss": 0.0426, "step": 569025 }, { "epoch": 5.59, "grad_norm": 0.03016122430562973, "learning_rate": 2.2255900781474974e-06, "loss": 0.0753, "step": 569050 }, { "epoch": 5.6, "grad_norm": 7.2802581787109375, "learning_rate": 2.225465955693249e-06, "loss": 0.0389, "step": 569075 }, { "epoch": 5.6, "grad_norm": 1.3179374933242798, "learning_rate": 2.2253418332390002e-06, "loss": 0.0805, "step": 569100 }, { "epoch": 5.6, "grad_norm": 12.725297927856445, "learning_rate": 2.225217710784752e-06, "loss": 0.0605, "step": 569125 }, { "epoch": 5.6, "grad_norm": 2.961768865585327, "learning_rate": 2.2250935883305035e-06, "loss": 0.0765, "step": 569150 }, { "epoch": 5.6, "grad_norm": 4.908098220825195, "learning_rate": 2.224969465876255e-06, "loss": 0.0677, "step": 569175 }, { "epoch": 5.6, "grad_norm": 6.728981971740723, "learning_rate": 2.2248453434220068e-06, "loss": 0.1145, "step": 569200 }, { "epoch": 5.6, "grad_norm": 17.425016403198242, "learning_rate": 2.2247212209677584e-06, "loss": 0.0846, "step": 569225 }, { "epoch": 5.6, "grad_norm": 5.156711101531982, "learning_rate": 2.2245970985135096e-06, "loss": 0.0721, "step": 569250 }, { "epoch": 5.6, "grad_norm": 2.5815701484680176, "learning_rate": 2.2244729760592612e-06, "loss": 0.0476, "step": 569275 }, { "epoch": 5.6, "grad_norm": 0.4445842504501343, "learning_rate": 2.224348853605013e-06, "loss": 0.0761, "step": 569300 }, { "epoch": 5.6, "grad_norm": 10.71462345123291, "learning_rate": 2.224224731150764e-06, "loss": 0.0692, "step": 569325 }, { "epoch": 5.6, "grad_norm": 3.7625229358673096, "learning_rate": 2.2241006086965157e-06, "loss": 0.0867, "step": 569350 }, { "epoch": 5.6, "grad_norm": 10.689854621887207, "learning_rate": 2.2239764862422674e-06, "loss": 0.0581, "step": 569375 }, { "epoch": 5.6, "grad_norm": 0.9589993953704834, "learning_rate": 2.223852363788019e-06, "loss": 0.0826, "step": 569400 }, { "epoch": 5.6, "grad_norm": 12.748333930969238, "learning_rate": 2.2237282413337706e-06, "loss": 0.0538, "step": 569425 }, { "epoch": 5.6, "grad_norm": 2.1972148418426514, "learning_rate": 2.2236041188795223e-06, "loss": 0.0799, "step": 569450 }, { "epoch": 5.6, "grad_norm": 11.580604553222656, "learning_rate": 2.2234799964252735e-06, "loss": 0.0605, "step": 569475 }, { "epoch": 5.6, "grad_norm": 2.379432201385498, "learning_rate": 2.223355873971025e-06, "loss": 0.0804, "step": 569500 }, { "epoch": 5.6, "grad_norm": 2.487600326538086, "learning_rate": 2.2232317515167763e-06, "loss": 0.0453, "step": 569525 }, { "epoch": 5.6, "grad_norm": 0.5390511751174927, "learning_rate": 2.223107629062528e-06, "loss": 0.0821, "step": 569550 }, { "epoch": 5.6, "grad_norm": 16.56118392944336, "learning_rate": 2.2229835066082796e-06, "loss": 0.0454, "step": 569575 }, { "epoch": 5.6, "grad_norm": 2.845684766769409, "learning_rate": 2.2228593841540312e-06, "loss": 0.0666, "step": 569600 }, { "epoch": 5.6, "grad_norm": 7.12839937210083, "learning_rate": 2.222735261699783e-06, "loss": 0.0465, "step": 569625 }, { "epoch": 5.6, "grad_norm": 6.697272777557373, "learning_rate": 2.2226111392455345e-06, "loss": 0.102, "step": 569650 }, { "epoch": 5.6, "grad_norm": 22.031949996948242, "learning_rate": 2.2224870167912857e-06, "loss": 0.0587, "step": 569675 }, { "epoch": 5.6, "grad_norm": 0.27313488721847534, "learning_rate": 2.2223628943370373e-06, "loss": 0.0843, "step": 569700 }, { "epoch": 5.6, "grad_norm": 8.632699012756348, "learning_rate": 2.222238771882789e-06, "loss": 0.0566, "step": 569725 }, { "epoch": 5.6, "grad_norm": 3.4624688625335693, "learning_rate": 2.22211464942854e-06, "loss": 0.0895, "step": 569750 }, { "epoch": 5.6, "grad_norm": 10.802925109863281, "learning_rate": 2.221990526974292e-06, "loss": 0.0505, "step": 569775 }, { "epoch": 5.6, "grad_norm": 0.22885219752788544, "learning_rate": 2.2218664045200435e-06, "loss": 0.093, "step": 569800 }, { "epoch": 5.6, "grad_norm": 7.740658760070801, "learning_rate": 2.221742282065795e-06, "loss": 0.0455, "step": 569825 }, { "epoch": 5.6, "grad_norm": 2.1309189796447754, "learning_rate": 2.2216181596115467e-06, "loss": 0.0751, "step": 569850 }, { "epoch": 5.6, "grad_norm": 10.958404541015625, "learning_rate": 2.2214940371572984e-06, "loss": 0.0697, "step": 569875 }, { "epoch": 5.6, "grad_norm": 0.19340001046657562, "learning_rate": 2.2213699147030496e-06, "loss": 0.0839, "step": 569900 }, { "epoch": 5.6, "grad_norm": 11.717900276184082, "learning_rate": 2.221245792248801e-06, "loss": 0.0493, "step": 569925 }, { "epoch": 5.6, "grad_norm": 0.11779078841209412, "learning_rate": 2.2211216697945524e-06, "loss": 0.0873, "step": 569950 }, { "epoch": 5.6, "grad_norm": 3.667489528656006, "learning_rate": 2.220997547340304e-06, "loss": 0.0409, "step": 569975 }, { "epoch": 5.6, "grad_norm": 2.0134143829345703, "learning_rate": 2.2208734248860557e-06, "loss": 0.0693, "step": 570000 }, { "epoch": 5.6, "grad_norm": 3.1928632259368896, "learning_rate": 2.2207493024318073e-06, "loss": 0.0576, "step": 570025 }, { "epoch": 5.6, "grad_norm": 1.895007610321045, "learning_rate": 2.220625179977559e-06, "loss": 0.0841, "step": 570050 }, { "epoch": 5.61, "grad_norm": 5.670292377471924, "learning_rate": 2.2205010575233106e-06, "loss": 0.0526, "step": 570075 }, { "epoch": 5.61, "grad_norm": 2.010253429412842, "learning_rate": 2.220376935069062e-06, "loss": 0.0846, "step": 570100 }, { "epoch": 5.61, "grad_norm": 19.01981544494629, "learning_rate": 2.2202528126148134e-06, "loss": 0.058, "step": 570125 }, { "epoch": 5.61, "grad_norm": 0.16294629871845245, "learning_rate": 2.220128690160565e-06, "loss": 0.0692, "step": 570150 }, { "epoch": 5.61, "grad_norm": 12.433720588684082, "learning_rate": 2.2200045677063163e-06, "loss": 0.0388, "step": 570175 }, { "epoch": 5.61, "grad_norm": 3.857877731323242, "learning_rate": 2.219880445252068e-06, "loss": 0.0747, "step": 570200 }, { "epoch": 5.61, "grad_norm": 8.945701599121094, "learning_rate": 2.2197563227978196e-06, "loss": 0.0602, "step": 570225 }, { "epoch": 5.61, "grad_norm": 1.5880982875823975, "learning_rate": 2.219632200343571e-06, "loss": 0.0862, "step": 570250 }, { "epoch": 5.61, "grad_norm": 13.144341468811035, "learning_rate": 2.219508077889323e-06, "loss": 0.0461, "step": 570275 }, { "epoch": 5.61, "grad_norm": 6.408389568328857, "learning_rate": 2.2193839554350745e-06, "loss": 0.0831, "step": 570300 }, { "epoch": 5.61, "grad_norm": 17.365222930908203, "learning_rate": 2.2192598329808257e-06, "loss": 0.0613, "step": 570325 }, { "epoch": 5.61, "grad_norm": 10.22046947479248, "learning_rate": 2.2191357105265773e-06, "loss": 0.0679, "step": 570350 }, { "epoch": 5.61, "grad_norm": 14.0049409866333, "learning_rate": 2.2190115880723285e-06, "loss": 0.0449, "step": 570375 }, { "epoch": 5.61, "grad_norm": 1.384548544883728, "learning_rate": 2.21888746561808e-06, "loss": 0.0947, "step": 570400 }, { "epoch": 5.61, "grad_norm": 7.240363597869873, "learning_rate": 2.218763343163832e-06, "loss": 0.0616, "step": 570425 }, { "epoch": 5.61, "grad_norm": 0.2566860318183899, "learning_rate": 2.2186392207095834e-06, "loss": 0.0779, "step": 570450 }, { "epoch": 5.61, "grad_norm": 6.637153625488281, "learning_rate": 2.218515098255335e-06, "loss": 0.0504, "step": 570475 }, { "epoch": 5.61, "grad_norm": 3.3275201320648193, "learning_rate": 2.2183909758010867e-06, "loss": 0.0664, "step": 570500 }, { "epoch": 5.61, "grad_norm": 12.221830368041992, "learning_rate": 2.218266853346838e-06, "loss": 0.0608, "step": 570525 }, { "epoch": 5.61, "grad_norm": 9.306840896606445, "learning_rate": 2.2181427308925895e-06, "loss": 0.0687, "step": 570550 }, { "epoch": 5.61, "grad_norm": 9.667579650878906, "learning_rate": 2.218018608438341e-06, "loss": 0.0593, "step": 570575 }, { "epoch": 5.61, "grad_norm": 0.45698732137680054, "learning_rate": 2.2178944859840924e-06, "loss": 0.0671, "step": 570600 }, { "epoch": 5.61, "grad_norm": 7.1729888916015625, "learning_rate": 2.217770363529844e-06, "loss": 0.0521, "step": 570625 }, { "epoch": 5.61, "grad_norm": 0.3094063401222229, "learning_rate": 2.2176462410755957e-06, "loss": 0.0979, "step": 570650 }, { "epoch": 5.61, "grad_norm": 8.366482734680176, "learning_rate": 2.2175221186213473e-06, "loss": 0.0483, "step": 570675 }, { "epoch": 5.61, "grad_norm": 0.1756734400987625, "learning_rate": 2.217397996167099e-06, "loss": 0.0763, "step": 570700 }, { "epoch": 5.61, "grad_norm": 4.738056182861328, "learning_rate": 2.2172738737128506e-06, "loss": 0.0418, "step": 570725 }, { "epoch": 5.61, "grad_norm": 1.2814573049545288, "learning_rate": 2.2171497512586018e-06, "loss": 0.0586, "step": 570750 }, { "epoch": 5.61, "grad_norm": 12.9039888381958, "learning_rate": 2.2170256288043534e-06, "loss": 0.0497, "step": 570775 }, { "epoch": 5.61, "grad_norm": 4.91898250579834, "learning_rate": 2.2169015063501046e-06, "loss": 0.0726, "step": 570800 }, { "epoch": 5.61, "grad_norm": 16.858407974243164, "learning_rate": 2.2167773838958563e-06, "loss": 0.0391, "step": 570825 }, { "epoch": 5.61, "grad_norm": 0.5642307996749878, "learning_rate": 2.216653261441608e-06, "loss": 0.0775, "step": 570850 }, { "epoch": 5.61, "grad_norm": 11.937943458557129, "learning_rate": 2.2165291389873595e-06, "loss": 0.0482, "step": 570875 }, { "epoch": 5.61, "grad_norm": 0.2487732172012329, "learning_rate": 2.216405016533111e-06, "loss": 0.0948, "step": 570900 }, { "epoch": 5.61, "grad_norm": 11.757930755615234, "learning_rate": 2.216280894078863e-06, "loss": 0.0516, "step": 570925 }, { "epoch": 5.61, "grad_norm": 1.8724626302719116, "learning_rate": 2.216156771624614e-06, "loss": 0.0749, "step": 570950 }, { "epoch": 5.61, "grad_norm": 8.918679237365723, "learning_rate": 2.2160326491703656e-06, "loss": 0.0511, "step": 570975 }, { "epoch": 5.61, "grad_norm": 1.461018681526184, "learning_rate": 2.2159085267161173e-06, "loss": 0.0731, "step": 571000 }, { "epoch": 5.61, "grad_norm": 12.111064910888672, "learning_rate": 2.2157844042618685e-06, "loss": 0.0527, "step": 571025 }, { "epoch": 5.61, "grad_norm": 2.5215632915496826, "learning_rate": 2.21566028180762e-06, "loss": 0.0806, "step": 571050 }, { "epoch": 5.61, "grad_norm": 17.514524459838867, "learning_rate": 2.2155361593533718e-06, "loss": 0.0544, "step": 571075 }, { "epoch": 5.62, "grad_norm": 0.12404827028512955, "learning_rate": 2.2154120368991234e-06, "loss": 0.0862, "step": 571100 }, { "epoch": 5.62, "grad_norm": 5.390607833862305, "learning_rate": 2.215287914444875e-06, "loss": 0.0348, "step": 571125 }, { "epoch": 5.62, "grad_norm": 1.5442620515823364, "learning_rate": 2.2151637919906267e-06, "loss": 0.0814, "step": 571150 }, { "epoch": 5.62, "grad_norm": 6.997594356536865, "learning_rate": 2.215039669536378e-06, "loss": 0.0617, "step": 571175 }, { "epoch": 5.62, "grad_norm": 4.569423198699951, "learning_rate": 2.2149155470821295e-06, "loss": 0.085, "step": 571200 }, { "epoch": 5.62, "grad_norm": 8.415882110595703, "learning_rate": 2.214791424627881e-06, "loss": 0.0686, "step": 571225 }, { "epoch": 5.62, "grad_norm": 0.2722890377044678, "learning_rate": 2.2146722670718026e-06, "loss": 0.0937, "step": 571250 }, { "epoch": 5.62, "grad_norm": 12.31545639038086, "learning_rate": 2.2145481446175542e-06, "loss": 0.036, "step": 571275 }, { "epoch": 5.62, "grad_norm": 2.166330099105835, "learning_rate": 2.2144240221633054e-06, "loss": 0.1297, "step": 571300 }, { "epoch": 5.62, "grad_norm": 6.257668495178223, "learning_rate": 2.214299899709057e-06, "loss": 0.0435, "step": 571325 }, { "epoch": 5.62, "grad_norm": 4.624256610870361, "learning_rate": 2.2141757772548087e-06, "loss": 0.08, "step": 571350 }, { "epoch": 5.62, "grad_norm": 14.729209899902344, "learning_rate": 2.2140516548005603e-06, "loss": 0.0522, "step": 571375 }, { "epoch": 5.62, "grad_norm": 1.1717053651809692, "learning_rate": 2.2139275323463116e-06, "loss": 0.0906, "step": 571400 }, { "epoch": 5.62, "grad_norm": 19.171850204467773, "learning_rate": 2.213803409892063e-06, "loss": 0.0598, "step": 571425 }, { "epoch": 5.62, "grad_norm": 0.4669475555419922, "learning_rate": 2.213679287437815e-06, "loss": 0.0567, "step": 571450 }, { "epoch": 5.62, "grad_norm": 15.575392723083496, "learning_rate": 2.2135551649835665e-06, "loss": 0.0484, "step": 571475 }, { "epoch": 5.62, "grad_norm": 4.430044174194336, "learning_rate": 2.213431042529318e-06, "loss": 0.1164, "step": 571500 }, { "epoch": 5.62, "grad_norm": 7.312186241149902, "learning_rate": 2.2133069200750697e-06, "loss": 0.0274, "step": 571525 }, { "epoch": 5.62, "grad_norm": 0.7179434895515442, "learning_rate": 2.213182797620821e-06, "loss": 0.077, "step": 571550 }, { "epoch": 5.62, "grad_norm": 7.9114251136779785, "learning_rate": 2.2130586751665726e-06, "loss": 0.0472, "step": 571575 }, { "epoch": 5.62, "grad_norm": 3.133718490600586, "learning_rate": 2.2129345527123242e-06, "loss": 0.0952, "step": 571600 }, { "epoch": 5.62, "grad_norm": 10.072526931762695, "learning_rate": 2.2128104302580754e-06, "loss": 0.0622, "step": 571625 }, { "epoch": 5.62, "grad_norm": 2.4066929817199707, "learning_rate": 2.212686307803827e-06, "loss": 0.077, "step": 571650 }, { "epoch": 5.62, "grad_norm": 1.0801900625228882, "learning_rate": 2.2125621853495787e-06, "loss": 0.0414, "step": 571675 }, { "epoch": 5.62, "grad_norm": 3.176628828048706, "learning_rate": 2.2124380628953303e-06, "loss": 0.0766, "step": 571700 }, { "epoch": 5.62, "grad_norm": 4.3810272216796875, "learning_rate": 2.212313940441082e-06, "loss": 0.0543, "step": 571725 }, { "epoch": 5.62, "grad_norm": 1.8207755088806152, "learning_rate": 2.212189817986833e-06, "loss": 0.0769, "step": 571750 }, { "epoch": 5.62, "grad_norm": 5.546588897705078, "learning_rate": 2.212065695532585e-06, "loss": 0.0582, "step": 571775 }, { "epoch": 5.62, "grad_norm": 5.518492698669434, "learning_rate": 2.2119415730783364e-06, "loss": 0.0811, "step": 571800 }, { "epoch": 5.62, "grad_norm": 15.469488143920898, "learning_rate": 2.2118174506240877e-06, "loss": 0.0466, "step": 571825 }, { "epoch": 5.62, "grad_norm": 3.2788474559783936, "learning_rate": 2.2116933281698393e-06, "loss": 0.0966, "step": 571850 }, { "epoch": 5.62, "grad_norm": 10.832390785217285, "learning_rate": 2.211569205715591e-06, "loss": 0.0391, "step": 571875 }, { "epoch": 5.62, "grad_norm": 11.009151458740234, "learning_rate": 2.2114450832613426e-06, "loss": 0.1017, "step": 571900 }, { "epoch": 5.62, "grad_norm": 11.146970748901367, "learning_rate": 2.211320960807094e-06, "loss": 0.06, "step": 571925 }, { "epoch": 5.62, "grad_norm": 8.045110702514648, "learning_rate": 2.211196838352846e-06, "loss": 0.1062, "step": 571950 }, { "epoch": 5.62, "grad_norm": 30.615697860717773, "learning_rate": 2.211072715898597e-06, "loss": 0.0534, "step": 571975 }, { "epoch": 5.62, "grad_norm": 2.267047166824341, "learning_rate": 2.2109485934443487e-06, "loss": 0.092, "step": 572000 }, { "epoch": 5.62, "grad_norm": 4.047514915466309, "learning_rate": 2.2108244709901003e-06, "loss": 0.0373, "step": 572025 }, { "epoch": 5.62, "grad_norm": 4.469048976898193, "learning_rate": 2.2107003485358515e-06, "loss": 0.0753, "step": 572050 }, { "epoch": 5.62, "grad_norm": 16.9541015625, "learning_rate": 2.210576226081603e-06, "loss": 0.0749, "step": 572075 }, { "epoch": 5.62, "grad_norm": 3.3322951793670654, "learning_rate": 2.210452103627355e-06, "loss": 0.0585, "step": 572100 }, { "epoch": 5.63, "grad_norm": 5.240408897399902, "learning_rate": 2.2103279811731064e-06, "loss": 0.0462, "step": 572125 }, { "epoch": 5.63, "grad_norm": 1.1230264902114868, "learning_rate": 2.210203858718858e-06, "loss": 0.0684, "step": 572150 }, { "epoch": 5.63, "grad_norm": 6.8990478515625, "learning_rate": 2.2100797362646097e-06, "loss": 0.0709, "step": 572175 }, { "epoch": 5.63, "grad_norm": 4.1919732093811035, "learning_rate": 2.209955613810361e-06, "loss": 0.0903, "step": 572200 }, { "epoch": 5.63, "grad_norm": 12.02964973449707, "learning_rate": 2.2098314913561125e-06, "loss": 0.0371, "step": 572225 }, { "epoch": 5.63, "grad_norm": 12.747426986694336, "learning_rate": 2.2097073689018638e-06, "loss": 0.0756, "step": 572250 }, { "epoch": 5.63, "grad_norm": 4.7542195320129395, "learning_rate": 2.2095832464476154e-06, "loss": 0.0439, "step": 572275 }, { "epoch": 5.63, "grad_norm": 1.5559507608413696, "learning_rate": 2.209459123993367e-06, "loss": 0.0622, "step": 572300 }, { "epoch": 5.63, "grad_norm": 20.088294982910156, "learning_rate": 2.2093350015391187e-06, "loss": 0.063, "step": 572325 }, { "epoch": 5.63, "grad_norm": 0.1159430742263794, "learning_rate": 2.2092108790848703e-06, "loss": 0.0839, "step": 572350 }, { "epoch": 5.63, "grad_norm": 13.848952293395996, "learning_rate": 2.209086756630622e-06, "loss": 0.0515, "step": 572375 }, { "epoch": 5.63, "grad_norm": 1.179976463317871, "learning_rate": 2.208962634176373e-06, "loss": 0.0747, "step": 572400 }, { "epoch": 5.63, "grad_norm": 7.028043746948242, "learning_rate": 2.2088385117221248e-06, "loss": 0.0518, "step": 572425 }, { "epoch": 5.63, "grad_norm": 8.240962982177734, "learning_rate": 2.2087143892678764e-06, "loss": 0.083, "step": 572450 }, { "epoch": 5.63, "grad_norm": 5.928658962249756, "learning_rate": 2.2085902668136276e-06, "loss": 0.0307, "step": 572475 }, { "epoch": 5.63, "grad_norm": 4.383721351623535, "learning_rate": 2.2084661443593793e-06, "loss": 0.0613, "step": 572500 }, { "epoch": 5.63, "grad_norm": 9.9795560836792, "learning_rate": 2.208342021905131e-06, "loss": 0.0447, "step": 572525 }, { "epoch": 5.63, "grad_norm": 0.13041725754737854, "learning_rate": 2.2082178994508825e-06, "loss": 0.0858, "step": 572550 }, { "epoch": 5.63, "grad_norm": 9.583268165588379, "learning_rate": 2.208093776996634e-06, "loss": 0.0633, "step": 572575 }, { "epoch": 5.63, "grad_norm": 2.42315411567688, "learning_rate": 2.207969654542386e-06, "loss": 0.081, "step": 572600 }, { "epoch": 5.63, "grad_norm": 11.024394989013672, "learning_rate": 2.207845532088137e-06, "loss": 0.0653, "step": 572625 }, { "epoch": 5.63, "grad_norm": 6.291317939758301, "learning_rate": 2.2077214096338886e-06, "loss": 0.1002, "step": 572650 }, { "epoch": 5.63, "grad_norm": 18.9952449798584, "learning_rate": 2.20759728717964e-06, "loss": 0.0587, "step": 572675 }, { "epoch": 5.63, "grad_norm": 4.159104347229004, "learning_rate": 2.2074731647253915e-06, "loss": 0.0849, "step": 572700 }, { "epoch": 5.63, "grad_norm": 13.577937126159668, "learning_rate": 2.207349042271143e-06, "loss": 0.0617, "step": 572725 }, { "epoch": 5.63, "grad_norm": 0.725687563419342, "learning_rate": 2.2072249198168948e-06, "loss": 0.091, "step": 572750 }, { "epoch": 5.63, "grad_norm": 4.885849475860596, "learning_rate": 2.2071007973626464e-06, "loss": 0.034, "step": 572775 }, { "epoch": 5.63, "grad_norm": 0.0673372894525528, "learning_rate": 2.206976674908398e-06, "loss": 0.1021, "step": 572800 }, { "epoch": 5.63, "grad_norm": 7.576930046081543, "learning_rate": 2.2068525524541492e-06, "loss": 0.062, "step": 572825 }, { "epoch": 5.63, "grad_norm": 1.532397985458374, "learning_rate": 2.206728429999901e-06, "loss": 0.1003, "step": 572850 }, { "epoch": 5.63, "grad_norm": 6.995522975921631, "learning_rate": 2.2066043075456525e-06, "loss": 0.0466, "step": 572875 }, { "epoch": 5.63, "grad_norm": 10.394214630126953, "learning_rate": 2.2064801850914037e-06, "loss": 0.0808, "step": 572900 }, { "epoch": 5.63, "grad_norm": 14.634702682495117, "learning_rate": 2.2063560626371554e-06, "loss": 0.0632, "step": 572925 }, { "epoch": 5.63, "grad_norm": 0.050307612866163254, "learning_rate": 2.206231940182907e-06, "loss": 0.086, "step": 572950 }, { "epoch": 5.63, "grad_norm": 2.1558234691619873, "learning_rate": 2.2061078177286586e-06, "loss": 0.0484, "step": 572975 }, { "epoch": 5.63, "grad_norm": 0.29246827960014343, "learning_rate": 2.2059836952744103e-06, "loss": 0.0915, "step": 573000 }, { "epoch": 5.63, "grad_norm": 3.33809494972229, "learning_rate": 2.205859572820162e-06, "loss": 0.0341, "step": 573025 }, { "epoch": 5.63, "grad_norm": 0.060564152896404266, "learning_rate": 2.205735450365913e-06, "loss": 0.0912, "step": 573050 }, { "epoch": 5.63, "grad_norm": 8.416020393371582, "learning_rate": 2.2056113279116647e-06, "loss": 0.0555, "step": 573075 }, { "epoch": 5.63, "grad_norm": 4.572382926940918, "learning_rate": 2.205487205457416e-06, "loss": 0.1246, "step": 573100 }, { "epoch": 5.64, "grad_norm": 3.053922414779663, "learning_rate": 2.2053630830031676e-06, "loss": 0.0516, "step": 573125 }, { "epoch": 5.64, "grad_norm": 0.40711843967437744, "learning_rate": 2.2052389605489192e-06, "loss": 0.0806, "step": 573150 }, { "epoch": 5.64, "grad_norm": 12.299247741699219, "learning_rate": 2.205114838094671e-06, "loss": 0.0686, "step": 573175 }, { "epoch": 5.64, "grad_norm": 0.6101398468017578, "learning_rate": 2.2049907156404225e-06, "loss": 0.0849, "step": 573200 }, { "epoch": 5.64, "grad_norm": 15.350733757019043, "learning_rate": 2.204866593186174e-06, "loss": 0.0443, "step": 573225 }, { "epoch": 5.64, "grad_norm": 2.846172332763672, "learning_rate": 2.2047424707319253e-06, "loss": 0.0565, "step": 573250 }, { "epoch": 5.64, "grad_norm": 14.791572570800781, "learning_rate": 2.204618348277677e-06, "loss": 0.0491, "step": 573275 }, { "epoch": 5.64, "grad_norm": 5.335533618927002, "learning_rate": 2.2044942258234286e-06, "loss": 0.106, "step": 573300 }, { "epoch": 5.64, "grad_norm": 9.376239776611328, "learning_rate": 2.20437010336918e-06, "loss": 0.0565, "step": 573325 }, { "epoch": 5.64, "grad_norm": 1.491368055343628, "learning_rate": 2.2042509458131013e-06, "loss": 0.0722, "step": 573350 }, { "epoch": 5.64, "grad_norm": 9.183905601501465, "learning_rate": 2.204126823358853e-06, "loss": 0.0352, "step": 573375 }, { "epoch": 5.64, "grad_norm": 3.6104989051818848, "learning_rate": 2.2040027009046045e-06, "loss": 0.0588, "step": 573400 }, { "epoch": 5.64, "grad_norm": 14.652301788330078, "learning_rate": 2.203878578450356e-06, "loss": 0.058, "step": 573425 }, { "epoch": 5.64, "grad_norm": 3.32999587059021, "learning_rate": 2.203754455996108e-06, "loss": 0.0826, "step": 573450 }, { "epoch": 5.64, "grad_norm": 13.153775215148926, "learning_rate": 2.2036303335418595e-06, "loss": 0.0461, "step": 573475 }, { "epoch": 5.64, "grad_norm": 0.09876509755849838, "learning_rate": 2.2035062110876107e-06, "loss": 0.094, "step": 573500 }, { "epoch": 5.64, "grad_norm": 12.95981216430664, "learning_rate": 2.2033820886333623e-06, "loss": 0.0432, "step": 573525 }, { "epoch": 5.64, "grad_norm": 6.6186137199401855, "learning_rate": 2.203257966179114e-06, "loss": 0.0449, "step": 573550 }, { "epoch": 5.64, "grad_norm": 17.051359176635742, "learning_rate": 2.203133843724865e-06, "loss": 0.0604, "step": 573575 }, { "epoch": 5.64, "grad_norm": 5.296125411987305, "learning_rate": 2.2030097212706168e-06, "loss": 0.0984, "step": 573600 }, { "epoch": 5.64, "grad_norm": 8.566329956054688, "learning_rate": 2.2028855988163684e-06, "loss": 0.0397, "step": 573625 }, { "epoch": 5.64, "grad_norm": 2.7203481197357178, "learning_rate": 2.20276147636212e-06, "loss": 0.1145, "step": 573650 }, { "epoch": 5.64, "grad_norm": 1.182863473892212, "learning_rate": 2.2026373539078717e-06, "loss": 0.0567, "step": 573675 }, { "epoch": 5.64, "grad_norm": 2.1651082038879395, "learning_rate": 2.202513231453623e-06, "loss": 0.069, "step": 573700 }, { "epoch": 5.64, "grad_norm": 3.483555316925049, "learning_rate": 2.2023891089993745e-06, "loss": 0.0196, "step": 573725 }, { "epoch": 5.64, "grad_norm": 2.3975911140441895, "learning_rate": 2.202264986545126e-06, "loss": 0.0817, "step": 573750 }, { "epoch": 5.64, "grad_norm": 15.454450607299805, "learning_rate": 2.2021408640908774e-06, "loss": 0.0375, "step": 573775 }, { "epoch": 5.64, "grad_norm": 0.28409361839294434, "learning_rate": 2.202016741636629e-06, "loss": 0.0839, "step": 573800 }, { "epoch": 5.64, "grad_norm": 8.34769058227539, "learning_rate": 2.2018926191823806e-06, "loss": 0.0361, "step": 573825 }, { "epoch": 5.64, "grad_norm": 1.8748890161514282, "learning_rate": 2.2017684967281323e-06, "loss": 0.0882, "step": 573850 }, { "epoch": 5.64, "grad_norm": 10.118870735168457, "learning_rate": 2.201644374273884e-06, "loss": 0.0413, "step": 573875 }, { "epoch": 5.64, "grad_norm": 0.24411088228225708, "learning_rate": 2.2015202518196356e-06, "loss": 0.0728, "step": 573900 }, { "epoch": 5.64, "grad_norm": 3.487996816635132, "learning_rate": 2.2013961293653868e-06, "loss": 0.0507, "step": 573925 }, { "epoch": 5.64, "grad_norm": 3.2849037647247314, "learning_rate": 2.2012720069111384e-06, "loss": 0.0818, "step": 573950 }, { "epoch": 5.64, "grad_norm": 7.488863468170166, "learning_rate": 2.20114788445689e-06, "loss": 0.0409, "step": 573975 }, { "epoch": 5.64, "grad_norm": 5.097745895385742, "learning_rate": 2.2010237620026412e-06, "loss": 0.0886, "step": 574000 }, { "epoch": 5.64, "grad_norm": 6.945026397705078, "learning_rate": 2.200899639548393e-06, "loss": 0.0457, "step": 574025 }, { "epoch": 5.64, "grad_norm": 0.11603828519582748, "learning_rate": 2.2007755170941445e-06, "loss": 0.0718, "step": 574050 }, { "epoch": 5.64, "grad_norm": 6.901297569274902, "learning_rate": 2.200651394639896e-06, "loss": 0.0275, "step": 574075 }, { "epoch": 5.64, "grad_norm": 0.7326195240020752, "learning_rate": 2.2005272721856478e-06, "loss": 0.0854, "step": 574100 }, { "epoch": 5.64, "grad_norm": 15.338479042053223, "learning_rate": 2.200403149731399e-06, "loss": 0.0338, "step": 574125 }, { "epoch": 5.65, "grad_norm": 1.4167182445526123, "learning_rate": 2.2002790272771506e-06, "loss": 0.0913, "step": 574150 }, { "epoch": 5.65, "grad_norm": 11.702886581420898, "learning_rate": 2.2001549048229023e-06, "loss": 0.0473, "step": 574175 }, { "epoch": 5.65, "grad_norm": 3.0157017707824707, "learning_rate": 2.200030782368654e-06, "loss": 0.0827, "step": 574200 }, { "epoch": 5.65, "grad_norm": 20.639314651489258, "learning_rate": 2.1999066599144055e-06, "loss": 0.0656, "step": 574225 }, { "epoch": 5.65, "grad_norm": 2.4110381603240967, "learning_rate": 2.199782537460157e-06, "loss": 0.076, "step": 574250 }, { "epoch": 5.65, "grad_norm": 6.737905502319336, "learning_rate": 2.1996584150059084e-06, "loss": 0.053, "step": 574275 }, { "epoch": 5.65, "grad_norm": 0.8996483087539673, "learning_rate": 2.19953429255166e-06, "loss": 0.0729, "step": 574300 }, { "epoch": 5.65, "grad_norm": 12.944454193115234, "learning_rate": 2.1994101700974117e-06, "loss": 0.034, "step": 574325 }, { "epoch": 5.65, "grad_norm": 0.4148953855037689, "learning_rate": 2.199286047643163e-06, "loss": 0.0633, "step": 574350 }, { "epoch": 5.65, "grad_norm": 10.865917205810547, "learning_rate": 2.1991619251889145e-06, "loss": 0.0419, "step": 574375 }, { "epoch": 5.65, "grad_norm": 5.413486003875732, "learning_rate": 2.199037802734666e-06, "loss": 0.0787, "step": 574400 }, { "epoch": 5.65, "grad_norm": 6.60006856918335, "learning_rate": 2.1989136802804178e-06, "loss": 0.0355, "step": 574425 }, { "epoch": 5.65, "grad_norm": 0.16768617928028107, "learning_rate": 2.1987895578261694e-06, "loss": 0.088, "step": 574450 }, { "epoch": 5.65, "grad_norm": 9.553694725036621, "learning_rate": 2.1986654353719206e-06, "loss": 0.0413, "step": 574475 }, { "epoch": 5.65, "grad_norm": 0.06588481366634369, "learning_rate": 2.1985413129176723e-06, "loss": 0.0941, "step": 574500 }, { "epoch": 5.65, "grad_norm": 7.223248481750488, "learning_rate": 2.198417190463424e-06, "loss": 0.0407, "step": 574525 }, { "epoch": 5.65, "grad_norm": 0.3849107623100281, "learning_rate": 2.198293068009175e-06, "loss": 0.0835, "step": 574550 }, { "epoch": 5.65, "grad_norm": 10.717364311218262, "learning_rate": 2.1981689455549267e-06, "loss": 0.0732, "step": 574575 }, { "epoch": 5.65, "grad_norm": 0.3164551854133606, "learning_rate": 2.1980448231006784e-06, "loss": 0.0783, "step": 574600 }, { "epoch": 5.65, "grad_norm": 4.497241973876953, "learning_rate": 2.19792070064643e-06, "loss": 0.0505, "step": 574625 }, { "epoch": 5.65, "grad_norm": 0.49896812438964844, "learning_rate": 2.1977965781921816e-06, "loss": 0.1041, "step": 574650 }, { "epoch": 5.65, "grad_norm": 5.846010684967041, "learning_rate": 2.1976724557379333e-06, "loss": 0.0496, "step": 574675 }, { "epoch": 5.65, "grad_norm": 1.2645015716552734, "learning_rate": 2.1975483332836845e-06, "loss": 0.082, "step": 574700 }, { "epoch": 5.65, "grad_norm": 15.202472686767578, "learning_rate": 2.197424210829436e-06, "loss": 0.0745, "step": 574725 }, { "epoch": 5.65, "grad_norm": 5.364851474761963, "learning_rate": 2.1973000883751878e-06, "loss": 0.0844, "step": 574750 }, { "epoch": 5.65, "grad_norm": 9.61286449432373, "learning_rate": 2.197175965920939e-06, "loss": 0.0494, "step": 574775 }, { "epoch": 5.65, "grad_norm": 8.717530250549316, "learning_rate": 2.1970518434666906e-06, "loss": 0.1171, "step": 574800 }, { "epoch": 5.65, "grad_norm": 15.886240005493164, "learning_rate": 2.1969277210124422e-06, "loss": 0.062, "step": 574825 }, { "epoch": 5.65, "grad_norm": 0.019693350419402122, "learning_rate": 2.196803598558194e-06, "loss": 0.0922, "step": 574850 }, { "epoch": 5.65, "grad_norm": 9.191276550292969, "learning_rate": 2.1966794761039455e-06, "loss": 0.0444, "step": 574875 }, { "epoch": 5.65, "grad_norm": 1.4358632564544678, "learning_rate": 2.1965553536496967e-06, "loss": 0.0659, "step": 574900 }, { "epoch": 5.65, "grad_norm": 11.406815528869629, "learning_rate": 2.1964312311954484e-06, "loss": 0.0543, "step": 574925 }, { "epoch": 5.65, "grad_norm": 0.004921373445540667, "learning_rate": 2.1963071087412e-06, "loss": 0.0838, "step": 574950 }, { "epoch": 5.65, "grad_norm": 22.420095443725586, "learning_rate": 2.196182986286951e-06, "loss": 0.0553, "step": 574975 }, { "epoch": 5.65, "grad_norm": 1.066683053970337, "learning_rate": 2.196058863832703e-06, "loss": 0.0843, "step": 575000 }, { "epoch": 5.65, "grad_norm": 11.141014099121094, "learning_rate": 2.1959347413784545e-06, "loss": 0.0399, "step": 575025 }, { "epoch": 5.65, "grad_norm": 3.866157054901123, "learning_rate": 2.195810618924206e-06, "loss": 0.1017, "step": 575050 }, { "epoch": 5.65, "grad_norm": 10.498461723327637, "learning_rate": 2.1956864964699577e-06, "loss": 0.0629, "step": 575075 }, { "epoch": 5.65, "grad_norm": 0.02398066781461239, "learning_rate": 2.1955623740157094e-06, "loss": 0.072, "step": 575100 }, { "epoch": 5.65, "grad_norm": 7.947684288024902, "learning_rate": 2.1954382515614606e-06, "loss": 0.0491, "step": 575125 }, { "epoch": 5.65, "grad_norm": 0.03187040984630585, "learning_rate": 2.1953141291072122e-06, "loss": 0.09, "step": 575150 }, { "epoch": 5.66, "grad_norm": 11.816845893859863, "learning_rate": 2.195190006652964e-06, "loss": 0.0481, "step": 575175 }, { "epoch": 5.66, "grad_norm": 1.4782794713974, "learning_rate": 2.195065884198715e-06, "loss": 0.0873, "step": 575200 }, { "epoch": 5.66, "grad_norm": 19.258275985717773, "learning_rate": 2.1949417617444667e-06, "loss": 0.0517, "step": 575225 }, { "epoch": 5.66, "grad_norm": 3.618164539337158, "learning_rate": 2.1948176392902183e-06, "loss": 0.0706, "step": 575250 }, { "epoch": 5.66, "grad_norm": 15.744365692138672, "learning_rate": 2.19469351683597e-06, "loss": 0.0552, "step": 575275 }, { "epoch": 5.66, "grad_norm": 3.664872884750366, "learning_rate": 2.1945693943817216e-06, "loss": 0.0981, "step": 575300 }, { "epoch": 5.66, "grad_norm": 16.077604293823242, "learning_rate": 2.194445271927473e-06, "loss": 0.0515, "step": 575325 }, { "epoch": 5.66, "grad_norm": 4.341440677642822, "learning_rate": 2.1943211494732245e-06, "loss": 0.0821, "step": 575350 }, { "epoch": 5.66, "grad_norm": 13.13774585723877, "learning_rate": 2.194197027018976e-06, "loss": 0.0657, "step": 575375 }, { "epoch": 5.66, "grad_norm": 1.2884299755096436, "learning_rate": 2.1940778694628975e-06, "loss": 0.0736, "step": 575400 }, { "epoch": 5.66, "grad_norm": 10.334800720214844, "learning_rate": 2.193953747008649e-06, "loss": 0.0397, "step": 575425 }, { "epoch": 5.66, "grad_norm": 4.285221099853516, "learning_rate": 2.1938296245544004e-06, "loss": 0.0817, "step": 575450 }, { "epoch": 5.66, "grad_norm": 12.445219039916992, "learning_rate": 2.193705502100152e-06, "loss": 0.051, "step": 575475 }, { "epoch": 5.66, "grad_norm": 1.5812119245529175, "learning_rate": 2.1935813796459037e-06, "loss": 0.0898, "step": 575500 }, { "epoch": 5.66, "grad_norm": 16.40320587158203, "learning_rate": 2.1934572571916553e-06, "loss": 0.0594, "step": 575525 }, { "epoch": 5.66, "grad_norm": 0.02626134641468525, "learning_rate": 2.193333134737407e-06, "loss": 0.0752, "step": 575550 }, { "epoch": 5.66, "grad_norm": 12.025358200073242, "learning_rate": 2.193209012283158e-06, "loss": 0.0579, "step": 575575 }, { "epoch": 5.66, "grad_norm": 7.939798831939697, "learning_rate": 2.1930848898289098e-06, "loss": 0.0877, "step": 575600 }, { "epoch": 5.66, "grad_norm": 4.113125801086426, "learning_rate": 2.1929607673746614e-06, "loss": 0.0487, "step": 575625 }, { "epoch": 5.66, "grad_norm": 0.07615206390619278, "learning_rate": 2.1928366449204126e-06, "loss": 0.0747, "step": 575650 }, { "epoch": 5.66, "grad_norm": 5.556502342224121, "learning_rate": 2.1927125224661643e-06, "loss": 0.0411, "step": 575675 }, { "epoch": 5.66, "grad_norm": 0.04113546013832092, "learning_rate": 2.192588400011916e-06, "loss": 0.0794, "step": 575700 }, { "epoch": 5.66, "grad_norm": 9.461883544921875, "learning_rate": 2.1924642775576675e-06, "loss": 0.0448, "step": 575725 }, { "epoch": 5.66, "grad_norm": 1.7447973489761353, "learning_rate": 2.192340155103419e-06, "loss": 0.0917, "step": 575750 }, { "epoch": 5.66, "grad_norm": 4.906871795654297, "learning_rate": 2.192216032649171e-06, "loss": 0.0371, "step": 575775 }, { "epoch": 5.66, "grad_norm": 5.664839744567871, "learning_rate": 2.192091910194922e-06, "loss": 0.0776, "step": 575800 }, { "epoch": 5.66, "grad_norm": 14.726842880249023, "learning_rate": 2.1919677877406736e-06, "loss": 0.0518, "step": 575825 }, { "epoch": 5.66, "grad_norm": 0.4412800967693329, "learning_rate": 2.1918436652864253e-06, "loss": 0.0618, "step": 575850 }, { "epoch": 5.66, "grad_norm": 12.009736061096191, "learning_rate": 2.1917195428321765e-06, "loss": 0.0541, "step": 575875 }, { "epoch": 5.66, "grad_norm": 0.4297427833080292, "learning_rate": 2.191595420377928e-06, "loss": 0.0743, "step": 575900 }, { "epoch": 5.66, "grad_norm": 10.621614456176758, "learning_rate": 2.1914712979236798e-06, "loss": 0.0499, "step": 575925 }, { "epoch": 5.66, "grad_norm": 5.844671726226807, "learning_rate": 2.1913471754694314e-06, "loss": 0.1193, "step": 575950 }, { "epoch": 5.66, "grad_norm": 9.58802604675293, "learning_rate": 2.191223053015183e-06, "loss": 0.0623, "step": 575975 }, { "epoch": 5.66, "grad_norm": 1.1150248050689697, "learning_rate": 2.1910989305609342e-06, "loss": 0.0816, "step": 576000 }, { "epoch": 5.66, "grad_norm": 6.631799697875977, "learning_rate": 2.190974808106686e-06, "loss": 0.0613, "step": 576025 }, { "epoch": 5.66, "grad_norm": 7.3627729415893555, "learning_rate": 2.1908506856524375e-06, "loss": 0.1054, "step": 576050 }, { "epoch": 5.66, "grad_norm": 5.267489433288574, "learning_rate": 2.1907265631981887e-06, "loss": 0.0497, "step": 576075 }, { "epoch": 5.66, "grad_norm": 0.7818328738212585, "learning_rate": 2.1906024407439404e-06, "loss": 0.0937, "step": 576100 }, { "epoch": 5.66, "grad_norm": 16.118703842163086, "learning_rate": 2.190478318289692e-06, "loss": 0.0682, "step": 576125 }, { "epoch": 5.66, "grad_norm": 3.92922306060791, "learning_rate": 2.1903541958354436e-06, "loss": 0.0979, "step": 576150 }, { "epoch": 5.67, "grad_norm": 17.816556930541992, "learning_rate": 2.1902300733811953e-06, "loss": 0.0518, "step": 576175 }, { "epoch": 5.67, "grad_norm": 4.862457752227783, "learning_rate": 2.190105950926947e-06, "loss": 0.0944, "step": 576200 }, { "epoch": 5.67, "grad_norm": 13.346150398254395, "learning_rate": 2.189981828472698e-06, "loss": 0.066, "step": 576225 }, { "epoch": 5.67, "grad_norm": 4.551697731018066, "learning_rate": 2.1898577060184497e-06, "loss": 0.094, "step": 576250 }, { "epoch": 5.67, "grad_norm": 10.36080551147461, "learning_rate": 2.1897335835642014e-06, "loss": 0.0438, "step": 576275 }, { "epoch": 5.67, "grad_norm": 1.4320287704467773, "learning_rate": 2.1896094611099526e-06, "loss": 0.0966, "step": 576300 }, { "epoch": 5.67, "grad_norm": 14.812054634094238, "learning_rate": 2.1894853386557042e-06, "loss": 0.0563, "step": 576325 }, { "epoch": 5.67, "grad_norm": 2.1300103664398193, "learning_rate": 2.189361216201456e-06, "loss": 0.0978, "step": 576350 }, { "epoch": 5.67, "grad_norm": 16.049945831298828, "learning_rate": 2.1892370937472075e-06, "loss": 0.0519, "step": 576375 }, { "epoch": 5.67, "grad_norm": 2.004049301147461, "learning_rate": 2.189112971292959e-06, "loss": 0.095, "step": 576400 }, { "epoch": 5.67, "grad_norm": 5.079681396484375, "learning_rate": 2.1889888488387103e-06, "loss": 0.0659, "step": 576425 }, { "epoch": 5.67, "grad_norm": 1.5398005247116089, "learning_rate": 2.188864726384462e-06, "loss": 0.0715, "step": 576450 }, { "epoch": 5.67, "grad_norm": 24.54189109802246, "learning_rate": 2.1887406039302136e-06, "loss": 0.0519, "step": 576475 }, { "epoch": 5.67, "grad_norm": 3.8880844116210938, "learning_rate": 2.188616481475965e-06, "loss": 0.0658, "step": 576500 }, { "epoch": 5.67, "grad_norm": 9.847679138183594, "learning_rate": 2.1884923590217165e-06, "loss": 0.0413, "step": 576525 }, { "epoch": 5.67, "grad_norm": 0.4186227023601532, "learning_rate": 2.188368236567468e-06, "loss": 0.0938, "step": 576550 }, { "epoch": 5.67, "grad_norm": 8.142600059509277, "learning_rate": 2.1882441141132197e-06, "loss": 0.0376, "step": 576575 }, { "epoch": 5.67, "grad_norm": 3.444373607635498, "learning_rate": 2.1881199916589714e-06, "loss": 0.0954, "step": 576600 }, { "epoch": 5.67, "grad_norm": 11.581429481506348, "learning_rate": 2.187995869204723e-06, "loss": 0.0429, "step": 576625 }, { "epoch": 5.67, "grad_norm": 3.827936887741089, "learning_rate": 2.187871746750474e-06, "loss": 0.0942, "step": 576650 }, { "epoch": 5.67, "grad_norm": 9.74930191040039, "learning_rate": 2.187747624296226e-06, "loss": 0.0445, "step": 576675 }, { "epoch": 5.67, "grad_norm": 0.17721989750862122, "learning_rate": 2.1876235018419775e-06, "loss": 0.078, "step": 576700 }, { "epoch": 5.67, "grad_norm": 10.562043190002441, "learning_rate": 2.1874993793877287e-06, "loss": 0.0586, "step": 576725 }, { "epoch": 5.67, "grad_norm": 1.9577980041503906, "learning_rate": 2.1873752569334803e-06, "loss": 0.0741, "step": 576750 }, { "epoch": 5.67, "grad_norm": 10.47189712524414, "learning_rate": 2.187251134479232e-06, "loss": 0.0452, "step": 576775 }, { "epoch": 5.67, "grad_norm": 0.629959762096405, "learning_rate": 2.1871270120249836e-06, "loss": 0.0704, "step": 576800 }, { "epoch": 5.67, "grad_norm": 17.098094940185547, "learning_rate": 2.1870028895707352e-06, "loss": 0.0457, "step": 576825 }, { "epoch": 5.67, "grad_norm": 3.4909160137176514, "learning_rate": 2.1868787671164864e-06, "loss": 0.0623, "step": 576850 }, { "epoch": 5.67, "grad_norm": 7.2987799644470215, "learning_rate": 2.186754644662238e-06, "loss": 0.0365, "step": 576875 }, { "epoch": 5.67, "grad_norm": 1.2516827583312988, "learning_rate": 2.1866305222079897e-06, "loss": 0.0681, "step": 576900 }, { "epoch": 5.67, "grad_norm": 16.575029373168945, "learning_rate": 2.186506399753741e-06, "loss": 0.0501, "step": 576925 }, { "epoch": 5.67, "grad_norm": 2.3098304271698, "learning_rate": 2.1863822772994926e-06, "loss": 0.1017, "step": 576950 }, { "epoch": 5.67, "grad_norm": 11.336383819580078, "learning_rate": 2.186258154845244e-06, "loss": 0.052, "step": 576975 }, { "epoch": 5.67, "grad_norm": 4.221768856048584, "learning_rate": 2.186134032390996e-06, "loss": 0.1216, "step": 577000 }, { "epoch": 5.67, "grad_norm": 14.782092094421387, "learning_rate": 2.1860099099367475e-06, "loss": 0.0384, "step": 577025 }, { "epoch": 5.67, "grad_norm": 2.4499189853668213, "learning_rate": 2.185885787482499e-06, "loss": 0.09, "step": 577050 }, { "epoch": 5.67, "grad_norm": 18.557458877563477, "learning_rate": 2.1857616650282503e-06, "loss": 0.0614, "step": 577075 }, { "epoch": 5.67, "grad_norm": 0.21733708679676056, "learning_rate": 2.185637542574002e-06, "loss": 0.094, "step": 577100 }, { "epoch": 5.67, "grad_norm": 13.537081718444824, "learning_rate": 2.1855134201197536e-06, "loss": 0.0511, "step": 577125 }, { "epoch": 5.67, "grad_norm": 0.6103794574737549, "learning_rate": 2.185389297665505e-06, "loss": 0.1019, "step": 577150 }, { "epoch": 5.67, "grad_norm": 2.0018084049224854, "learning_rate": 2.185265175211257e-06, "loss": 0.075, "step": 577175 }, { "epoch": 5.68, "grad_norm": 5.813149929046631, "learning_rate": 2.185141052757008e-06, "loss": 0.0848, "step": 577200 }, { "epoch": 5.68, "grad_norm": 5.116739749908447, "learning_rate": 2.1850169303027597e-06, "loss": 0.0442, "step": 577225 }, { "epoch": 5.68, "grad_norm": 0.36260396242141724, "learning_rate": 2.1848928078485113e-06, "loss": 0.076, "step": 577250 }, { "epoch": 5.68, "grad_norm": 15.254708290100098, "learning_rate": 2.1847686853942625e-06, "loss": 0.0807, "step": 577275 }, { "epoch": 5.68, "grad_norm": 0.26814135909080505, "learning_rate": 2.184644562940014e-06, "loss": 0.0817, "step": 577300 }, { "epoch": 5.68, "grad_norm": 8.59585952758789, "learning_rate": 2.184520440485766e-06, "loss": 0.0577, "step": 577325 }, { "epoch": 5.68, "grad_norm": 4.630189418792725, "learning_rate": 2.1843963180315174e-06, "loss": 0.0902, "step": 577350 }, { "epoch": 5.68, "grad_norm": 5.552585124969482, "learning_rate": 2.184272195577269e-06, "loss": 0.0586, "step": 577375 }, { "epoch": 5.68, "grad_norm": 3.528244733810425, "learning_rate": 2.1841480731230207e-06, "loss": 0.0885, "step": 577400 }, { "epoch": 5.68, "grad_norm": 11.30410385131836, "learning_rate": 2.184023950668772e-06, "loss": 0.0592, "step": 577425 }, { "epoch": 5.68, "grad_norm": 1.0136014223098755, "learning_rate": 2.1838998282145236e-06, "loss": 0.071, "step": 577450 }, { "epoch": 5.68, "grad_norm": 7.308492183685303, "learning_rate": 2.183775705760275e-06, "loss": 0.0374, "step": 577475 }, { "epoch": 5.68, "grad_norm": 0.6833376288414001, "learning_rate": 2.1836565482041966e-06, "loss": 0.0955, "step": 577500 }, { "epoch": 5.68, "grad_norm": 4.92172908782959, "learning_rate": 2.183532425749948e-06, "loss": 0.0619, "step": 577525 }, { "epoch": 5.68, "grad_norm": 0.3468813896179199, "learning_rate": 2.1834083032956995e-06, "loss": 0.075, "step": 577550 }, { "epoch": 5.68, "grad_norm": 11.966812133789062, "learning_rate": 2.183284180841451e-06, "loss": 0.0666, "step": 577575 }, { "epoch": 5.68, "grad_norm": 9.887511253356934, "learning_rate": 2.1831600583872028e-06, "loss": 0.0735, "step": 577600 }, { "epoch": 5.68, "grad_norm": 9.611433029174805, "learning_rate": 2.1830359359329544e-06, "loss": 0.0532, "step": 577625 }, { "epoch": 5.68, "grad_norm": 1.0313389301300049, "learning_rate": 2.182911813478706e-06, "loss": 0.0611, "step": 577650 }, { "epoch": 5.68, "grad_norm": 12.772706031799316, "learning_rate": 2.1827876910244572e-06, "loss": 0.0414, "step": 577675 }, { "epoch": 5.68, "grad_norm": 5.085030555725098, "learning_rate": 2.182663568570209e-06, "loss": 0.0977, "step": 577700 }, { "epoch": 5.68, "grad_norm": 10.378107070922852, "learning_rate": 2.1825394461159605e-06, "loss": 0.0383, "step": 577725 }, { "epoch": 5.68, "grad_norm": 8.174460411071777, "learning_rate": 2.1824153236617117e-06, "loss": 0.1087, "step": 577750 }, { "epoch": 5.68, "grad_norm": 8.671889305114746, "learning_rate": 2.1822912012074634e-06, "loss": 0.0602, "step": 577775 }, { "epoch": 5.68, "grad_norm": 2.776129961013794, "learning_rate": 2.182167078753215e-06, "loss": 0.0768, "step": 577800 }, { "epoch": 5.68, "grad_norm": 9.294960975646973, "learning_rate": 2.1820429562989666e-06, "loss": 0.0518, "step": 577825 }, { "epoch": 5.68, "grad_norm": 0.1689811497926712, "learning_rate": 2.1819188338447183e-06, "loss": 0.0942, "step": 577850 }, { "epoch": 5.68, "grad_norm": 14.039353370666504, "learning_rate": 2.1817947113904695e-06, "loss": 0.0504, "step": 577875 }, { "epoch": 5.68, "grad_norm": 3.1149678230285645, "learning_rate": 2.181670588936221e-06, "loss": 0.0929, "step": 577900 }, { "epoch": 5.68, "grad_norm": 12.417315483093262, "learning_rate": 2.1815464664819727e-06, "loss": 0.0549, "step": 577925 }, { "epoch": 5.68, "grad_norm": 0.9075329899787903, "learning_rate": 2.181422344027724e-06, "loss": 0.0847, "step": 577950 }, { "epoch": 5.68, "grad_norm": 15.069354057312012, "learning_rate": 2.1812982215734756e-06, "loss": 0.0582, "step": 577975 }, { "epoch": 5.68, "grad_norm": 1.5615342855453491, "learning_rate": 2.1811740991192272e-06, "loss": 0.075, "step": 578000 }, { "epoch": 5.68, "grad_norm": 12.86404037475586, "learning_rate": 2.181049976664979e-06, "loss": 0.0519, "step": 578025 }, { "epoch": 5.68, "grad_norm": 5.554650783538818, "learning_rate": 2.1809258542107305e-06, "loss": 0.0811, "step": 578050 }, { "epoch": 5.68, "grad_norm": 12.410323143005371, "learning_rate": 2.180801731756482e-06, "loss": 0.0455, "step": 578075 }, { "epoch": 5.68, "grad_norm": 6.0696234703063965, "learning_rate": 2.1806776093022333e-06, "loss": 0.0777, "step": 578100 }, { "epoch": 5.68, "grad_norm": 8.692122459411621, "learning_rate": 2.180553486847985e-06, "loss": 0.0486, "step": 578125 }, { "epoch": 5.68, "grad_norm": 1.553469181060791, "learning_rate": 2.1804293643937366e-06, "loss": 0.0916, "step": 578150 }, { "epoch": 5.68, "grad_norm": 3.538825273513794, "learning_rate": 2.180305241939488e-06, "loss": 0.0407, "step": 578175 }, { "epoch": 5.68, "grad_norm": 0.4348973035812378, "learning_rate": 2.1801811194852395e-06, "loss": 0.1042, "step": 578200 }, { "epoch": 5.69, "grad_norm": 5.059473991394043, "learning_rate": 2.180056997030991e-06, "loss": 0.054, "step": 578225 }, { "epoch": 5.69, "grad_norm": 7.475003719329834, "learning_rate": 2.1799328745767427e-06, "loss": 0.0713, "step": 578250 }, { "epoch": 5.69, "grad_norm": 12.763415336608887, "learning_rate": 2.1798087521224944e-06, "loss": 0.0639, "step": 578275 }, { "epoch": 5.69, "grad_norm": 3.60172176361084, "learning_rate": 2.1796846296682456e-06, "loss": 0.0905, "step": 578300 }, { "epoch": 5.69, "grad_norm": 13.667933464050293, "learning_rate": 2.1795605072139972e-06, "loss": 0.0444, "step": 578325 }, { "epoch": 5.69, "grad_norm": 6.929892063140869, "learning_rate": 2.179436384759749e-06, "loss": 0.074, "step": 578350 }, { "epoch": 5.69, "grad_norm": 13.989622116088867, "learning_rate": 2.1793122623055e-06, "loss": 0.0887, "step": 578375 }, { "epoch": 5.69, "grad_norm": 6.192164421081543, "learning_rate": 2.1791881398512517e-06, "loss": 0.1022, "step": 578400 }, { "epoch": 5.69, "grad_norm": 13.865365982055664, "learning_rate": 2.1790640173970033e-06, "loss": 0.0518, "step": 578425 }, { "epoch": 5.69, "grad_norm": 4.5972394943237305, "learning_rate": 2.178939894942755e-06, "loss": 0.0931, "step": 578450 }, { "epoch": 5.69, "grad_norm": 10.032458305358887, "learning_rate": 2.1788157724885066e-06, "loss": 0.056, "step": 578475 }, { "epoch": 5.69, "grad_norm": 2.2632393836975098, "learning_rate": 2.1786916500342582e-06, "loss": 0.083, "step": 578500 }, { "epoch": 5.69, "grad_norm": 14.734326362609863, "learning_rate": 2.1785675275800094e-06, "loss": 0.0576, "step": 578525 }, { "epoch": 5.69, "grad_norm": 2.359508991241455, "learning_rate": 2.178443405125761e-06, "loss": 0.0947, "step": 578550 }, { "epoch": 5.69, "grad_norm": 9.792121887207031, "learning_rate": 2.1783192826715127e-06, "loss": 0.0494, "step": 578575 }, { "epoch": 5.69, "grad_norm": 14.743351936340332, "learning_rate": 2.178195160217264e-06, "loss": 0.083, "step": 578600 }, { "epoch": 5.69, "grad_norm": 8.011573791503906, "learning_rate": 2.1780710377630156e-06, "loss": 0.0655, "step": 578625 }, { "epoch": 5.69, "grad_norm": 2.6191744804382324, "learning_rate": 2.177946915308767e-06, "loss": 0.0777, "step": 578650 }, { "epoch": 5.69, "grad_norm": 9.43694019317627, "learning_rate": 2.177822792854519e-06, "loss": 0.0554, "step": 578675 }, { "epoch": 5.69, "grad_norm": 0.9287886619567871, "learning_rate": 2.1776986704002705e-06, "loss": 0.0946, "step": 578700 }, { "epoch": 5.69, "grad_norm": 9.379849433898926, "learning_rate": 2.1775745479460217e-06, "loss": 0.0707, "step": 578725 }, { "epoch": 5.69, "grad_norm": 1.2793326377868652, "learning_rate": 2.1774504254917733e-06, "loss": 0.0826, "step": 578750 }, { "epoch": 5.69, "grad_norm": 5.565031051635742, "learning_rate": 2.177326303037525e-06, "loss": 0.0392, "step": 578775 }, { "epoch": 5.69, "grad_norm": 3.982121467590332, "learning_rate": 2.177202180583276e-06, "loss": 0.0899, "step": 578800 }, { "epoch": 5.69, "grad_norm": 13.669602394104004, "learning_rate": 2.177078058129028e-06, "loss": 0.0531, "step": 578825 }, { "epoch": 5.69, "grad_norm": 1.7209782600402832, "learning_rate": 2.1769539356747794e-06, "loss": 0.0942, "step": 578850 }, { "epoch": 5.69, "grad_norm": 7.086721897125244, "learning_rate": 2.176829813220531e-06, "loss": 0.0518, "step": 578875 }, { "epoch": 5.69, "grad_norm": 0.32261818647384644, "learning_rate": 2.1767056907662827e-06, "loss": 0.0829, "step": 578900 }, { "epoch": 5.69, "grad_norm": 9.872212409973145, "learning_rate": 2.1765815683120343e-06, "loss": 0.0687, "step": 578925 }, { "epoch": 5.69, "grad_norm": 3.9672658443450928, "learning_rate": 2.1764574458577855e-06, "loss": 0.0915, "step": 578950 }, { "epoch": 5.69, "grad_norm": 8.928106307983398, "learning_rate": 2.176333323403537e-06, "loss": 0.0592, "step": 578975 }, { "epoch": 5.69, "grad_norm": 9.905406951904297, "learning_rate": 2.176209200949289e-06, "loss": 0.0819, "step": 579000 }, { "epoch": 5.69, "grad_norm": 15.342670440673828, "learning_rate": 2.17608507849504e-06, "loss": 0.0487, "step": 579025 }, { "epoch": 5.69, "grad_norm": 0.9836016893386841, "learning_rate": 2.1759609560407917e-06, "loss": 0.0912, "step": 579050 }, { "epoch": 5.69, "grad_norm": 9.359463691711426, "learning_rate": 2.1758368335865433e-06, "loss": 0.0431, "step": 579075 }, { "epoch": 5.69, "grad_norm": 5.330907344818115, "learning_rate": 2.175712711132295e-06, "loss": 0.0905, "step": 579100 }, { "epoch": 5.69, "grad_norm": 14.934450149536133, "learning_rate": 2.1755885886780466e-06, "loss": 0.0617, "step": 579125 }, { "epoch": 5.69, "grad_norm": 5.349994659423828, "learning_rate": 2.1754644662237978e-06, "loss": 0.0865, "step": 579150 }, { "epoch": 5.69, "grad_norm": 12.162711143493652, "learning_rate": 2.1753403437695494e-06, "loss": 0.0631, "step": 579175 }, { "epoch": 5.69, "grad_norm": 2.898239850997925, "learning_rate": 2.175216221315301e-06, "loss": 0.0858, "step": 579200 }, { "epoch": 5.7, "grad_norm": 16.45186996459961, "learning_rate": 2.1750920988610523e-06, "loss": 0.0319, "step": 579225 }, { "epoch": 5.7, "grad_norm": 1.1162757873535156, "learning_rate": 2.174967976406804e-06, "loss": 0.0784, "step": 579250 }, { "epoch": 5.7, "grad_norm": 5.681748390197754, "learning_rate": 2.1748438539525555e-06, "loss": 0.0636, "step": 579275 }, { "epoch": 5.7, "grad_norm": 0.14197896420955658, "learning_rate": 2.174719731498307e-06, "loss": 0.0779, "step": 579300 }, { "epoch": 5.7, "grad_norm": 7.724573612213135, "learning_rate": 2.174595609044059e-06, "loss": 0.0593, "step": 579325 }, { "epoch": 5.7, "grad_norm": 4.539895057678223, "learning_rate": 2.1744714865898104e-06, "loss": 0.118, "step": 579350 }, { "epoch": 5.7, "grad_norm": 13.481059074401855, "learning_rate": 2.1743473641355616e-06, "loss": 0.0561, "step": 579375 }, { "epoch": 5.7, "grad_norm": 3.5562479496002197, "learning_rate": 2.1742232416813133e-06, "loss": 0.0968, "step": 579400 }, { "epoch": 5.7, "grad_norm": 16.960792541503906, "learning_rate": 2.174099119227065e-06, "loss": 0.066, "step": 579425 }, { "epoch": 5.7, "grad_norm": 0.6181586384773254, "learning_rate": 2.173974996772816e-06, "loss": 0.0649, "step": 579450 }, { "epoch": 5.7, "grad_norm": 14.033979415893555, "learning_rate": 2.1738508743185678e-06, "loss": 0.0545, "step": 579475 }, { "epoch": 5.7, "grad_norm": 1.0228557586669922, "learning_rate": 2.1737267518643194e-06, "loss": 0.0612, "step": 579500 }, { "epoch": 5.7, "grad_norm": 5.895701885223389, "learning_rate": 2.173602629410071e-06, "loss": 0.0622, "step": 579525 }, { "epoch": 5.7, "grad_norm": 6.4358134269714355, "learning_rate": 2.1734785069558227e-06, "loss": 0.083, "step": 579550 }, { "epoch": 5.7, "grad_norm": 13.339059829711914, "learning_rate": 2.173354384501574e-06, "loss": 0.0358, "step": 579575 }, { "epoch": 5.7, "grad_norm": 3.696242094039917, "learning_rate": 2.1732302620473255e-06, "loss": 0.0966, "step": 579600 }, { "epoch": 5.7, "grad_norm": 3.361057758331299, "learning_rate": 2.173106139593077e-06, "loss": 0.0405, "step": 579625 }, { "epoch": 5.7, "grad_norm": 0.08067913353443146, "learning_rate": 2.1729820171388284e-06, "loss": 0.0871, "step": 579650 }, { "epoch": 5.7, "grad_norm": 12.687137603759766, "learning_rate": 2.17285789468458e-06, "loss": 0.0763, "step": 579675 }, { "epoch": 5.7, "grad_norm": 1.2555103302001953, "learning_rate": 2.1727387371285014e-06, "loss": 0.1083, "step": 579700 }, { "epoch": 5.7, "grad_norm": 12.985756874084473, "learning_rate": 2.172614614674253e-06, "loss": 0.0411, "step": 579725 }, { "epoch": 5.7, "grad_norm": 3.648064374923706, "learning_rate": 2.1724904922200047e-06, "loss": 0.0789, "step": 579750 }, { "epoch": 5.7, "grad_norm": 9.8794584274292, "learning_rate": 2.1723663697657564e-06, "loss": 0.0526, "step": 579775 }, { "epoch": 5.7, "grad_norm": 0.1253177672624588, "learning_rate": 2.172242247311508e-06, "loss": 0.0702, "step": 579800 }, { "epoch": 5.7, "grad_norm": 11.547198295593262, "learning_rate": 2.172118124857259e-06, "loss": 0.044, "step": 579825 }, { "epoch": 5.7, "grad_norm": 4.375365734100342, "learning_rate": 2.171994002403011e-06, "loss": 0.0826, "step": 579850 }, { "epoch": 5.7, "grad_norm": 13.884828567504883, "learning_rate": 2.1718698799487625e-06, "loss": 0.0536, "step": 579875 }, { "epoch": 5.7, "grad_norm": 0.9604523777961731, "learning_rate": 2.1717457574945137e-06, "loss": 0.0842, "step": 579900 }, { "epoch": 5.7, "grad_norm": 5.712193965911865, "learning_rate": 2.1716216350402653e-06, "loss": 0.0475, "step": 579925 }, { "epoch": 5.7, "grad_norm": 6.735917091369629, "learning_rate": 2.171497512586017e-06, "loss": 0.0749, "step": 579950 }, { "epoch": 5.7, "grad_norm": 10.038034439086914, "learning_rate": 2.1713733901317686e-06, "loss": 0.0445, "step": 579975 }, { "epoch": 5.7, "grad_norm": 8.618919372558594, "learning_rate": 2.1712492676775202e-06, "loss": 0.0815, "step": 580000 }, { "epoch": 5.7, "eval_loss": 0.7635440826416016, "eval_runtime": 6068.2224, "eval_samples_per_second": 1.56, "eval_steps_per_second": 0.195, "eval_wer": 0.11454644949462435, "step": 580000 }, { "epoch": 5.7, "grad_norm": 4.86414098739624, "learning_rate": 2.171125145223272e-06, "loss": 0.0425, "step": 580025 }, { "epoch": 5.7, "grad_norm": 7.570699214935303, "learning_rate": 2.171001022769023e-06, "loss": 0.0884, "step": 580050 }, { "epoch": 5.7, "grad_norm": 10.807049751281738, "learning_rate": 2.1708769003147747e-06, "loss": 0.0464, "step": 580075 }, { "epoch": 5.7, "grad_norm": 0.17794711887836456, "learning_rate": 2.1707527778605263e-06, "loss": 0.0865, "step": 580100 }, { "epoch": 5.7, "grad_norm": 9.187962532043457, "learning_rate": 2.170628655406278e-06, "loss": 0.0456, "step": 580125 }, { "epoch": 5.7, "grad_norm": 1.8884321451187134, "learning_rate": 2.1705045329520296e-06, "loss": 0.0734, "step": 580150 }, { "epoch": 5.7, "grad_norm": 13.118922233581543, "learning_rate": 2.170380410497781e-06, "loss": 0.0431, "step": 580175 }, { "epoch": 5.7, "grad_norm": 3.9860012531280518, "learning_rate": 2.1702562880435325e-06, "loss": 0.0872, "step": 580200 }, { "epoch": 5.7, "grad_norm": 14.161953926086426, "learning_rate": 2.170132165589284e-06, "loss": 0.0599, "step": 580225 }, { "epoch": 5.71, "grad_norm": 1.972804307937622, "learning_rate": 2.1700080431350353e-06, "loss": 0.0836, "step": 580250 }, { "epoch": 5.71, "grad_norm": 7.930309295654297, "learning_rate": 2.169883920680787e-06, "loss": 0.0432, "step": 580275 }, { "epoch": 5.71, "grad_norm": 6.7859907150268555, "learning_rate": 2.1697597982265386e-06, "loss": 0.1106, "step": 580300 }, { "epoch": 5.71, "grad_norm": 4.188480377197266, "learning_rate": 2.16963567577229e-06, "loss": 0.053, "step": 580325 }, { "epoch": 5.71, "grad_norm": 5.348471641540527, "learning_rate": 2.169511553318042e-06, "loss": 0.0769, "step": 580350 }, { "epoch": 5.71, "grad_norm": 4.157258033752441, "learning_rate": 2.1693874308637935e-06, "loss": 0.0631, "step": 580375 }, { "epoch": 5.71, "grad_norm": 1.683214545249939, "learning_rate": 2.1692633084095447e-06, "loss": 0.0683, "step": 580400 }, { "epoch": 5.71, "grad_norm": 14.012357711791992, "learning_rate": 2.1691391859552963e-06, "loss": 0.0442, "step": 580425 }, { "epoch": 5.71, "grad_norm": 3.639909267425537, "learning_rate": 2.169015063501048e-06, "loss": 0.084, "step": 580450 }, { "epoch": 5.71, "grad_norm": 5.404869079589844, "learning_rate": 2.168890941046799e-06, "loss": 0.0535, "step": 580475 }, { "epoch": 5.71, "grad_norm": 2.589984893798828, "learning_rate": 2.168766818592551e-06, "loss": 0.0647, "step": 580500 }, { "epoch": 5.71, "grad_norm": 71.3851547241211, "learning_rate": 2.1686426961383024e-06, "loss": 0.0728, "step": 580525 }, { "epoch": 5.71, "grad_norm": 0.007824946194887161, "learning_rate": 2.168518573684054e-06, "loss": 0.09, "step": 580550 }, { "epoch": 5.71, "grad_norm": 18.513708114624023, "learning_rate": 2.1683944512298057e-06, "loss": 0.0504, "step": 580575 }, { "epoch": 5.71, "grad_norm": 0.5733749866485596, "learning_rate": 2.168270328775557e-06, "loss": 0.0656, "step": 580600 }, { "epoch": 5.71, "grad_norm": 8.469169616699219, "learning_rate": 2.1681462063213086e-06, "loss": 0.0419, "step": 580625 }, { "epoch": 5.71, "grad_norm": 3.368940591812134, "learning_rate": 2.16802208386706e-06, "loss": 0.0622, "step": 580650 }, { "epoch": 5.71, "grad_norm": 12.619842529296875, "learning_rate": 2.1678979614128114e-06, "loss": 0.0625, "step": 580675 }, { "epoch": 5.71, "grad_norm": 0.09009427577257156, "learning_rate": 2.167773838958563e-06, "loss": 0.1157, "step": 580700 }, { "epoch": 5.71, "grad_norm": 15.211749076843262, "learning_rate": 2.1676497165043147e-06, "loss": 0.0723, "step": 580725 }, { "epoch": 5.71, "grad_norm": 3.9606432914733887, "learning_rate": 2.1675255940500663e-06, "loss": 0.0816, "step": 580750 }, { "epoch": 5.71, "grad_norm": 8.08758544921875, "learning_rate": 2.167401471595818e-06, "loss": 0.0303, "step": 580775 }, { "epoch": 5.71, "grad_norm": 0.24422980844974518, "learning_rate": 2.1672773491415696e-06, "loss": 0.0746, "step": 580800 }, { "epoch": 5.71, "grad_norm": 10.388087272644043, "learning_rate": 2.1671532266873208e-06, "loss": 0.0447, "step": 580825 }, { "epoch": 5.71, "grad_norm": 7.03493070602417, "learning_rate": 2.1670291042330724e-06, "loss": 0.1005, "step": 580850 }, { "epoch": 5.71, "grad_norm": 6.359462261199951, "learning_rate": 2.166904981778824e-06, "loss": 0.0506, "step": 580875 }, { "epoch": 5.71, "grad_norm": 0.03605649992823601, "learning_rate": 2.1667808593245753e-06, "loss": 0.0743, "step": 580900 }, { "epoch": 5.71, "grad_norm": 5.505781650543213, "learning_rate": 2.166656736870327e-06, "loss": 0.0431, "step": 580925 }, { "epoch": 5.71, "grad_norm": 3.4572532176971436, "learning_rate": 2.1665326144160785e-06, "loss": 0.0647, "step": 580950 }, { "epoch": 5.71, "grad_norm": 18.038606643676758, "learning_rate": 2.16640849196183e-06, "loss": 0.0605, "step": 580975 }, { "epoch": 5.71, "grad_norm": 1.0624403953552246, "learning_rate": 2.166284369507582e-06, "loss": 0.0656, "step": 581000 }, { "epoch": 5.71, "grad_norm": 18.991016387939453, "learning_rate": 2.166160247053333e-06, "loss": 0.0404, "step": 581025 }, { "epoch": 5.71, "grad_norm": 0.11248928308486938, "learning_rate": 2.1660361245990847e-06, "loss": 0.0699, "step": 581050 }, { "epoch": 5.71, "grad_norm": 12.377443313598633, "learning_rate": 2.1659120021448363e-06, "loss": 0.053, "step": 581075 }, { "epoch": 5.71, "grad_norm": 0.9013954997062683, "learning_rate": 2.1657878796905875e-06, "loss": 0.0797, "step": 581100 }, { "epoch": 5.71, "grad_norm": 2.5920088291168213, "learning_rate": 2.165663757236339e-06, "loss": 0.0658, "step": 581125 }, { "epoch": 5.71, "grad_norm": 0.7863203883171082, "learning_rate": 2.1655396347820908e-06, "loss": 0.0643, "step": 581150 }, { "epoch": 5.71, "grad_norm": 18.23069190979004, "learning_rate": 2.1654155123278424e-06, "loss": 0.0558, "step": 581175 }, { "epoch": 5.71, "grad_norm": 3.3577704429626465, "learning_rate": 2.165291389873594e-06, "loss": 0.096, "step": 581200 }, { "epoch": 5.71, "grad_norm": 5.3263654708862305, "learning_rate": 2.1651672674193457e-06, "loss": 0.0634, "step": 581225 }, { "epoch": 5.71, "grad_norm": 5.425230026245117, "learning_rate": 2.165043144965097e-06, "loss": 0.0804, "step": 581250 }, { "epoch": 5.72, "grad_norm": 10.326116561889648, "learning_rate": 2.1649190225108485e-06, "loss": 0.0485, "step": 581275 }, { "epoch": 5.72, "grad_norm": 7.468583583831787, "learning_rate": 2.1647949000566e-06, "loss": 0.0716, "step": 581300 }, { "epoch": 5.72, "grad_norm": 14.683606147766113, "learning_rate": 2.1646707776023514e-06, "loss": 0.0672, "step": 581325 }, { "epoch": 5.72, "grad_norm": 1.491715431213379, "learning_rate": 2.164546655148103e-06, "loss": 0.0651, "step": 581350 }, { "epoch": 5.72, "grad_norm": 26.113906860351562, "learning_rate": 2.1644225326938546e-06, "loss": 0.0651, "step": 581375 }, { "epoch": 5.72, "grad_norm": 6.637989044189453, "learning_rate": 2.1642984102396063e-06, "loss": 0.0947, "step": 581400 }, { "epoch": 5.72, "grad_norm": 1.266459345817566, "learning_rate": 2.164174287785358e-06, "loss": 0.0685, "step": 581425 }, { "epoch": 5.72, "grad_norm": 0.012734698131680489, "learning_rate": 2.164050165331109e-06, "loss": 0.0634, "step": 581450 }, { "epoch": 5.72, "grad_norm": 9.42680835723877, "learning_rate": 2.1639260428768608e-06, "loss": 0.0458, "step": 581475 }, { "epoch": 5.72, "grad_norm": 0.02419901080429554, "learning_rate": 2.1638019204226124e-06, "loss": 0.126, "step": 581500 }, { "epoch": 5.72, "grad_norm": 5.806612968444824, "learning_rate": 2.1636777979683636e-06, "loss": 0.0399, "step": 581525 }, { "epoch": 5.72, "grad_norm": 0.5285012125968933, "learning_rate": 2.1635536755141152e-06, "loss": 0.0792, "step": 581550 }, { "epoch": 5.72, "grad_norm": 18.56608772277832, "learning_rate": 2.163429553059867e-06, "loss": 0.0399, "step": 581575 }, { "epoch": 5.72, "grad_norm": 1.9476560354232788, "learning_rate": 2.1633054306056185e-06, "loss": 0.0881, "step": 581600 }, { "epoch": 5.72, "grad_norm": 3.9391424655914307, "learning_rate": 2.16318130815137e-06, "loss": 0.0607, "step": 581625 }, { "epoch": 5.72, "grad_norm": 0.38196444511413574, "learning_rate": 2.1630571856971218e-06, "loss": 0.0659, "step": 581650 }, { "epoch": 5.72, "grad_norm": 3.582530975341797, "learning_rate": 2.162933063242873e-06, "loss": 0.0586, "step": 581675 }, { "epoch": 5.72, "grad_norm": 4.879631042480469, "learning_rate": 2.1628089407886246e-06, "loss": 0.0688, "step": 581700 }, { "epoch": 5.72, "grad_norm": 6.3822102546691895, "learning_rate": 2.1626848183343763e-06, "loss": 0.0585, "step": 581725 }, { "epoch": 5.72, "grad_norm": 0.06557626277208328, "learning_rate": 2.1625606958801275e-06, "loss": 0.0881, "step": 581750 }, { "epoch": 5.72, "grad_norm": 8.084259986877441, "learning_rate": 2.162436573425879e-06, "loss": 0.0459, "step": 581775 }, { "epoch": 5.72, "grad_norm": 0.32051876187324524, "learning_rate": 2.1623124509716307e-06, "loss": 0.0808, "step": 581800 }, { "epoch": 5.72, "grad_norm": 6.109321117401123, "learning_rate": 2.1621883285173824e-06, "loss": 0.0404, "step": 581825 }, { "epoch": 5.72, "grad_norm": 0.10067916661500931, "learning_rate": 2.162064206063134e-06, "loss": 0.0939, "step": 581850 }, { "epoch": 5.72, "grad_norm": 13.608774185180664, "learning_rate": 2.1619400836088852e-06, "loss": 0.0691, "step": 581875 }, { "epoch": 5.72, "grad_norm": 10.046521186828613, "learning_rate": 2.161815961154637e-06, "loss": 0.0903, "step": 581900 }, { "epoch": 5.72, "grad_norm": 7.5417633056640625, "learning_rate": 2.1616918387003885e-06, "loss": 0.0434, "step": 581925 }, { "epoch": 5.72, "grad_norm": 9.349979400634766, "learning_rate": 2.1615677162461397e-06, "loss": 0.0763, "step": 581950 }, { "epoch": 5.72, "grad_norm": 6.119059085845947, "learning_rate": 2.1614435937918913e-06, "loss": 0.0354, "step": 581975 }, { "epoch": 5.72, "grad_norm": 4.3533501625061035, "learning_rate": 2.161319471337643e-06, "loss": 0.0833, "step": 582000 }, { "epoch": 5.72, "grad_norm": 14.667672157287598, "learning_rate": 2.1611953488833946e-06, "loss": 0.0558, "step": 582025 }, { "epoch": 5.72, "grad_norm": 5.914393901824951, "learning_rate": 2.1610712264291462e-06, "loss": 0.1027, "step": 582050 }, { "epoch": 5.72, "grad_norm": 10.937938690185547, "learning_rate": 2.160947103974898e-06, "loss": 0.0456, "step": 582075 }, { "epoch": 5.72, "grad_norm": 0.0984557643532753, "learning_rate": 2.160822981520649e-06, "loss": 0.1021, "step": 582100 }, { "epoch": 5.72, "grad_norm": 0.260824054479599, "learning_rate": 2.1606988590664007e-06, "loss": 0.0299, "step": 582125 }, { "epoch": 5.72, "grad_norm": 0.6754244565963745, "learning_rate": 2.1605747366121524e-06, "loss": 0.1071, "step": 582150 }, { "epoch": 5.72, "grad_norm": 8.01246452331543, "learning_rate": 2.1604506141579036e-06, "loss": 0.0499, "step": 582175 }, { "epoch": 5.72, "grad_norm": 5.431916236877441, "learning_rate": 2.160326491703655e-06, "loss": 0.0843, "step": 582200 }, { "epoch": 5.72, "grad_norm": 11.135793685913086, "learning_rate": 2.1602073341475767e-06, "loss": 0.0628, "step": 582225 }, { "epoch": 5.72, "grad_norm": 1.615168809890747, "learning_rate": 2.1600832116933283e-06, "loss": 0.0839, "step": 582250 }, { "epoch": 5.73, "grad_norm": 7.883699893951416, "learning_rate": 2.15995908923908e-06, "loss": 0.048, "step": 582275 }, { "epoch": 5.73, "grad_norm": 0.20144149661064148, "learning_rate": 2.1598349667848316e-06, "loss": 0.0891, "step": 582300 }, { "epoch": 5.73, "grad_norm": 13.64217758178711, "learning_rate": 2.159710844330583e-06, "loss": 0.0535, "step": 582325 }, { "epoch": 5.73, "grad_norm": 10.022445678710938, "learning_rate": 2.1595867218763344e-06, "loss": 0.0655, "step": 582350 }, { "epoch": 5.73, "grad_norm": 11.907417297363281, "learning_rate": 2.159462599422086e-06, "loss": 0.0666, "step": 582375 }, { "epoch": 5.73, "grad_norm": 0.2889660596847534, "learning_rate": 2.1593384769678373e-06, "loss": 0.0931, "step": 582400 }, { "epoch": 5.73, "grad_norm": 11.415055274963379, "learning_rate": 2.159214354513589e-06, "loss": 0.0487, "step": 582425 }, { "epoch": 5.73, "grad_norm": 0.5848806500434875, "learning_rate": 2.1590902320593405e-06, "loss": 0.0839, "step": 582450 }, { "epoch": 5.73, "grad_norm": 14.79243278503418, "learning_rate": 2.158966109605092e-06, "loss": 0.0653, "step": 582475 }, { "epoch": 5.73, "grad_norm": 0.0953659638762474, "learning_rate": 2.158841987150844e-06, "loss": 0.0917, "step": 582500 }, { "epoch": 5.73, "grad_norm": 23.615398406982422, "learning_rate": 2.1587178646965954e-06, "loss": 0.0523, "step": 582525 }, { "epoch": 5.73, "grad_norm": 10.509793281555176, "learning_rate": 2.1585937422423466e-06, "loss": 0.0788, "step": 582550 }, { "epoch": 5.73, "grad_norm": 4.271074295043945, "learning_rate": 2.1584696197880983e-06, "loss": 0.0414, "step": 582575 }, { "epoch": 5.73, "grad_norm": 0.010392465628683567, "learning_rate": 2.15834549733385e-06, "loss": 0.0863, "step": 582600 }, { "epoch": 5.73, "grad_norm": 7.667080402374268, "learning_rate": 2.158221374879601e-06, "loss": 0.0367, "step": 582625 }, { "epoch": 5.73, "grad_norm": 0.008043593727052212, "learning_rate": 2.1580972524253528e-06, "loss": 0.0938, "step": 582650 }, { "epoch": 5.73, "grad_norm": 3.1262621879577637, "learning_rate": 2.1579731299711044e-06, "loss": 0.0394, "step": 582675 }, { "epoch": 5.73, "grad_norm": 7.716240882873535, "learning_rate": 2.157849007516856e-06, "loss": 0.108, "step": 582700 }, { "epoch": 5.73, "grad_norm": 5.418607234954834, "learning_rate": 2.1577248850626077e-06, "loss": 0.0523, "step": 582725 }, { "epoch": 5.73, "grad_norm": 21.108034133911133, "learning_rate": 2.1576007626083593e-06, "loss": 0.0894, "step": 582750 }, { "epoch": 5.73, "grad_norm": 18.058250427246094, "learning_rate": 2.1574766401541105e-06, "loss": 0.0672, "step": 582775 }, { "epoch": 5.73, "grad_norm": 0.5489497184753418, "learning_rate": 2.157352517699862e-06, "loss": 0.0742, "step": 582800 }, { "epoch": 5.73, "grad_norm": 13.601542472839355, "learning_rate": 2.1572283952456134e-06, "loss": 0.0656, "step": 582825 }, { "epoch": 5.73, "grad_norm": 1.2486504316329956, "learning_rate": 2.157104272791365e-06, "loss": 0.0855, "step": 582850 }, { "epoch": 5.73, "grad_norm": 9.921835899353027, "learning_rate": 2.1569801503371166e-06, "loss": 0.0702, "step": 582875 }, { "epoch": 5.73, "grad_norm": 7.2388691902160645, "learning_rate": 2.1568560278828683e-06, "loss": 0.1186, "step": 582900 }, { "epoch": 5.73, "grad_norm": 12.180460929870605, "learning_rate": 2.15673190542862e-06, "loss": 0.0623, "step": 582925 }, { "epoch": 5.73, "grad_norm": 0.12424791604280472, "learning_rate": 2.1566077829743715e-06, "loss": 0.0889, "step": 582950 }, { "epoch": 5.73, "grad_norm": 15.023056030273438, "learning_rate": 2.1564836605201227e-06, "loss": 0.0926, "step": 582975 }, { "epoch": 5.73, "grad_norm": 3.097083806991577, "learning_rate": 2.1563595380658744e-06, "loss": 0.0871, "step": 583000 }, { "epoch": 5.73, "grad_norm": 9.258389472961426, "learning_rate": 2.156235415611626e-06, "loss": 0.0578, "step": 583025 }, { "epoch": 5.73, "grad_norm": 3.4445159435272217, "learning_rate": 2.1561112931573776e-06, "loss": 0.0896, "step": 583050 }, { "epoch": 5.73, "grad_norm": 7.81548547744751, "learning_rate": 2.1559871707031293e-06, "loss": 0.0518, "step": 583075 }, { "epoch": 5.73, "grad_norm": 0.969290018081665, "learning_rate": 2.155863048248881e-06, "loss": 0.0997, "step": 583100 }, { "epoch": 5.73, "grad_norm": 11.85377311706543, "learning_rate": 2.155738925794632e-06, "loss": 0.0489, "step": 583125 }, { "epoch": 5.73, "grad_norm": 0.17982862889766693, "learning_rate": 2.1556148033403838e-06, "loss": 0.07, "step": 583150 }, { "epoch": 5.73, "grad_norm": 12.074051856994629, "learning_rate": 2.1554906808861354e-06, "loss": 0.0407, "step": 583175 }, { "epoch": 5.73, "grad_norm": 3.577770233154297, "learning_rate": 2.1553665584318866e-06, "loss": 0.0911, "step": 583200 }, { "epoch": 5.73, "grad_norm": 18.72088623046875, "learning_rate": 2.1552424359776382e-06, "loss": 0.0614, "step": 583225 }, { "epoch": 5.73, "grad_norm": 0.009103269316256046, "learning_rate": 2.15511831352339e-06, "loss": 0.1018, "step": 583250 }, { "epoch": 5.73, "grad_norm": 15.348394393920898, "learning_rate": 2.1549941910691415e-06, "loss": 0.0352, "step": 583275 }, { "epoch": 5.74, "grad_norm": 1.0616041421890259, "learning_rate": 2.154870068614893e-06, "loss": 0.0717, "step": 583300 }, { "epoch": 5.74, "grad_norm": 5.1224446296691895, "learning_rate": 2.1547459461606444e-06, "loss": 0.0509, "step": 583325 }, { "epoch": 5.74, "grad_norm": 5.252969264984131, "learning_rate": 2.154621823706396e-06, "loss": 0.0902, "step": 583350 }, { "epoch": 5.74, "grad_norm": 10.418448448181152, "learning_rate": 2.1544977012521476e-06, "loss": 0.057, "step": 583375 }, { "epoch": 5.74, "grad_norm": 0.9132574200630188, "learning_rate": 2.154373578797899e-06, "loss": 0.064, "step": 583400 }, { "epoch": 5.74, "grad_norm": 6.7244768142700195, "learning_rate": 2.1542494563436505e-06, "loss": 0.05, "step": 583425 }, { "epoch": 5.74, "grad_norm": 6.210404872894287, "learning_rate": 2.154125333889402e-06, "loss": 0.0972, "step": 583450 }, { "epoch": 5.74, "grad_norm": 15.871882438659668, "learning_rate": 2.1540012114351537e-06, "loss": 0.0739, "step": 583475 }, { "epoch": 5.74, "grad_norm": 1.8655298948287964, "learning_rate": 2.1538770889809054e-06, "loss": 0.1075, "step": 583500 }, { "epoch": 5.74, "grad_norm": 14.600784301757812, "learning_rate": 2.153752966526657e-06, "loss": 0.0554, "step": 583525 }, { "epoch": 5.74, "grad_norm": 0.052044931799173355, "learning_rate": 2.1536288440724082e-06, "loss": 0.0818, "step": 583550 }, { "epoch": 5.74, "grad_norm": 7.715011119842529, "learning_rate": 2.15350472161816e-06, "loss": 0.058, "step": 583575 }, { "epoch": 5.74, "grad_norm": 0.02460184134542942, "learning_rate": 2.1533805991639115e-06, "loss": 0.0636, "step": 583600 }, { "epoch": 5.74, "grad_norm": 13.129436492919922, "learning_rate": 2.1532564767096627e-06, "loss": 0.0643, "step": 583625 }, { "epoch": 5.74, "grad_norm": 1.2301840782165527, "learning_rate": 2.1531323542554143e-06, "loss": 0.0884, "step": 583650 }, { "epoch": 5.74, "grad_norm": 4.552608489990234, "learning_rate": 2.153008231801166e-06, "loss": 0.0429, "step": 583675 }, { "epoch": 5.74, "grad_norm": 0.3650382161140442, "learning_rate": 2.1528841093469176e-06, "loss": 0.0629, "step": 583700 }, { "epoch": 5.74, "grad_norm": 9.788771629333496, "learning_rate": 2.1527599868926692e-06, "loss": 0.0425, "step": 583725 }, { "epoch": 5.74, "grad_norm": 0.5186398029327393, "learning_rate": 2.1526358644384205e-06, "loss": 0.0624, "step": 583750 }, { "epoch": 5.74, "grad_norm": 5.123228549957275, "learning_rate": 2.152511741984172e-06, "loss": 0.0395, "step": 583775 }, { "epoch": 5.74, "grad_norm": 6.3095622062683105, "learning_rate": 2.1523876195299237e-06, "loss": 0.0675, "step": 583800 }, { "epoch": 5.74, "grad_norm": 7.173234462738037, "learning_rate": 2.152263497075675e-06, "loss": 0.0408, "step": 583825 }, { "epoch": 5.74, "grad_norm": 6.190356731414795, "learning_rate": 2.1521393746214266e-06, "loss": 0.087, "step": 583850 }, { "epoch": 5.74, "grad_norm": 16.969816207885742, "learning_rate": 2.152015252167178e-06, "loss": 0.0538, "step": 583875 }, { "epoch": 5.74, "grad_norm": 8.524860382080078, "learning_rate": 2.15189112971293e-06, "loss": 0.0681, "step": 583900 }, { "epoch": 5.74, "grad_norm": 17.790977478027344, "learning_rate": 2.1517670072586815e-06, "loss": 0.0715, "step": 583925 }, { "epoch": 5.74, "grad_norm": 0.4506610035896301, "learning_rate": 2.151642884804433e-06, "loss": 0.0627, "step": 583950 }, { "epoch": 5.74, "grad_norm": 15.296664237976074, "learning_rate": 2.1515187623501843e-06, "loss": 0.0519, "step": 583975 }, { "epoch": 5.74, "grad_norm": 0.02678721398115158, "learning_rate": 2.151394639895936e-06, "loss": 0.0792, "step": 584000 }, { "epoch": 5.74, "grad_norm": 9.175406455993652, "learning_rate": 2.1512705174416876e-06, "loss": 0.0349, "step": 584025 }, { "epoch": 5.74, "grad_norm": 2.5526187419891357, "learning_rate": 2.151146394987439e-06, "loss": 0.0748, "step": 584050 }, { "epoch": 5.74, "grad_norm": 10.856969833374023, "learning_rate": 2.1510222725331904e-06, "loss": 0.0591, "step": 584075 }, { "epoch": 5.74, "grad_norm": 0.5269800424575806, "learning_rate": 2.150898150078942e-06, "loss": 0.0982, "step": 584100 }, { "epoch": 5.74, "grad_norm": 11.790452003479004, "learning_rate": 2.1507740276246937e-06, "loss": 0.046, "step": 584125 }, { "epoch": 5.74, "grad_norm": 0.009806890040636063, "learning_rate": 2.1506499051704453e-06, "loss": 0.1015, "step": 584150 }, { "epoch": 5.74, "grad_norm": 0.8316639065742493, "learning_rate": 2.1505257827161966e-06, "loss": 0.0407, "step": 584175 }, { "epoch": 5.74, "grad_norm": 1.1521542072296143, "learning_rate": 2.150401660261948e-06, "loss": 0.0687, "step": 584200 }, { "epoch": 5.74, "grad_norm": 11.014281272888184, "learning_rate": 2.1502775378077e-06, "loss": 0.0449, "step": 584225 }, { "epoch": 5.74, "grad_norm": 0.19364862143993378, "learning_rate": 2.1501583802516213e-06, "loss": 0.0635, "step": 584250 }, { "epoch": 5.74, "grad_norm": 9.652429580688477, "learning_rate": 2.1500342577973725e-06, "loss": 0.052, "step": 584275 }, { "epoch": 5.74, "grad_norm": 4.934818267822266, "learning_rate": 2.149910135343124e-06, "loss": 0.0858, "step": 584300 }, { "epoch": 5.75, "grad_norm": 9.257209777832031, "learning_rate": 2.1497860128888758e-06, "loss": 0.0419, "step": 584325 }, { "epoch": 5.75, "grad_norm": 2.909092903137207, "learning_rate": 2.1496618904346274e-06, "loss": 0.114, "step": 584350 }, { "epoch": 5.75, "grad_norm": 16.90567398071289, "learning_rate": 2.149537767980379e-06, "loss": 0.062, "step": 584375 }, { "epoch": 5.75, "grad_norm": 3.733006238937378, "learning_rate": 2.1494136455261307e-06, "loss": 0.103, "step": 584400 }, { "epoch": 5.75, "grad_norm": 6.299493312835693, "learning_rate": 2.149289523071882e-06, "loss": 0.0467, "step": 584425 }, { "epoch": 5.75, "grad_norm": 2.8799431324005127, "learning_rate": 2.1491654006176335e-06, "loss": 0.0982, "step": 584450 }, { "epoch": 5.75, "grad_norm": 9.62285041809082, "learning_rate": 2.149041278163385e-06, "loss": 0.0481, "step": 584475 }, { "epoch": 5.75, "grad_norm": 1.8404432535171509, "learning_rate": 2.1489171557091364e-06, "loss": 0.0919, "step": 584500 }, { "epoch": 5.75, "grad_norm": 20.34187126159668, "learning_rate": 2.148793033254888e-06, "loss": 0.0587, "step": 584525 }, { "epoch": 5.75, "grad_norm": 5.932919502258301, "learning_rate": 2.1486689108006396e-06, "loss": 0.0849, "step": 584550 }, { "epoch": 5.75, "grad_norm": 5.012416839599609, "learning_rate": 2.1485447883463913e-06, "loss": 0.0419, "step": 584575 }, { "epoch": 5.75, "grad_norm": 0.22489839792251587, "learning_rate": 2.148420665892143e-06, "loss": 0.0572, "step": 584600 }, { "epoch": 5.75, "grad_norm": 7.871328353881836, "learning_rate": 2.1482965434378945e-06, "loss": 0.0501, "step": 584625 }, { "epoch": 5.75, "grad_norm": 5.168237209320068, "learning_rate": 2.1481724209836457e-06, "loss": 0.0978, "step": 584650 }, { "epoch": 5.75, "grad_norm": 7.420399188995361, "learning_rate": 2.1480482985293974e-06, "loss": 0.0496, "step": 584675 }, { "epoch": 5.75, "grad_norm": 0.5193085670471191, "learning_rate": 2.1479241760751486e-06, "loss": 0.0777, "step": 584700 }, { "epoch": 5.75, "grad_norm": 14.078271865844727, "learning_rate": 2.1478000536209002e-06, "loss": 0.0442, "step": 584725 }, { "epoch": 5.75, "grad_norm": 9.237326622009277, "learning_rate": 2.147675931166652e-06, "loss": 0.0847, "step": 584750 }, { "epoch": 5.75, "grad_norm": 6.8516364097595215, "learning_rate": 2.1475518087124035e-06, "loss": 0.0805, "step": 584775 }, { "epoch": 5.75, "grad_norm": 2.1894190311431885, "learning_rate": 2.147427686258155e-06, "loss": 0.073, "step": 584800 }, { "epoch": 5.75, "grad_norm": 2.579209804534912, "learning_rate": 2.1473035638039068e-06, "loss": 0.0517, "step": 584825 }, { "epoch": 5.75, "grad_norm": 0.1104799211025238, "learning_rate": 2.147179441349658e-06, "loss": 0.0725, "step": 584850 }, { "epoch": 5.75, "grad_norm": 8.15982723236084, "learning_rate": 2.1470553188954096e-06, "loss": 0.044, "step": 584875 }, { "epoch": 5.75, "grad_norm": 1.3533647060394287, "learning_rate": 2.1469311964411612e-06, "loss": 0.0632, "step": 584900 }, { "epoch": 5.75, "grad_norm": 14.36706829071045, "learning_rate": 2.1468070739869125e-06, "loss": 0.0434, "step": 584925 }, { "epoch": 5.75, "grad_norm": 4.1585612297058105, "learning_rate": 2.146682951532664e-06, "loss": 0.1352, "step": 584950 }, { "epoch": 5.75, "grad_norm": 8.347411155700684, "learning_rate": 2.1465588290784157e-06, "loss": 0.0444, "step": 584975 }, { "epoch": 5.75, "grad_norm": 4.925941467285156, "learning_rate": 2.1464347066241674e-06, "loss": 0.107, "step": 585000 }, { "epoch": 5.75, "grad_norm": 16.24008560180664, "learning_rate": 2.146310584169919e-06, "loss": 0.0692, "step": 585025 }, { "epoch": 5.75, "grad_norm": 1.2088202238082886, "learning_rate": 2.1461864617156706e-06, "loss": 0.0737, "step": 585050 }, { "epoch": 5.75, "grad_norm": 7.768581867218018, "learning_rate": 2.146062339261422e-06, "loss": 0.0474, "step": 585075 }, { "epoch": 5.75, "grad_norm": 0.7390689253807068, "learning_rate": 2.1459382168071735e-06, "loss": 0.09, "step": 585100 }, { "epoch": 5.75, "grad_norm": 6.0011749267578125, "learning_rate": 2.1458140943529247e-06, "loss": 0.0439, "step": 585125 }, { "epoch": 5.75, "grad_norm": 0.7627333998680115, "learning_rate": 2.1456899718986763e-06, "loss": 0.0759, "step": 585150 }, { "epoch": 5.75, "grad_norm": 16.044288635253906, "learning_rate": 2.145565849444428e-06, "loss": 0.0642, "step": 585175 }, { "epoch": 5.75, "grad_norm": 4.0628156661987305, "learning_rate": 2.1454417269901796e-06, "loss": 0.0763, "step": 585200 }, { "epoch": 5.75, "grad_norm": 9.287616729736328, "learning_rate": 2.1453176045359312e-06, "loss": 0.0559, "step": 585225 }, { "epoch": 5.75, "grad_norm": 0.04215330258011818, "learning_rate": 2.145193482081683e-06, "loss": 0.0791, "step": 585250 }, { "epoch": 5.75, "grad_norm": 16.014318466186523, "learning_rate": 2.145069359627434e-06, "loss": 0.0598, "step": 585275 }, { "epoch": 5.75, "grad_norm": 5.5958733558654785, "learning_rate": 2.1449452371731857e-06, "loss": 0.0854, "step": 585300 }, { "epoch": 5.76, "grad_norm": 16.045429229736328, "learning_rate": 2.1448211147189373e-06, "loss": 0.0528, "step": 585325 }, { "epoch": 5.76, "grad_norm": 6.158514499664307, "learning_rate": 2.1446969922646886e-06, "loss": 0.065, "step": 585350 }, { "epoch": 5.76, "grad_norm": 10.148375511169434, "learning_rate": 2.14457286981044e-06, "loss": 0.0512, "step": 585375 }, { "epoch": 5.76, "grad_norm": 6.109524250030518, "learning_rate": 2.144448747356192e-06, "loss": 0.0955, "step": 585400 }, { "epoch": 5.76, "grad_norm": 13.412144660949707, "learning_rate": 2.1443246249019435e-06, "loss": 0.0503, "step": 585425 }, { "epoch": 5.76, "grad_norm": 0.3926852345466614, "learning_rate": 2.144200502447695e-06, "loss": 0.0988, "step": 585450 }, { "epoch": 5.76, "grad_norm": 16.37283706665039, "learning_rate": 2.1440763799934467e-06, "loss": 0.0419, "step": 585475 }, { "epoch": 5.76, "grad_norm": 2.1559157371520996, "learning_rate": 2.143952257539198e-06, "loss": 0.0809, "step": 585500 }, { "epoch": 5.76, "grad_norm": 17.919452667236328, "learning_rate": 2.1438281350849496e-06, "loss": 0.0432, "step": 585525 }, { "epoch": 5.76, "grad_norm": 2.4329161643981934, "learning_rate": 2.143704012630701e-06, "loss": 0.0837, "step": 585550 }, { "epoch": 5.76, "grad_norm": 10.032885551452637, "learning_rate": 2.1435798901764524e-06, "loss": 0.0323, "step": 585575 }, { "epoch": 5.76, "grad_norm": 3.000027656555176, "learning_rate": 2.143455767722204e-06, "loss": 0.0941, "step": 585600 }, { "epoch": 5.76, "grad_norm": 9.825753211975098, "learning_rate": 2.1433316452679557e-06, "loss": 0.0267, "step": 585625 }, { "epoch": 5.76, "grad_norm": 0.479706734418869, "learning_rate": 2.1432075228137073e-06, "loss": 0.0919, "step": 585650 }, { "epoch": 5.76, "grad_norm": 5.1814188957214355, "learning_rate": 2.143083400359459e-06, "loss": 0.0335, "step": 585675 }, { "epoch": 5.76, "grad_norm": 13.146994590759277, "learning_rate": 2.14295927790521e-06, "loss": 0.0754, "step": 585700 }, { "epoch": 5.76, "grad_norm": 10.085845947265625, "learning_rate": 2.142835155450962e-06, "loss": 0.0671, "step": 585725 }, { "epoch": 5.76, "grad_norm": 3.206587076187134, "learning_rate": 2.1427110329967134e-06, "loss": 0.0968, "step": 585750 }, { "epoch": 5.76, "grad_norm": 14.499414443969727, "learning_rate": 2.1425869105424647e-06, "loss": 0.0519, "step": 585775 }, { "epoch": 5.76, "grad_norm": 1.2787728309631348, "learning_rate": 2.1424627880882163e-06, "loss": 0.0843, "step": 585800 }, { "epoch": 5.76, "grad_norm": 9.532920837402344, "learning_rate": 2.142338665633968e-06, "loss": 0.0516, "step": 585825 }, { "epoch": 5.76, "grad_norm": 0.05345112085342407, "learning_rate": 2.1422145431797196e-06, "loss": 0.1008, "step": 585850 }, { "epoch": 5.76, "grad_norm": 16.473257064819336, "learning_rate": 2.142090420725471e-06, "loss": 0.0477, "step": 585875 }, { "epoch": 5.76, "grad_norm": 2.3137149810791016, "learning_rate": 2.141966298271223e-06, "loss": 0.0705, "step": 585900 }, { "epoch": 5.76, "grad_norm": 14.42061710357666, "learning_rate": 2.141842175816974e-06, "loss": 0.0483, "step": 585925 }, { "epoch": 5.76, "grad_norm": 3.4319007396698, "learning_rate": 2.1417180533627257e-06, "loss": 0.0616, "step": 585950 }, { "epoch": 5.76, "grad_norm": 13.733683586120605, "learning_rate": 2.1415939309084773e-06, "loss": 0.046, "step": 585975 }, { "epoch": 5.76, "grad_norm": 7.203212738037109, "learning_rate": 2.141469808454229e-06, "loss": 0.077, "step": 586000 }, { "epoch": 5.76, "grad_norm": 7.045639514923096, "learning_rate": 2.1413456859999806e-06, "loss": 0.0521, "step": 586025 }, { "epoch": 5.76, "grad_norm": 3.123659133911133, "learning_rate": 2.141221563545732e-06, "loss": 0.088, "step": 586050 }, { "epoch": 5.76, "grad_norm": 7.1733269691467285, "learning_rate": 2.1410974410914834e-06, "loss": 0.0551, "step": 586075 }, { "epoch": 5.76, "grad_norm": 1.0703692436218262, "learning_rate": 2.140973318637235e-06, "loss": 0.0888, "step": 586100 }, { "epoch": 5.76, "grad_norm": 9.12391471862793, "learning_rate": 2.1408491961829863e-06, "loss": 0.0413, "step": 586125 }, { "epoch": 5.76, "grad_norm": 4.616436958312988, "learning_rate": 2.140725073728738e-06, "loss": 0.1103, "step": 586150 }, { "epoch": 5.76, "grad_norm": 11.035895347595215, "learning_rate": 2.1406009512744895e-06, "loss": 0.0494, "step": 586175 }, { "epoch": 5.76, "grad_norm": 2.939425468444824, "learning_rate": 2.140476828820241e-06, "loss": 0.1252, "step": 586200 }, { "epoch": 5.76, "grad_norm": 17.9141902923584, "learning_rate": 2.140352706365993e-06, "loss": 0.0469, "step": 586225 }, { "epoch": 5.76, "grad_norm": 0.7134664058685303, "learning_rate": 2.1402285839117445e-06, "loss": 0.0657, "step": 586250 }, { "epoch": 5.76, "grad_norm": 12.15460205078125, "learning_rate": 2.1401044614574957e-06, "loss": 0.0488, "step": 586275 }, { "epoch": 5.76, "grad_norm": 2.9030370712280273, "learning_rate": 2.1399803390032473e-06, "loss": 0.0839, "step": 586300 }, { "epoch": 5.76, "grad_norm": 11.429780006408691, "learning_rate": 2.139856216548999e-06, "loss": 0.0573, "step": 586325 }, { "epoch": 5.77, "grad_norm": 0.045822981745004654, "learning_rate": 2.13973209409475e-06, "loss": 0.1025, "step": 586350 }, { "epoch": 5.77, "grad_norm": 4.120967388153076, "learning_rate": 2.1396079716405018e-06, "loss": 0.0565, "step": 586375 }, { "epoch": 5.77, "grad_norm": 4.5447587966918945, "learning_rate": 2.1394838491862534e-06, "loss": 0.0814, "step": 586400 }, { "epoch": 5.77, "grad_norm": 5.09234094619751, "learning_rate": 2.139359726732005e-06, "loss": 0.0365, "step": 586425 }, { "epoch": 5.77, "grad_norm": 0.8298963904380798, "learning_rate": 2.1392356042777567e-06, "loss": 0.0854, "step": 586450 }, { "epoch": 5.77, "grad_norm": 9.012807846069336, "learning_rate": 2.139111481823508e-06, "loss": 0.0539, "step": 586475 }, { "epoch": 5.77, "grad_norm": 3.458322286605835, "learning_rate": 2.1389873593692595e-06, "loss": 0.1179, "step": 586500 }, { "epoch": 5.77, "grad_norm": Infinity, "learning_rate": 2.138868201813181e-06, "loss": 0.0538, "step": 586525 }, { "epoch": 5.77, "grad_norm": 4.088949203491211, "learning_rate": 2.1387440793589326e-06, "loss": 0.0856, "step": 586550 }, { "epoch": 5.77, "grad_norm": 3.1319148540496826, "learning_rate": 2.138619956904684e-06, "loss": 0.0392, "step": 586575 }, { "epoch": 5.77, "grad_norm": 3.4970128536224365, "learning_rate": 2.1384958344504355e-06, "loss": 0.115, "step": 586600 }, { "epoch": 5.77, "grad_norm": 17.636075973510742, "learning_rate": 2.138371711996187e-06, "loss": 0.0536, "step": 586625 }, { "epoch": 5.77, "grad_norm": 5.5205278396606445, "learning_rate": 2.1382475895419387e-06, "loss": 0.0898, "step": 586650 }, { "epoch": 5.77, "grad_norm": 9.835390090942383, "learning_rate": 2.1381234670876904e-06, "loss": 0.0329, "step": 586675 }, { "epoch": 5.77, "grad_norm": 0.48252072930336, "learning_rate": 2.137999344633442e-06, "loss": 0.0629, "step": 586700 }, { "epoch": 5.77, "grad_norm": 14.461715698242188, "learning_rate": 2.1378752221791932e-06, "loss": 0.0778, "step": 586725 }, { "epoch": 5.77, "grad_norm": 5.0970892906188965, "learning_rate": 2.137751099724945e-06, "loss": 0.1103, "step": 586750 }, { "epoch": 5.77, "grad_norm": 6.019598484039307, "learning_rate": 2.1376269772706965e-06, "loss": 0.0484, "step": 586775 }, { "epoch": 5.77, "grad_norm": 1.9791611433029175, "learning_rate": 2.1375028548164477e-06, "loss": 0.057, "step": 586800 }, { "epoch": 5.77, "grad_norm": 12.598339080810547, "learning_rate": 2.1373787323621993e-06, "loss": 0.048, "step": 586825 }, { "epoch": 5.77, "grad_norm": 1.2300101518630981, "learning_rate": 2.137254609907951e-06, "loss": 0.0759, "step": 586850 }, { "epoch": 5.77, "grad_norm": 8.918500900268555, "learning_rate": 2.1371304874537026e-06, "loss": 0.0416, "step": 586875 }, { "epoch": 5.77, "grad_norm": 7.998741626739502, "learning_rate": 2.1370063649994542e-06, "loss": 0.0812, "step": 586900 }, { "epoch": 5.77, "grad_norm": 14.087079048156738, "learning_rate": 2.136882242545206e-06, "loss": 0.0692, "step": 586925 }, { "epoch": 5.77, "grad_norm": 0.020319543778896332, "learning_rate": 2.136758120090957e-06, "loss": 0.0803, "step": 586950 }, { "epoch": 5.77, "grad_norm": 15.528614044189453, "learning_rate": 2.1366339976367087e-06, "loss": 0.0757, "step": 586975 }, { "epoch": 5.77, "grad_norm": 1.5550051927566528, "learning_rate": 2.13650987518246e-06, "loss": 0.1026, "step": 587000 }, { "epoch": 5.77, "grad_norm": 13.752836227416992, "learning_rate": 2.1363857527282116e-06, "loss": 0.0518, "step": 587025 }, { "epoch": 5.77, "grad_norm": 0.11009781807661057, "learning_rate": 2.136261630273963e-06, "loss": 0.0868, "step": 587050 }, { "epoch": 5.77, "grad_norm": 3.0791871547698975, "learning_rate": 2.136137507819715e-06, "loss": 0.0569, "step": 587075 }, { "epoch": 5.77, "grad_norm": 26.050588607788086, "learning_rate": 2.1360133853654665e-06, "loss": 0.0902, "step": 587100 }, { "epoch": 5.77, "grad_norm": 13.967336654663086, "learning_rate": 2.135889262911218e-06, "loss": 0.0496, "step": 587125 }, { "epoch": 5.77, "grad_norm": 0.5583190321922302, "learning_rate": 2.1357651404569693e-06, "loss": 0.0713, "step": 587150 }, { "epoch": 5.77, "grad_norm": 6.9815802574157715, "learning_rate": 2.135641018002721e-06, "loss": 0.0849, "step": 587175 }, { "epoch": 5.77, "grad_norm": 0.905287504196167, "learning_rate": 2.1355168955484726e-06, "loss": 0.0791, "step": 587200 }, { "epoch": 5.77, "grad_norm": 8.626697540283203, "learning_rate": 2.135392773094224e-06, "loss": 0.0631, "step": 587225 }, { "epoch": 5.77, "grad_norm": 0.5273309350013733, "learning_rate": 2.1352686506399754e-06, "loss": 0.0835, "step": 587250 }, { "epoch": 5.77, "grad_norm": 8.801170349121094, "learning_rate": 2.135144528185727e-06, "loss": 0.0483, "step": 587275 }, { "epoch": 5.77, "grad_norm": 4.842280864715576, "learning_rate": 2.1350204057314787e-06, "loss": 0.0879, "step": 587300 }, { "epoch": 5.77, "grad_norm": 0.7600395083427429, "learning_rate": 2.1348962832772303e-06, "loss": 0.0614, "step": 587325 }, { "epoch": 5.77, "grad_norm": 5.839138031005859, "learning_rate": 2.134772160822982e-06, "loss": 0.0952, "step": 587350 }, { "epoch": 5.78, "grad_norm": 17.129486083984375, "learning_rate": 2.134648038368733e-06, "loss": 0.0522, "step": 587375 }, { "epoch": 5.78, "grad_norm": 10.055377006530762, "learning_rate": 2.134523915914485e-06, "loss": 0.0741, "step": 587400 }, { "epoch": 5.78, "grad_norm": 15.663817405700684, "learning_rate": 2.134399793460236e-06, "loss": 0.0535, "step": 587425 }, { "epoch": 5.78, "grad_norm": 0.34033718705177307, "learning_rate": 2.1342756710059877e-06, "loss": 0.0745, "step": 587450 }, { "epoch": 5.78, "grad_norm": 7.886284828186035, "learning_rate": 2.1341515485517393e-06, "loss": 0.0506, "step": 587475 }, { "epoch": 5.78, "grad_norm": 4.538841247558594, "learning_rate": 2.134027426097491e-06, "loss": 0.0782, "step": 587500 }, { "epoch": 5.78, "grad_norm": 9.887869834899902, "learning_rate": 2.1339033036432426e-06, "loss": 0.0483, "step": 587525 }, { "epoch": 5.78, "grad_norm": 9.923606872558594, "learning_rate": 2.133779181188994e-06, "loss": 0.0957, "step": 587550 }, { "epoch": 5.78, "grad_norm": 6.73823881149292, "learning_rate": 2.1336550587347454e-06, "loss": 0.0671, "step": 587575 }, { "epoch": 5.78, "grad_norm": 0.8960583806037903, "learning_rate": 2.133530936280497e-06, "loss": 0.0657, "step": 587600 }, { "epoch": 5.78, "grad_norm": 7.205126762390137, "learning_rate": 2.1334068138262487e-06, "loss": 0.0399, "step": 587625 }, { "epoch": 5.78, "grad_norm": 0.02464190684258938, "learning_rate": 2.133282691372e-06, "loss": 0.0558, "step": 587650 }, { "epoch": 5.78, "grad_norm": 8.598298072814941, "learning_rate": 2.1331585689177515e-06, "loss": 0.0376, "step": 587675 }, { "epoch": 5.78, "grad_norm": 4.052762508392334, "learning_rate": 2.133034446463503e-06, "loss": 0.0777, "step": 587700 }, { "epoch": 5.78, "grad_norm": 10.298179626464844, "learning_rate": 2.132910324009255e-06, "loss": 0.0456, "step": 587725 }, { "epoch": 5.78, "grad_norm": 2.450324058532715, "learning_rate": 2.1327862015550064e-06, "loss": 0.0659, "step": 587750 }, { "epoch": 5.78, "grad_norm": 6.10460901260376, "learning_rate": 2.132662079100758e-06, "loss": 0.0473, "step": 587775 }, { "epoch": 5.78, "grad_norm": 0.618431568145752, "learning_rate": 2.1325379566465093e-06, "loss": 0.0793, "step": 587800 }, { "epoch": 5.78, "grad_norm": 9.903189659118652, "learning_rate": 2.132413834192261e-06, "loss": 0.0573, "step": 587825 }, { "epoch": 5.78, "grad_norm": 0.015619436278939247, "learning_rate": 2.132289711738012e-06, "loss": 0.0563, "step": 587850 }, { "epoch": 5.78, "grad_norm": 8.6377534866333, "learning_rate": 2.1321655892837638e-06, "loss": 0.0602, "step": 587875 }, { "epoch": 5.78, "grad_norm": 2.3424789905548096, "learning_rate": 2.1320414668295154e-06, "loss": 0.0896, "step": 587900 }, { "epoch": 5.78, "grad_norm": 11.426417350769043, "learning_rate": 2.131917344375267e-06, "loss": 0.0554, "step": 587925 }, { "epoch": 5.78, "grad_norm": 0.08481302112340927, "learning_rate": 2.1317932219210187e-06, "loss": 0.0757, "step": 587950 }, { "epoch": 5.78, "grad_norm": 5.510794162750244, "learning_rate": 2.1316690994667703e-06, "loss": 0.0446, "step": 587975 }, { "epoch": 5.78, "grad_norm": 0.15578603744506836, "learning_rate": 2.1315449770125215e-06, "loss": 0.0791, "step": 588000 }, { "epoch": 5.78, "grad_norm": 10.02461051940918, "learning_rate": 2.131420854558273e-06, "loss": 0.0371, "step": 588025 }, { "epoch": 5.78, "grad_norm": 1.3600138425827026, "learning_rate": 2.131296732104025e-06, "loss": 0.0951, "step": 588050 }, { "epoch": 5.78, "grad_norm": 16.195743560791016, "learning_rate": 2.131172609649776e-06, "loss": 0.058, "step": 588075 }, { "epoch": 5.78, "grad_norm": 0.33391809463500977, "learning_rate": 2.1310484871955276e-06, "loss": 0.0823, "step": 588100 }, { "epoch": 5.78, "grad_norm": 10.805437088012695, "learning_rate": 2.1309243647412793e-06, "loss": 0.0317, "step": 588125 }, { "epoch": 5.78, "grad_norm": 0.21114131808280945, "learning_rate": 2.130800242287031e-06, "loss": 0.0801, "step": 588150 }, { "epoch": 5.78, "grad_norm": 6.163401126861572, "learning_rate": 2.1306761198327825e-06, "loss": 0.0485, "step": 588175 }, { "epoch": 5.78, "grad_norm": 0.12475931644439697, "learning_rate": 2.130551997378534e-06, "loss": 0.1161, "step": 588200 }, { "epoch": 5.78, "grad_norm": 15.22647762298584, "learning_rate": 2.1304278749242854e-06, "loss": 0.0628, "step": 588225 }, { "epoch": 5.78, "grad_norm": 2.951978921890259, "learning_rate": 2.130303752470037e-06, "loss": 0.0688, "step": 588250 }, { "epoch": 5.78, "grad_norm": 12.548104286193848, "learning_rate": 2.1301796300157882e-06, "loss": 0.0455, "step": 588275 }, { "epoch": 5.78, "grad_norm": 0.15024811029434204, "learning_rate": 2.13005550756154e-06, "loss": 0.0557, "step": 588300 }, { "epoch": 5.78, "grad_norm": 11.08795166015625, "learning_rate": 2.1299313851072915e-06, "loss": 0.0517, "step": 588325 }, { "epoch": 5.78, "grad_norm": 1.5111149549484253, "learning_rate": 2.129807262653043e-06, "loss": 0.0837, "step": 588350 }, { "epoch": 5.79, "grad_norm": 10.18375301361084, "learning_rate": 2.1296831401987948e-06, "loss": 0.052, "step": 588375 }, { "epoch": 5.79, "grad_norm": 1.7265045642852783, "learning_rate": 2.1295590177445464e-06, "loss": 0.075, "step": 588400 }, { "epoch": 5.79, "grad_norm": 8.977932929992676, "learning_rate": 2.1294348952902976e-06, "loss": 0.0574, "step": 588425 }, { "epoch": 5.79, "grad_norm": 1.7489681243896484, "learning_rate": 2.1293107728360493e-06, "loss": 0.0947, "step": 588450 }, { "epoch": 5.79, "grad_norm": 11.279966354370117, "learning_rate": 2.129186650381801e-06, "loss": 0.0492, "step": 588475 }, { "epoch": 5.79, "grad_norm": 8.595670700073242, "learning_rate": 2.129062527927552e-06, "loss": 0.0867, "step": 588500 }, { "epoch": 5.79, "grad_norm": 16.49195671081543, "learning_rate": 2.1289384054733037e-06, "loss": 0.0534, "step": 588525 }, { "epoch": 5.79, "grad_norm": 8.790863990783691, "learning_rate": 2.1288142830190554e-06, "loss": 0.1119, "step": 588550 }, { "epoch": 5.79, "grad_norm": 11.505784034729004, "learning_rate": 2.128690160564807e-06, "loss": 0.0525, "step": 588575 }, { "epoch": 5.79, "grad_norm": 6.6923346519470215, "learning_rate": 2.1285710030087285e-06, "loss": 0.0752, "step": 588600 }, { "epoch": 5.79, "grad_norm": 10.421845436096191, "learning_rate": 2.12844688055448e-06, "loss": 0.0575, "step": 588625 }, { "epoch": 5.79, "grad_norm": 3.3947534561157227, "learning_rate": 2.1283227581002317e-06, "loss": 0.0752, "step": 588650 }, { "epoch": 5.79, "grad_norm": 3.234158754348755, "learning_rate": 2.128198635645983e-06, "loss": 0.0328, "step": 588675 }, { "epoch": 5.79, "grad_norm": 5.423539638519287, "learning_rate": 2.1280745131917346e-06, "loss": 0.1012, "step": 588700 }, { "epoch": 5.79, "grad_norm": 7.338821887969971, "learning_rate": 2.127950390737486e-06, "loss": 0.0319, "step": 588725 }, { "epoch": 5.79, "grad_norm": 0.46415087580680847, "learning_rate": 2.1278262682832374e-06, "loss": 0.0721, "step": 588750 }, { "epoch": 5.79, "grad_norm": 9.932462692260742, "learning_rate": 2.127702145828989e-06, "loss": 0.0416, "step": 588775 }, { "epoch": 5.79, "grad_norm": 0.013294938020408154, "learning_rate": 2.1275780233747407e-06, "loss": 0.1073, "step": 588800 }, { "epoch": 5.79, "grad_norm": 16.250911712646484, "learning_rate": 2.1274539009204923e-06, "loss": 0.0573, "step": 588825 }, { "epoch": 5.79, "grad_norm": 0.013468502089381218, "learning_rate": 2.127329778466244e-06, "loss": 0.1122, "step": 588850 }, { "epoch": 5.79, "grad_norm": 9.494895935058594, "learning_rate": 2.127205656011995e-06, "loss": 0.0484, "step": 588875 }, { "epoch": 5.79, "grad_norm": 1.4392319917678833, "learning_rate": 2.127081533557747e-06, "loss": 0.0689, "step": 588900 }, { "epoch": 5.79, "grad_norm": 16.735050201416016, "learning_rate": 2.1269574111034984e-06, "loss": 0.0485, "step": 588925 }, { "epoch": 5.79, "grad_norm": 4.188528537750244, "learning_rate": 2.12683328864925e-06, "loss": 0.0897, "step": 588950 }, { "epoch": 5.79, "grad_norm": 11.784933090209961, "learning_rate": 2.1267091661950017e-06, "loss": 0.0521, "step": 588975 }, { "epoch": 5.79, "grad_norm": 1.0255095958709717, "learning_rate": 2.1265850437407533e-06, "loss": 0.0789, "step": 589000 }, { "epoch": 5.79, "grad_norm": 6.589531898498535, "learning_rate": 2.1264609212865046e-06, "loss": 0.0521, "step": 589025 }, { "epoch": 5.79, "grad_norm": 1.721937894821167, "learning_rate": 2.126336798832256e-06, "loss": 0.0908, "step": 589050 }, { "epoch": 5.79, "grad_norm": 15.200800895690918, "learning_rate": 2.126212676378008e-06, "loss": 0.0485, "step": 589075 }, { "epoch": 5.79, "grad_norm": 0.6950478553771973, "learning_rate": 2.126088553923759e-06, "loss": 0.0726, "step": 589100 }, { "epoch": 5.79, "grad_norm": 7.296674728393555, "learning_rate": 2.1259644314695107e-06, "loss": 0.0397, "step": 589125 }, { "epoch": 5.79, "grad_norm": 0.21492399275302887, "learning_rate": 2.1258403090152623e-06, "loss": 0.0821, "step": 589150 }, { "epoch": 5.79, "grad_norm": 15.841509819030762, "learning_rate": 2.125716186561014e-06, "loss": 0.0321, "step": 589175 }, { "epoch": 5.79, "grad_norm": 8.603984832763672, "learning_rate": 2.1255920641067656e-06, "loss": 0.0972, "step": 589200 }, { "epoch": 5.79, "grad_norm": 23.1882266998291, "learning_rate": 2.1254679416525172e-06, "loss": 0.0333, "step": 589225 }, { "epoch": 5.79, "grad_norm": 1.6274312734603882, "learning_rate": 2.1253438191982684e-06, "loss": 0.0933, "step": 589250 }, { "epoch": 5.79, "grad_norm": 4.565490245819092, "learning_rate": 2.12521969674402e-06, "loss": 0.0264, "step": 589275 }, { "epoch": 5.79, "grad_norm": 0.09445561468601227, "learning_rate": 2.1250955742897713e-06, "loss": 0.0858, "step": 589300 }, { "epoch": 5.79, "grad_norm": 12.49746036529541, "learning_rate": 2.124971451835523e-06, "loss": 0.0544, "step": 589325 }, { "epoch": 5.79, "grad_norm": 0.3221067190170288, "learning_rate": 2.1248473293812745e-06, "loss": 0.1023, "step": 589350 }, { "epoch": 5.79, "grad_norm": 13.011947631835938, "learning_rate": 2.124723206927026e-06, "loss": 0.0624, "step": 589375 }, { "epoch": 5.8, "grad_norm": 9.32553482055664, "learning_rate": 2.124599084472778e-06, "loss": 0.1064, "step": 589400 }, { "epoch": 5.8, "grad_norm": 2.081204891204834, "learning_rate": 2.1244749620185294e-06, "loss": 0.0437, "step": 589425 }, { "epoch": 5.8, "grad_norm": 0.18675048649311066, "learning_rate": 2.1243508395642807e-06, "loss": 0.0889, "step": 589450 }, { "epoch": 5.8, "grad_norm": 19.807647705078125, "learning_rate": 2.1242267171100323e-06, "loss": 0.0749, "step": 589475 }, { "epoch": 5.8, "grad_norm": 2.470184564590454, "learning_rate": 2.124102594655784e-06, "loss": 0.0879, "step": 589500 }, { "epoch": 5.8, "grad_norm": 6.1471452713012695, "learning_rate": 2.123978472201535e-06, "loss": 0.0422, "step": 589525 }, { "epoch": 5.8, "grad_norm": 0.6275395750999451, "learning_rate": 2.1238543497472868e-06, "loss": 0.0796, "step": 589550 }, { "epoch": 5.8, "grad_norm": 4.882011890411377, "learning_rate": 2.1237302272930384e-06, "loss": 0.0444, "step": 589575 }, { "epoch": 5.8, "grad_norm": 2.266649007797241, "learning_rate": 2.12360610483879e-06, "loss": 0.0805, "step": 589600 }, { "epoch": 5.8, "grad_norm": 10.92658519744873, "learning_rate": 2.1234819823845417e-06, "loss": 0.0495, "step": 589625 }, { "epoch": 5.8, "grad_norm": 5.901608943939209, "learning_rate": 2.1233578599302933e-06, "loss": 0.12, "step": 589650 }, { "epoch": 5.8, "grad_norm": 13.805270195007324, "learning_rate": 2.1232337374760445e-06, "loss": 0.0882, "step": 589675 }, { "epoch": 5.8, "grad_norm": 4.2034735679626465, "learning_rate": 2.123109615021796e-06, "loss": 0.0962, "step": 589700 }, { "epoch": 5.8, "grad_norm": 25.722551345825195, "learning_rate": 2.1229854925675474e-06, "loss": 0.0608, "step": 589725 }, { "epoch": 5.8, "grad_norm": 0.31916871666908264, "learning_rate": 2.122861370113299e-06, "loss": 0.061, "step": 589750 }, { "epoch": 5.8, "grad_norm": 22.269006729125977, "learning_rate": 2.1227372476590506e-06, "loss": 0.0638, "step": 589775 }, { "epoch": 5.8, "grad_norm": 3.037435531616211, "learning_rate": 2.1226131252048023e-06, "loss": 0.0817, "step": 589800 }, { "epoch": 5.8, "grad_norm": 7.931769371032715, "learning_rate": 2.122489002750554e-06, "loss": 0.053, "step": 589825 }, { "epoch": 5.8, "grad_norm": 5.588046073913574, "learning_rate": 2.1223648802963055e-06, "loss": 0.0858, "step": 589850 }, { "epoch": 5.8, "grad_norm": 2.9352285861968994, "learning_rate": 2.1222407578420568e-06, "loss": 0.0427, "step": 589875 }, { "epoch": 5.8, "grad_norm": 11.990721702575684, "learning_rate": 2.1221166353878084e-06, "loss": 0.0882, "step": 589900 }, { "epoch": 5.8, "grad_norm": 7.293154716491699, "learning_rate": 2.12199251293356e-06, "loss": 0.0619, "step": 589925 }, { "epoch": 5.8, "grad_norm": 7.9894304275512695, "learning_rate": 2.1218683904793112e-06, "loss": 0.0661, "step": 589950 }, { "epoch": 5.8, "grad_norm": 15.964531898498535, "learning_rate": 2.121744268025063e-06, "loss": 0.0618, "step": 589975 }, { "epoch": 5.8, "grad_norm": 2.6692795753479004, "learning_rate": 2.1216201455708145e-06, "loss": 0.0796, "step": 590000 }, { "epoch": 5.8, "grad_norm": 5.1539201736450195, "learning_rate": 2.121496023116566e-06, "loss": 0.0374, "step": 590025 }, { "epoch": 5.8, "grad_norm": 2.8628084659576416, "learning_rate": 2.1213719006623178e-06, "loss": 0.0955, "step": 590050 }, { "epoch": 5.8, "grad_norm": 13.565882682800293, "learning_rate": 2.1212477782080694e-06, "loss": 0.0672, "step": 590075 }, { "epoch": 5.8, "grad_norm": 1.557060718536377, "learning_rate": 2.1211236557538206e-06, "loss": 0.0694, "step": 590100 }, { "epoch": 5.8, "grad_norm": 4.785594463348389, "learning_rate": 2.1209995332995723e-06, "loss": 0.0405, "step": 590125 }, { "epoch": 5.8, "grad_norm": 0.05040721595287323, "learning_rate": 2.1208754108453235e-06, "loss": 0.0724, "step": 590150 }, { "epoch": 5.8, "grad_norm": 11.95490837097168, "learning_rate": 2.120751288391075e-06, "loss": 0.0448, "step": 590175 }, { "epoch": 5.8, "grad_norm": 0.6755931973457336, "learning_rate": 2.1206271659368267e-06, "loss": 0.0727, "step": 590200 }, { "epoch": 5.8, "grad_norm": 14.297795295715332, "learning_rate": 2.1205030434825784e-06, "loss": 0.048, "step": 590225 }, { "epoch": 5.8, "grad_norm": 7.552696228027344, "learning_rate": 2.12037892102833e-06, "loss": 0.1062, "step": 590250 }, { "epoch": 5.8, "grad_norm": 7.426322937011719, "learning_rate": 2.1202547985740816e-06, "loss": 0.0452, "step": 590275 }, { "epoch": 5.8, "grad_norm": 3.6532204151153564, "learning_rate": 2.120130676119833e-06, "loss": 0.0736, "step": 590300 }, { "epoch": 5.8, "grad_norm": 13.402266502380371, "learning_rate": 2.1200065536655845e-06, "loss": 0.0557, "step": 590325 }, { "epoch": 5.8, "grad_norm": 4.947115421295166, "learning_rate": 2.119882431211336e-06, "loss": 0.0986, "step": 590350 }, { "epoch": 5.8, "grad_norm": 16.751142501831055, "learning_rate": 2.1197583087570873e-06, "loss": 0.0671, "step": 590375 }, { "epoch": 5.8, "grad_norm": 5.237560272216797, "learning_rate": 2.119634186302839e-06, "loss": 0.0719, "step": 590400 }, { "epoch": 5.81, "grad_norm": 7.416085243225098, "learning_rate": 2.1195100638485906e-06, "loss": 0.0471, "step": 590425 }, { "epoch": 5.81, "grad_norm": 2.303396224975586, "learning_rate": 2.1193859413943422e-06, "loss": 0.0776, "step": 590450 }, { "epoch": 5.81, "grad_norm": 12.676578521728516, "learning_rate": 2.119261818940094e-06, "loss": 0.0526, "step": 590475 }, { "epoch": 5.81, "grad_norm": 1.1440585851669312, "learning_rate": 2.1191376964858455e-06, "loss": 0.0814, "step": 590500 }, { "epoch": 5.81, "grad_norm": 9.244369506835938, "learning_rate": 2.1190135740315967e-06, "loss": 0.0544, "step": 590525 }, { "epoch": 5.81, "grad_norm": 2.966956853866577, "learning_rate": 2.1188894515773484e-06, "loss": 0.0805, "step": 590550 }, { "epoch": 5.81, "grad_norm": 6.4975385665893555, "learning_rate": 2.1187653291230996e-06, "loss": 0.054, "step": 590575 }, { "epoch": 5.81, "grad_norm": 2.858492374420166, "learning_rate": 2.118641206668851e-06, "loss": 0.0914, "step": 590600 }, { "epoch": 5.81, "grad_norm": 7.452760219573975, "learning_rate": 2.118517084214603e-06, "loss": 0.044, "step": 590625 }, { "epoch": 5.81, "grad_norm": 0.11960888653993607, "learning_rate": 2.1183929617603545e-06, "loss": 0.1064, "step": 590650 }, { "epoch": 5.81, "grad_norm": 10.477376937866211, "learning_rate": 2.118268839306106e-06, "loss": 0.0492, "step": 590675 }, { "epoch": 5.81, "grad_norm": 1.0675499439239502, "learning_rate": 2.1181447168518577e-06, "loss": 0.0747, "step": 590700 }, { "epoch": 5.81, "grad_norm": 14.018492698669434, "learning_rate": 2.118020594397609e-06, "loss": 0.061, "step": 590725 }, { "epoch": 5.81, "grad_norm": 0.00667452672496438, "learning_rate": 2.1178964719433606e-06, "loss": 0.0827, "step": 590750 }, { "epoch": 5.81, "grad_norm": 19.17656135559082, "learning_rate": 2.1177723494891122e-06, "loss": 0.0364, "step": 590775 }, { "epoch": 5.81, "grad_norm": 0.44785958528518677, "learning_rate": 2.1176482270348634e-06, "loss": 0.0877, "step": 590800 }, { "epoch": 5.81, "grad_norm": 7.074615001678467, "learning_rate": 2.117524104580615e-06, "loss": 0.0459, "step": 590825 }, { "epoch": 5.81, "grad_norm": 0.4382703900337219, "learning_rate": 2.1173999821263667e-06, "loss": 0.0891, "step": 590850 }, { "epoch": 5.81, "grad_norm": 14.572233200073242, "learning_rate": 2.1172758596721183e-06, "loss": 0.048, "step": 590875 }, { "epoch": 5.81, "grad_norm": 5.970787525177002, "learning_rate": 2.11715173721787e-06, "loss": 0.0847, "step": 590900 }, { "epoch": 5.81, "grad_norm": 9.20438003540039, "learning_rate": 2.1170276147636216e-06, "loss": 0.0628, "step": 590925 }, { "epoch": 5.81, "grad_norm": 1.749246597290039, "learning_rate": 2.116903492309373e-06, "loss": 0.0713, "step": 590950 }, { "epoch": 5.81, "grad_norm": 18.289548873901367, "learning_rate": 2.1167793698551245e-06, "loss": 0.0497, "step": 590975 }, { "epoch": 5.81, "grad_norm": 0.35640159249305725, "learning_rate": 2.1166552474008757e-06, "loss": 0.0603, "step": 591000 }, { "epoch": 5.81, "grad_norm": 2.444674491882324, "learning_rate": 2.1165311249466273e-06, "loss": 0.051, "step": 591025 }, { "epoch": 5.81, "grad_norm": 0.4056979715824127, "learning_rate": 2.116407002492379e-06, "loss": 0.0735, "step": 591050 }, { "epoch": 5.81, "grad_norm": 17.83759117126465, "learning_rate": 2.1162828800381306e-06, "loss": 0.0523, "step": 591075 }, { "epoch": 5.81, "grad_norm": 5.985630035400391, "learning_rate": 2.1161587575838822e-06, "loss": 0.0779, "step": 591100 }, { "epoch": 5.81, "grad_norm": 9.102784156799316, "learning_rate": 2.116034635129634e-06, "loss": 0.0458, "step": 591125 }, { "epoch": 5.81, "grad_norm": 2.7527153491973877, "learning_rate": 2.1159154775735553e-06, "loss": 0.0833, "step": 591150 }, { "epoch": 5.81, "grad_norm": 19.564353942871094, "learning_rate": 2.1157913551193065e-06, "loss": 0.0442, "step": 591175 }, { "epoch": 5.81, "grad_norm": 4.05070161819458, "learning_rate": 2.115667232665058e-06, "loss": 0.0787, "step": 591200 }, { "epoch": 5.81, "grad_norm": 23.332048416137695, "learning_rate": 2.1155431102108098e-06, "loss": 0.0491, "step": 591225 }, { "epoch": 5.81, "grad_norm": 0.3651655912399292, "learning_rate": 2.115418987756561e-06, "loss": 0.0733, "step": 591250 }, { "epoch": 5.81, "grad_norm": 12.212406158447266, "learning_rate": 2.1152948653023126e-06, "loss": 0.0335, "step": 591275 }, { "epoch": 5.81, "grad_norm": 5.58850622177124, "learning_rate": 2.1151707428480643e-06, "loss": 0.0834, "step": 591300 }, { "epoch": 5.81, "grad_norm": 10.632912635803223, "learning_rate": 2.115046620393816e-06, "loss": 0.0566, "step": 591325 }, { "epoch": 5.81, "grad_norm": 5.972161769866943, "learning_rate": 2.1149224979395675e-06, "loss": 0.0746, "step": 591350 }, { "epoch": 5.81, "grad_norm": 6.45046854019165, "learning_rate": 2.114798375485319e-06, "loss": 0.044, "step": 591375 }, { "epoch": 5.81, "grad_norm": 25.65369415283203, "learning_rate": 2.1146742530310704e-06, "loss": 0.0894, "step": 591400 }, { "epoch": 5.81, "grad_norm": 8.929461479187012, "learning_rate": 2.114550130576822e-06, "loss": 0.0605, "step": 591425 }, { "epoch": 5.82, "grad_norm": 0.37772005796432495, "learning_rate": 2.1144260081225736e-06, "loss": 0.0867, "step": 591450 }, { "epoch": 5.82, "grad_norm": 15.747326850891113, "learning_rate": 2.114301885668325e-06, "loss": 0.0567, "step": 591475 }, { "epoch": 5.82, "grad_norm": 0.41191565990448, "learning_rate": 2.1141777632140765e-06, "loss": 0.0796, "step": 591500 }, { "epoch": 5.82, "grad_norm": 8.59809684753418, "learning_rate": 2.114053640759828e-06, "loss": 0.0677, "step": 591525 }, { "epoch": 5.82, "grad_norm": 3.493284225463867, "learning_rate": 2.1139295183055798e-06, "loss": 0.0749, "step": 591550 }, { "epoch": 5.82, "grad_norm": 7.900740146636963, "learning_rate": 2.1138053958513314e-06, "loss": 0.0528, "step": 591575 }, { "epoch": 5.82, "grad_norm": 3.884580373764038, "learning_rate": 2.1136812733970826e-06, "loss": 0.086, "step": 591600 }, { "epoch": 5.82, "grad_norm": 7.339694499969482, "learning_rate": 2.1135571509428342e-06, "loss": 0.0443, "step": 591625 }, { "epoch": 5.82, "grad_norm": 0.6626785397529602, "learning_rate": 2.113433028488586e-06, "loss": 0.0868, "step": 591650 }, { "epoch": 5.82, "grad_norm": 5.690333843231201, "learning_rate": 2.1133089060343375e-06, "loss": 0.0395, "step": 591675 }, { "epoch": 5.82, "grad_norm": 3.481142520904541, "learning_rate": 2.1131847835800887e-06, "loss": 0.0797, "step": 591700 }, { "epoch": 5.82, "grad_norm": 10.71491527557373, "learning_rate": 2.1130606611258404e-06, "loss": 0.0513, "step": 591725 }, { "epoch": 5.82, "grad_norm": 0.15167374908924103, "learning_rate": 2.112936538671592e-06, "loss": 0.0893, "step": 591750 }, { "epoch": 5.82, "grad_norm": 5.729691982269287, "learning_rate": 2.1128124162173436e-06, "loss": 0.0363, "step": 591775 }, { "epoch": 5.82, "grad_norm": 0.47800666093826294, "learning_rate": 2.1126882937630953e-06, "loss": 0.0609, "step": 591800 }, { "epoch": 5.82, "grad_norm": 11.040645599365234, "learning_rate": 2.1125641713088465e-06, "loss": 0.0556, "step": 591825 }, { "epoch": 5.82, "grad_norm": 1.1216989755630493, "learning_rate": 2.112440048854598e-06, "loss": 0.0716, "step": 591850 }, { "epoch": 5.82, "grad_norm": 1.7762969732284546, "learning_rate": 2.1123159264003497e-06, "loss": 0.0329, "step": 591875 }, { "epoch": 5.82, "grad_norm": 0.1913498342037201, "learning_rate": 2.1121918039461014e-06, "loss": 0.0764, "step": 591900 }, { "epoch": 5.82, "grad_norm": 13.044397354125977, "learning_rate": 2.112067681491853e-06, "loss": 0.0627, "step": 591925 }, { "epoch": 5.82, "grad_norm": 0.11237402260303497, "learning_rate": 2.1119435590376042e-06, "loss": 0.0906, "step": 591950 }, { "epoch": 5.82, "grad_norm": 16.044031143188477, "learning_rate": 2.111819436583356e-06, "loss": 0.0523, "step": 591975 }, { "epoch": 5.82, "grad_norm": 4.274134159088135, "learning_rate": 2.1116953141291075e-06, "loss": 0.0754, "step": 592000 }, { "epoch": 5.82, "grad_norm": 8.103948593139648, "learning_rate": 2.1115711916748587e-06, "loss": 0.0891, "step": 592025 }, { "epoch": 5.82, "grad_norm": 7.88636589050293, "learning_rate": 2.1114470692206103e-06, "loss": 0.0875, "step": 592050 }, { "epoch": 5.82, "grad_norm": 12.128728866577148, "learning_rate": 2.111322946766362e-06, "loss": 0.0538, "step": 592075 }, { "epoch": 5.82, "grad_norm": 0.36674147844314575, "learning_rate": 2.1111988243121136e-06, "loss": 0.0958, "step": 592100 }, { "epoch": 5.82, "grad_norm": 6.281200408935547, "learning_rate": 2.1110747018578653e-06, "loss": 0.0572, "step": 592125 }, { "epoch": 5.82, "grad_norm": 2.9972097873687744, "learning_rate": 2.110950579403617e-06, "loss": 0.0786, "step": 592150 }, { "epoch": 5.82, "grad_norm": 17.4018497467041, "learning_rate": 2.110826456949368e-06, "loss": 0.0527, "step": 592175 }, { "epoch": 5.82, "grad_norm": 0.33284056186676025, "learning_rate": 2.1107023344951197e-06, "loss": 0.0591, "step": 592200 }, { "epoch": 5.82, "grad_norm": 7.123678684234619, "learning_rate": 2.1105782120408714e-06, "loss": 0.0474, "step": 592225 }, { "epoch": 5.82, "grad_norm": 0.04202971234917641, "learning_rate": 2.1104540895866226e-06, "loss": 0.0971, "step": 592250 }, { "epoch": 5.82, "grad_norm": 12.139957427978516, "learning_rate": 2.1103299671323742e-06, "loss": 0.0543, "step": 592275 }, { "epoch": 5.82, "grad_norm": 0.2662752866744995, "learning_rate": 2.110205844678126e-06, "loss": 0.0803, "step": 592300 }, { "epoch": 5.82, "grad_norm": 10.902767181396484, "learning_rate": 2.1100817222238775e-06, "loss": 0.0462, "step": 592325 }, { "epoch": 5.82, "grad_norm": 1.5793887376785278, "learning_rate": 2.109957599769629e-06, "loss": 0.1071, "step": 592350 }, { "epoch": 5.82, "grad_norm": 2.902865409851074, "learning_rate": 2.1098334773153803e-06, "loss": 0.0666, "step": 592375 }, { "epoch": 5.82, "grad_norm": 0.16916880011558533, "learning_rate": 2.109709354861132e-06, "loss": 0.0944, "step": 592400 }, { "epoch": 5.82, "grad_norm": 11.23996353149414, "learning_rate": 2.1095852324068836e-06, "loss": 0.053, "step": 592425 }, { "epoch": 5.83, "grad_norm": 6.142126083374023, "learning_rate": 2.109461109952635e-06, "loss": 0.1034, "step": 592450 }, { "epoch": 5.83, "grad_norm": 11.071599006652832, "learning_rate": 2.1093369874983864e-06, "loss": 0.0354, "step": 592475 }, { "epoch": 5.83, "grad_norm": 0.020882636308670044, "learning_rate": 2.109212865044138e-06, "loss": 0.079, "step": 592500 }, { "epoch": 5.83, "grad_norm": 9.274093627929688, "learning_rate": 2.1090887425898897e-06, "loss": 0.0461, "step": 592525 }, { "epoch": 5.83, "grad_norm": 0.9930152893066406, "learning_rate": 2.1089646201356414e-06, "loss": 0.0949, "step": 592550 }, { "epoch": 5.83, "grad_norm": 21.336240768432617, "learning_rate": 2.108840497681393e-06, "loss": 0.0784, "step": 592575 }, { "epoch": 5.83, "grad_norm": 2.207573413848877, "learning_rate": 2.108716375227144e-06, "loss": 0.0717, "step": 592600 }, { "epoch": 5.83, "grad_norm": 15.760987281799316, "learning_rate": 2.108592252772896e-06, "loss": 0.0554, "step": 592625 }, { "epoch": 5.83, "grad_norm": 0.015372877940535545, "learning_rate": 2.1084681303186475e-06, "loss": 0.0841, "step": 592650 }, { "epoch": 5.83, "grad_norm": 8.948723793029785, "learning_rate": 2.1083440078643987e-06, "loss": 0.0288, "step": 592675 }, { "epoch": 5.83, "grad_norm": 3.694365978240967, "learning_rate": 2.1082198854101503e-06, "loss": 0.0766, "step": 592700 }, { "epoch": 5.83, "grad_norm": 14.664905548095703, "learning_rate": 2.108095762955902e-06, "loss": 0.0478, "step": 592725 }, { "epoch": 5.83, "grad_norm": 1.9210480451583862, "learning_rate": 2.1079716405016536e-06, "loss": 0.0594, "step": 592750 }, { "epoch": 5.83, "grad_norm": 12.287909507751465, "learning_rate": 2.1078475180474052e-06, "loss": 0.0388, "step": 592775 }, { "epoch": 5.83, "grad_norm": 0.14534738659858704, "learning_rate": 2.1077233955931564e-06, "loss": 0.1072, "step": 592800 }, { "epoch": 5.83, "grad_norm": 15.456382751464844, "learning_rate": 2.107599273138908e-06, "loss": 0.0588, "step": 592825 }, { "epoch": 5.83, "grad_norm": 3.5738792419433594, "learning_rate": 2.1074751506846597e-06, "loss": 0.1067, "step": 592850 }, { "epoch": 5.83, "grad_norm": 9.388838768005371, "learning_rate": 2.107351028230411e-06, "loss": 0.0404, "step": 592875 }, { "epoch": 5.83, "grad_norm": 0.38880789279937744, "learning_rate": 2.1072269057761625e-06, "loss": 0.0913, "step": 592900 }, { "epoch": 5.83, "grad_norm": 0.30576348304748535, "learning_rate": 2.107102783321914e-06, "loss": 0.0514, "step": 592925 }, { "epoch": 5.83, "grad_norm": 2.418177604675293, "learning_rate": 2.106978660867666e-06, "loss": 0.0965, "step": 592950 }, { "epoch": 5.83, "grad_norm": 11.653703689575195, "learning_rate": 2.1068545384134175e-06, "loss": 0.0429, "step": 592975 }, { "epoch": 5.83, "grad_norm": 1.3472521305084229, "learning_rate": 2.106730415959169e-06, "loss": 0.0815, "step": 593000 }, { "epoch": 5.83, "grad_norm": 9.984549522399902, "learning_rate": 2.1066062935049203e-06, "loss": 0.0412, "step": 593025 }, { "epoch": 5.83, "grad_norm": 0.37962543964385986, "learning_rate": 2.106482171050672e-06, "loss": 0.0703, "step": 593050 }, { "epoch": 5.83, "grad_norm": 10.135741233825684, "learning_rate": 2.1063580485964236e-06, "loss": 0.0415, "step": 593075 }, { "epoch": 5.83, "grad_norm": 2.7087807655334473, "learning_rate": 2.1062339261421748e-06, "loss": 0.1092, "step": 593100 }, { "epoch": 5.83, "grad_norm": 11.624129295349121, "learning_rate": 2.1061098036879264e-06, "loss": 0.0642, "step": 593125 }, { "epoch": 5.83, "grad_norm": 0.22498834133148193, "learning_rate": 2.105985681233678e-06, "loss": 0.0717, "step": 593150 }, { "epoch": 5.83, "grad_norm": 8.738798141479492, "learning_rate": 2.1058615587794297e-06, "loss": 0.0566, "step": 593175 }, { "epoch": 5.83, "grad_norm": 4.677692413330078, "learning_rate": 2.1057374363251813e-06, "loss": 0.0664, "step": 593200 }, { "epoch": 5.83, "grad_norm": 19.35304069519043, "learning_rate": 2.105613313870933e-06, "loss": 0.0518, "step": 593225 }, { "epoch": 5.83, "grad_norm": 2.0109705924987793, "learning_rate": 2.1054941563148544e-06, "loss": 0.0654, "step": 593250 }, { "epoch": 5.83, "grad_norm": 9.97319507598877, "learning_rate": 2.1053700338606056e-06, "loss": 0.0444, "step": 593275 }, { "epoch": 5.83, "grad_norm": 0.04117028787732124, "learning_rate": 2.1052459114063573e-06, "loss": 0.0967, "step": 593300 }, { "epoch": 5.83, "grad_norm": 4.1812286376953125, "learning_rate": 2.105121788952109e-06, "loss": 0.0477, "step": 593325 }, { "epoch": 5.83, "grad_norm": 0.528569221496582, "learning_rate": 2.10499766649786e-06, "loss": 0.0785, "step": 593350 }, { "epoch": 5.83, "grad_norm": 1.6235456466674805, "learning_rate": 2.1048735440436117e-06, "loss": 0.0771, "step": 593375 }, { "epoch": 5.83, "grad_norm": 0.32235202193260193, "learning_rate": 2.1047494215893634e-06, "loss": 0.1001, "step": 593400 }, { "epoch": 5.83, "grad_norm": 7.675725936889648, "learning_rate": 2.104625299135115e-06, "loss": 0.0595, "step": 593425 }, { "epoch": 5.83, "grad_norm": 18.389305114746094, "learning_rate": 2.1045011766808666e-06, "loss": 0.0697, "step": 593450 }, { "epoch": 5.84, "grad_norm": 1.091470718383789, "learning_rate": 2.104377054226618e-06, "loss": 0.0452, "step": 593475 }, { "epoch": 5.84, "grad_norm": 0.2377782166004181, "learning_rate": 2.1042529317723695e-06, "loss": 0.0984, "step": 593500 }, { "epoch": 5.84, "grad_norm": 2.7282543182373047, "learning_rate": 2.104128809318121e-06, "loss": 0.0456, "step": 593525 }, { "epoch": 5.84, "grad_norm": 6.573806285858154, "learning_rate": 2.1040046868638723e-06, "loss": 0.0734, "step": 593550 }, { "epoch": 5.84, "grad_norm": 6.994320869445801, "learning_rate": 2.103880564409624e-06, "loss": 0.0385, "step": 593575 }, { "epoch": 5.84, "grad_norm": 24.569292068481445, "learning_rate": 2.1037564419553756e-06, "loss": 0.0869, "step": 593600 }, { "epoch": 5.84, "grad_norm": 6.878535747528076, "learning_rate": 2.1036323195011272e-06, "loss": 0.0327, "step": 593625 }, { "epoch": 5.84, "grad_norm": 0.685180127620697, "learning_rate": 2.103508197046879e-06, "loss": 0.082, "step": 593650 }, { "epoch": 5.84, "grad_norm": 4.383141040802002, "learning_rate": 2.1033840745926305e-06, "loss": 0.062, "step": 593675 }, { "epoch": 5.84, "grad_norm": 0.2927503287792206, "learning_rate": 2.1032599521383817e-06, "loss": 0.0736, "step": 593700 }, { "epoch": 5.84, "grad_norm": 9.103958129882812, "learning_rate": 2.1031358296841334e-06, "loss": 0.0452, "step": 593725 }, { "epoch": 5.84, "grad_norm": 0.9128289222717285, "learning_rate": 2.103011707229885e-06, "loss": 0.0655, "step": 593750 }, { "epoch": 5.84, "grad_norm": 11.40598201751709, "learning_rate": 2.102887584775636e-06, "loss": 0.0409, "step": 593775 }, { "epoch": 5.84, "grad_norm": 4.590135097503662, "learning_rate": 2.102763462321388e-06, "loss": 0.0697, "step": 593800 }, { "epoch": 5.84, "grad_norm": 11.408987998962402, "learning_rate": 2.1026393398671395e-06, "loss": 0.0374, "step": 593825 }, { "epoch": 5.84, "grad_norm": 7.8754706382751465, "learning_rate": 2.102515217412891e-06, "loss": 0.102, "step": 593850 }, { "epoch": 5.84, "grad_norm": 13.808773040771484, "learning_rate": 2.1023910949586427e-06, "loss": 0.0583, "step": 593875 }, { "epoch": 5.84, "grad_norm": 1.1397812366485596, "learning_rate": 2.102266972504394e-06, "loss": 0.0831, "step": 593900 }, { "epoch": 5.84, "grad_norm": 11.661005973815918, "learning_rate": 2.1021428500501456e-06, "loss": 0.0565, "step": 593925 }, { "epoch": 5.84, "grad_norm": 1.6669330596923828, "learning_rate": 2.1020187275958972e-06, "loss": 0.0906, "step": 593950 }, { "epoch": 5.84, "grad_norm": 7.2863054275512695, "learning_rate": 2.1018946051416484e-06, "loss": 0.043, "step": 593975 }, { "epoch": 5.84, "grad_norm": 4.526549339294434, "learning_rate": 2.1017704826874e-06, "loss": 0.0806, "step": 594000 }, { "epoch": 5.84, "grad_norm": 10.226882934570312, "learning_rate": 2.1016463602331517e-06, "loss": 0.0542, "step": 594025 }, { "epoch": 5.84, "grad_norm": 1.8314157724380493, "learning_rate": 2.1015222377789033e-06, "loss": 0.0752, "step": 594050 }, { "epoch": 5.84, "grad_norm": 9.933443069458008, "learning_rate": 2.101398115324655e-06, "loss": 0.0557, "step": 594075 }, { "epoch": 5.84, "grad_norm": 1.7991523742675781, "learning_rate": 2.1012739928704066e-06, "loss": 0.0885, "step": 594100 }, { "epoch": 5.84, "grad_norm": 10.443254470825195, "learning_rate": 2.101149870416158e-06, "loss": 0.0473, "step": 594125 }, { "epoch": 5.84, "grad_norm": 7.325793743133545, "learning_rate": 2.1010257479619095e-06, "loss": 0.08, "step": 594150 }, { "epoch": 5.84, "grad_norm": 9.21358871459961, "learning_rate": 2.100901625507661e-06, "loss": 0.0435, "step": 594175 }, { "epoch": 5.84, "grad_norm": 2.2254960536956787, "learning_rate": 2.1007775030534123e-06, "loss": 0.0955, "step": 594200 }, { "epoch": 5.84, "grad_norm": 9.364927291870117, "learning_rate": 2.100653380599164e-06, "loss": 0.0441, "step": 594225 }, { "epoch": 5.84, "grad_norm": 5.436654090881348, "learning_rate": 2.1005292581449156e-06, "loss": 0.0921, "step": 594250 }, { "epoch": 5.84, "grad_norm": 1.998533010482788, "learning_rate": 2.100405135690667e-06, "loss": 0.0568, "step": 594275 }, { "epoch": 5.84, "grad_norm": 0.23353488743305206, "learning_rate": 2.100281013236419e-06, "loss": 0.0971, "step": 594300 }, { "epoch": 5.84, "grad_norm": 6.537031650543213, "learning_rate": 2.10015689078217e-06, "loss": 0.0385, "step": 594325 }, { "epoch": 5.84, "grad_norm": 0.5876672863960266, "learning_rate": 2.1000327683279217e-06, "loss": 0.0779, "step": 594350 }, { "epoch": 5.84, "grad_norm": 17.136322021484375, "learning_rate": 2.0999086458736733e-06, "loss": 0.0576, "step": 594375 }, { "epoch": 5.84, "grad_norm": 6.054027080535889, "learning_rate": 2.0997845234194245e-06, "loss": 0.0739, "step": 594400 }, { "epoch": 5.84, "grad_norm": 23.69362449645996, "learning_rate": 2.099660400965176e-06, "loss": 0.0668, "step": 594425 }, { "epoch": 5.84, "grad_norm": 5.136203765869141, "learning_rate": 2.099536278510928e-06, "loss": 0.1085, "step": 594450 }, { "epoch": 5.84, "grad_norm": 5.054346561431885, "learning_rate": 2.0994121560566794e-06, "loss": 0.0561, "step": 594475 }, { "epoch": 5.85, "grad_norm": 0.5283608436584473, "learning_rate": 2.099288033602431e-06, "loss": 0.0797, "step": 594500 }, { "epoch": 5.85, "grad_norm": 8.362290382385254, "learning_rate": 2.0991639111481827e-06, "loss": 0.0465, "step": 594525 }, { "epoch": 5.85, "grad_norm": 6.188142776489258, "learning_rate": 2.099039788693934e-06, "loss": 0.1051, "step": 594550 }, { "epoch": 5.85, "grad_norm": 10.956939697265625, "learning_rate": 2.0989156662396856e-06, "loss": 0.049, "step": 594575 }, { "epoch": 5.85, "grad_norm": 3.905715227127075, "learning_rate": 2.098791543785437e-06, "loss": 0.0892, "step": 594600 }, { "epoch": 5.85, "grad_norm": 21.28205680847168, "learning_rate": 2.098667421331189e-06, "loss": 0.0622, "step": 594625 }, { "epoch": 5.85, "grad_norm": 5.739577293395996, "learning_rate": 2.0985432988769405e-06, "loss": 0.0859, "step": 594650 }, { "epoch": 5.85, "grad_norm": 7.910289287567139, "learning_rate": 2.0984191764226917e-06, "loss": 0.0598, "step": 594675 }, { "epoch": 5.85, "grad_norm": 2.828233003616333, "learning_rate": 2.0982950539684433e-06, "loss": 0.0814, "step": 594700 }, { "epoch": 5.85, "grad_norm": 16.85090446472168, "learning_rate": 2.098170931514195e-06, "loss": 0.042, "step": 594725 }, { "epoch": 5.85, "grad_norm": 0.767204761505127, "learning_rate": 2.098046809059946e-06, "loss": 0.0763, "step": 594750 }, { "epoch": 5.85, "grad_norm": 7.810185432434082, "learning_rate": 2.0979226866056978e-06, "loss": 0.0569, "step": 594775 }, { "epoch": 5.85, "grad_norm": 2.3573663234710693, "learning_rate": 2.0977985641514494e-06, "loss": 0.0793, "step": 594800 }, { "epoch": 5.85, "grad_norm": 14.302098274230957, "learning_rate": 2.097674441697201e-06, "loss": 0.0522, "step": 594825 }, { "epoch": 5.85, "grad_norm": 8.356751441955566, "learning_rate": 2.0975503192429527e-06, "loss": 0.0743, "step": 594850 }, { "epoch": 5.85, "grad_norm": 14.388860702514648, "learning_rate": 2.0974261967887043e-06, "loss": 0.0623, "step": 594875 }, { "epoch": 5.85, "grad_norm": 0.07328008860349655, "learning_rate": 2.0973020743344555e-06, "loss": 0.0967, "step": 594900 }, { "epoch": 5.85, "grad_norm": 9.585067749023438, "learning_rate": 2.097177951880207e-06, "loss": 0.0546, "step": 594925 }, { "epoch": 5.85, "grad_norm": 1.644584059715271, "learning_rate": 2.097053829425959e-06, "loss": 0.0754, "step": 594950 }, { "epoch": 5.85, "grad_norm": 13.740744590759277, "learning_rate": 2.09692970697171e-06, "loss": 0.048, "step": 594975 }, { "epoch": 5.85, "grad_norm": 0.8868786096572876, "learning_rate": 2.0968055845174617e-06, "loss": 0.1109, "step": 595000 }, { "epoch": 5.85, "grad_norm": 13.580565452575684, "learning_rate": 2.0966814620632133e-06, "loss": 0.0527, "step": 595025 }, { "epoch": 5.85, "grad_norm": 12.132893562316895, "learning_rate": 2.096557339608965e-06, "loss": 0.0582, "step": 595050 }, { "epoch": 5.85, "grad_norm": 11.902889251708984, "learning_rate": 2.0964332171547166e-06, "loss": 0.0528, "step": 595075 }, { "epoch": 5.85, "grad_norm": 3.8325746059417725, "learning_rate": 2.0963090947004678e-06, "loss": 0.0947, "step": 595100 }, { "epoch": 5.85, "grad_norm": 24.585308074951172, "learning_rate": 2.0961849722462194e-06, "loss": 0.0616, "step": 595125 }, { "epoch": 5.85, "grad_norm": 0.13740725815296173, "learning_rate": 2.096060849791971e-06, "loss": 0.057, "step": 595150 }, { "epoch": 5.85, "grad_norm": 32.636417388916016, "learning_rate": 2.0959367273377223e-06, "loss": 0.0658, "step": 595175 }, { "epoch": 5.85, "grad_norm": 0.024615231901407242, "learning_rate": 2.095812604883474e-06, "loss": 0.0992, "step": 595200 }, { "epoch": 5.85, "grad_norm": 6.116250038146973, "learning_rate": 2.0956884824292255e-06, "loss": 0.0352, "step": 595225 }, { "epoch": 5.85, "grad_norm": 1.6218613386154175, "learning_rate": 2.095564359974977e-06, "loss": 0.0921, "step": 595250 }, { "epoch": 5.85, "grad_norm": 15.205302238464355, "learning_rate": 2.095440237520729e-06, "loss": 0.067, "step": 595275 }, { "epoch": 5.85, "grad_norm": 2.653918504714966, "learning_rate": 2.0953161150664804e-06, "loss": 0.0869, "step": 595300 }, { "epoch": 5.85, "grad_norm": 8.905451774597168, "learning_rate": 2.0951919926122316e-06, "loss": 0.0418, "step": 595325 }, { "epoch": 5.85, "grad_norm": 1.1242974996566772, "learning_rate": 2.0950678701579833e-06, "loss": 0.0994, "step": 595350 }, { "epoch": 5.85, "grad_norm": 14.520609855651855, "learning_rate": 2.094943747703735e-06, "loss": 0.0427, "step": 595375 }, { "epoch": 5.85, "grad_norm": 6.0200114250183105, "learning_rate": 2.094819625249486e-06, "loss": 0.0727, "step": 595400 }, { "epoch": 5.85, "grad_norm": 7.927270889282227, "learning_rate": 2.0946955027952378e-06, "loss": 0.05, "step": 595425 }, { "epoch": 5.85, "grad_norm": 1.0119178295135498, "learning_rate": 2.0945713803409894e-06, "loss": 0.0547, "step": 595450 }, { "epoch": 5.85, "grad_norm": 6.926474094390869, "learning_rate": 2.094447257886741e-06, "loss": 0.0578, "step": 595475 }, { "epoch": 5.86, "grad_norm": 6.094287395477295, "learning_rate": 2.0943231354324927e-06, "loss": 0.1127, "step": 595500 }, { "epoch": 5.86, "grad_norm": 1.614125370979309, "learning_rate": 2.094199012978244e-06, "loss": 0.0469, "step": 595525 }, { "epoch": 5.86, "grad_norm": 2.1554417610168457, "learning_rate": 2.0940798554221657e-06, "loss": 0.0998, "step": 595550 }, { "epoch": 5.86, "grad_norm": 11.109305381774902, "learning_rate": 2.093955732967917e-06, "loss": 0.0341, "step": 595575 }, { "epoch": 5.86, "grad_norm": 0.18345896899700165, "learning_rate": 2.0938316105136686e-06, "loss": 0.1133, "step": 595600 }, { "epoch": 5.86, "grad_norm": 7.447051048278809, "learning_rate": 2.0937074880594202e-06, "loss": 0.0593, "step": 595625 }, { "epoch": 5.86, "grad_norm": 3.5472049713134766, "learning_rate": 2.0935833656051714e-06, "loss": 0.0686, "step": 595650 }, { "epoch": 5.86, "grad_norm": 10.670045852661133, "learning_rate": 2.093459243150923e-06, "loss": 0.0462, "step": 595675 }, { "epoch": 5.86, "grad_norm": 2.049363613128662, "learning_rate": 2.0933351206966747e-06, "loss": 0.0777, "step": 595700 }, { "epoch": 5.86, "grad_norm": 11.530455589294434, "learning_rate": 2.0932109982424263e-06, "loss": 0.0505, "step": 595725 }, { "epoch": 5.86, "grad_norm": 7.323957920074463, "learning_rate": 2.093086875788178e-06, "loss": 0.0681, "step": 595750 }, { "epoch": 5.86, "grad_norm": 12.589417457580566, "learning_rate": 2.092962753333929e-06, "loss": 0.0686, "step": 595775 }, { "epoch": 5.86, "grad_norm": 0.19660267233848572, "learning_rate": 2.092838630879681e-06, "loss": 0.0802, "step": 595800 }, { "epoch": 5.86, "grad_norm": 7.204867362976074, "learning_rate": 2.0927145084254325e-06, "loss": 0.054, "step": 595825 }, { "epoch": 5.86, "grad_norm": 0.7589742541313171, "learning_rate": 2.0925903859711837e-06, "loss": 0.0603, "step": 595850 }, { "epoch": 5.86, "grad_norm": 16.672433853149414, "learning_rate": 2.0924662635169353e-06, "loss": 0.059, "step": 595875 }, { "epoch": 5.86, "grad_norm": 1.383224368095398, "learning_rate": 2.092342141062687e-06, "loss": 0.0939, "step": 595900 }, { "epoch": 5.86, "grad_norm": 14.267056465148926, "learning_rate": 2.0922180186084386e-06, "loss": 0.0504, "step": 595925 }, { "epoch": 5.86, "grad_norm": 0.2801395058631897, "learning_rate": 2.09209389615419e-06, "loss": 0.0854, "step": 595950 }, { "epoch": 5.86, "grad_norm": 13.549911499023438, "learning_rate": 2.091969773699942e-06, "loss": 0.0498, "step": 595975 }, { "epoch": 5.86, "grad_norm": 0.28757381439208984, "learning_rate": 2.091845651245693e-06, "loss": 0.0987, "step": 596000 }, { "epoch": 5.86, "grad_norm": 10.315962791442871, "learning_rate": 2.0917215287914447e-06, "loss": 0.0369, "step": 596025 }, { "epoch": 5.86, "grad_norm": 0.26627179980278015, "learning_rate": 2.0915974063371963e-06, "loss": 0.0751, "step": 596050 }, { "epoch": 5.86, "grad_norm": 6.271578788757324, "learning_rate": 2.0914732838829475e-06, "loss": 0.0475, "step": 596075 }, { "epoch": 5.86, "grad_norm": 0.48470619320869446, "learning_rate": 2.091349161428699e-06, "loss": 0.0895, "step": 596100 }, { "epoch": 5.86, "grad_norm": 18.910160064697266, "learning_rate": 2.091225038974451e-06, "loss": 0.0689, "step": 596125 }, { "epoch": 5.86, "grad_norm": 3.961219072341919, "learning_rate": 2.0911009165202024e-06, "loss": 0.1044, "step": 596150 }, { "epoch": 5.86, "grad_norm": 14.511367797851562, "learning_rate": 2.090976794065954e-06, "loss": 0.0364, "step": 596175 }, { "epoch": 5.86, "grad_norm": 1.8693068027496338, "learning_rate": 2.0908526716117053e-06, "loss": 0.0564, "step": 596200 }, { "epoch": 5.86, "grad_norm": 13.827319145202637, "learning_rate": 2.090728549157457e-06, "loss": 0.0581, "step": 596225 }, { "epoch": 5.86, "grad_norm": 1.1507095098495483, "learning_rate": 2.0906044267032086e-06, "loss": 0.0784, "step": 596250 }, { "epoch": 5.86, "grad_norm": 16.38222885131836, "learning_rate": 2.0904803042489598e-06, "loss": 0.0555, "step": 596275 }, { "epoch": 5.86, "grad_norm": 1.7840853929519653, "learning_rate": 2.0903561817947114e-06, "loss": 0.0894, "step": 596300 }, { "epoch": 5.86, "grad_norm": 2.0129458904266357, "learning_rate": 2.090232059340463e-06, "loss": 0.0588, "step": 596325 }, { "epoch": 5.86, "grad_norm": 1.2485932111740112, "learning_rate": 2.0901079368862147e-06, "loss": 0.0592, "step": 596350 }, { "epoch": 5.86, "grad_norm": 2.2734875679016113, "learning_rate": 2.0899838144319663e-06, "loss": 0.059, "step": 596375 }, { "epoch": 5.86, "grad_norm": 1.3592876195907593, "learning_rate": 2.089859691977718e-06, "loss": 0.0979, "step": 596400 }, { "epoch": 5.86, "grad_norm": 10.648994445800781, "learning_rate": 2.089735569523469e-06, "loss": 0.0611, "step": 596425 }, { "epoch": 5.86, "grad_norm": 7.279477119445801, "learning_rate": 2.089611447069221e-06, "loss": 0.0983, "step": 596450 }, { "epoch": 5.86, "grad_norm": 3.3283426761627197, "learning_rate": 2.0894873246149724e-06, "loss": 0.0548, "step": 596475 }, { "epoch": 5.86, "grad_norm": 0.056437473744153976, "learning_rate": 2.0893632021607236e-06, "loss": 0.0643, "step": 596500 }, { "epoch": 5.87, "grad_norm": 7.7775468826293945, "learning_rate": 2.0892390797064753e-06, "loss": 0.0346, "step": 596525 }, { "epoch": 5.87, "grad_norm": 0.3294792175292969, "learning_rate": 2.089114957252227e-06, "loss": 0.0867, "step": 596550 }, { "epoch": 5.87, "grad_norm": 16.07487678527832, "learning_rate": 2.0889908347979785e-06, "loss": 0.0571, "step": 596575 }, { "epoch": 5.87, "grad_norm": 5.527571201324463, "learning_rate": 2.08886671234373e-06, "loss": 0.0881, "step": 596600 }, { "epoch": 5.87, "grad_norm": 16.991168975830078, "learning_rate": 2.0887425898894814e-06, "loss": 0.0694, "step": 596625 }, { "epoch": 5.87, "grad_norm": 1.1569336652755737, "learning_rate": 2.088618467435233e-06, "loss": 0.0893, "step": 596650 }, { "epoch": 5.87, "grad_norm": 18.456151962280273, "learning_rate": 2.0884943449809847e-06, "loss": 0.0656, "step": 596675 }, { "epoch": 5.87, "grad_norm": 6.230623245239258, "learning_rate": 2.088370222526736e-06, "loss": 0.079, "step": 596700 }, { "epoch": 5.87, "grad_norm": 4.6842827796936035, "learning_rate": 2.0882461000724875e-06, "loss": 0.0432, "step": 596725 }, { "epoch": 5.87, "grad_norm": 3.1335999965667725, "learning_rate": 2.088121977618239e-06, "loss": 0.0687, "step": 596750 }, { "epoch": 5.87, "grad_norm": 2.6810600757598877, "learning_rate": 2.0879978551639908e-06, "loss": 0.0449, "step": 596775 }, { "epoch": 5.87, "grad_norm": 11.813905715942383, "learning_rate": 2.0878737327097424e-06, "loss": 0.0742, "step": 596800 }, { "epoch": 5.87, "grad_norm": 12.995144844055176, "learning_rate": 2.087749610255494e-06, "loss": 0.0578, "step": 596825 }, { "epoch": 5.87, "grad_norm": 3.791485071182251, "learning_rate": 2.0876254878012453e-06, "loss": 0.0724, "step": 596850 }, { "epoch": 5.87, "grad_norm": 9.15416145324707, "learning_rate": 2.087501365346997e-06, "loss": 0.0369, "step": 596875 }, { "epoch": 5.87, "grad_norm": 0.00498928502202034, "learning_rate": 2.0873772428927485e-06, "loss": 0.0735, "step": 596900 }, { "epoch": 5.87, "grad_norm": 3.409245729446411, "learning_rate": 2.0872531204384997e-06, "loss": 0.0446, "step": 596925 }, { "epoch": 5.87, "grad_norm": 2.3686561584472656, "learning_rate": 2.0871289979842514e-06, "loss": 0.0755, "step": 596950 }, { "epoch": 5.87, "grad_norm": 7.428752422332764, "learning_rate": 2.087004875530003e-06, "loss": 0.0355, "step": 596975 }, { "epoch": 5.87, "grad_norm": 9.82951831817627, "learning_rate": 2.0868807530757546e-06, "loss": 0.0885, "step": 597000 }, { "epoch": 5.87, "grad_norm": 31.19507598876953, "learning_rate": 2.0867566306215063e-06, "loss": 0.0522, "step": 597025 }, { "epoch": 5.87, "grad_norm": 5.829272270202637, "learning_rate": 2.0866325081672575e-06, "loss": 0.0756, "step": 597050 }, { "epoch": 5.87, "grad_norm": 5.169315814971924, "learning_rate": 2.086508385713009e-06, "loss": 0.039, "step": 597075 }, { "epoch": 5.87, "grad_norm": 3.4289374351501465, "learning_rate": 2.0863842632587608e-06, "loss": 0.0687, "step": 597100 }, { "epoch": 5.87, "grad_norm": 10.150779724121094, "learning_rate": 2.086260140804512e-06, "loss": 0.0504, "step": 597125 }, { "epoch": 5.87, "grad_norm": 1.4422943592071533, "learning_rate": 2.0861360183502636e-06, "loss": 0.0744, "step": 597150 }, { "epoch": 5.87, "grad_norm": 4.774884223937988, "learning_rate": 2.0860118958960152e-06, "loss": 0.046, "step": 597175 }, { "epoch": 5.87, "grad_norm": 0.8998968005180359, "learning_rate": 2.085887773441767e-06, "loss": 0.0636, "step": 597200 }, { "epoch": 5.87, "grad_norm": 7.653597354888916, "learning_rate": 2.0857636509875185e-06, "loss": 0.0542, "step": 597225 }, { "epoch": 5.87, "grad_norm": 0.03676223382353783, "learning_rate": 2.08563952853327e-06, "loss": 0.0595, "step": 597250 }, { "epoch": 5.87, "grad_norm": 5.629401683807373, "learning_rate": 2.0855154060790214e-06, "loss": 0.0368, "step": 597275 }, { "epoch": 5.87, "grad_norm": 0.4802698493003845, "learning_rate": 2.085391283624773e-06, "loss": 0.0767, "step": 597300 }, { "epoch": 5.87, "grad_norm": 16.021556854248047, "learning_rate": 2.0852671611705246e-06, "loss": 0.0704, "step": 597325 }, { "epoch": 5.87, "grad_norm": 0.07924784719944, "learning_rate": 2.085143038716276e-06, "loss": 0.0744, "step": 597350 }, { "epoch": 5.87, "grad_norm": 10.766084671020508, "learning_rate": 2.0850189162620275e-06, "loss": 0.0374, "step": 597375 }, { "epoch": 5.87, "grad_norm": 0.34701064229011536, "learning_rate": 2.084894793807779e-06, "loss": 0.0783, "step": 597400 }, { "epoch": 5.87, "grad_norm": 12.389543533325195, "learning_rate": 2.0847706713535307e-06, "loss": 0.05, "step": 597425 }, { "epoch": 5.87, "grad_norm": 3.1412718296051025, "learning_rate": 2.0846465488992824e-06, "loss": 0.0593, "step": 597450 }, { "epoch": 5.87, "grad_norm": 14.49488353729248, "learning_rate": 2.0845224264450336e-06, "loss": 0.0574, "step": 597475 }, { "epoch": 5.87, "grad_norm": 5.776379108428955, "learning_rate": 2.0843983039907852e-06, "loss": 0.1069, "step": 597500 }, { "epoch": 5.87, "grad_norm": 18.60283851623535, "learning_rate": 2.084274181536537e-06, "loss": 0.0467, "step": 597525 }, { "epoch": 5.88, "grad_norm": 4.5537800788879395, "learning_rate": 2.0841500590822885e-06, "loss": 0.0847, "step": 597550 }, { "epoch": 5.88, "grad_norm": 11.423747062683105, "learning_rate": 2.08402593662804e-06, "loss": 0.0713, "step": 597575 }, { "epoch": 5.88, "grad_norm": 0.6117755174636841, "learning_rate": 2.0839067790719616e-06, "loss": 0.1027, "step": 597600 }, { "epoch": 5.88, "grad_norm": 3.0468716621398926, "learning_rate": 2.0837826566177132e-06, "loss": 0.0597, "step": 597625 }, { "epoch": 5.88, "grad_norm": 4.445363521575928, "learning_rate": 2.0836585341634644e-06, "loss": 0.0848, "step": 597650 }, { "epoch": 5.88, "grad_norm": 8.911333084106445, "learning_rate": 2.083534411709216e-06, "loss": 0.0508, "step": 597675 }, { "epoch": 5.88, "grad_norm": 3.3993866443634033, "learning_rate": 2.0834102892549677e-06, "loss": 0.093, "step": 597700 }, { "epoch": 5.88, "grad_norm": 6.5168914794921875, "learning_rate": 2.083286166800719e-06, "loss": 0.0555, "step": 597725 }, { "epoch": 5.88, "grad_norm": 5.902428150177002, "learning_rate": 2.0831620443464705e-06, "loss": 0.0779, "step": 597750 }, { "epoch": 5.88, "grad_norm": 25.4499568939209, "learning_rate": 2.083037921892222e-06, "loss": 0.0697, "step": 597775 }, { "epoch": 5.88, "grad_norm": 6.991116046905518, "learning_rate": 2.082913799437974e-06, "loss": 0.0864, "step": 597800 }, { "epoch": 5.88, "grad_norm": 9.11974811553955, "learning_rate": 2.0827896769837255e-06, "loss": 0.0576, "step": 597825 }, { "epoch": 5.88, "grad_norm": 0.05227147415280342, "learning_rate": 2.082665554529477e-06, "loss": 0.0596, "step": 597850 }, { "epoch": 5.88, "grad_norm": 12.499964714050293, "learning_rate": 2.0825414320752283e-06, "loss": 0.0486, "step": 597875 }, { "epoch": 5.88, "grad_norm": 3.8610355854034424, "learning_rate": 2.08241730962098e-06, "loss": 0.0635, "step": 597900 }, { "epoch": 5.88, "grad_norm": 10.26768684387207, "learning_rate": 2.0822931871667316e-06, "loss": 0.0478, "step": 597925 }, { "epoch": 5.88, "grad_norm": 7.092245101928711, "learning_rate": 2.0821690647124828e-06, "loss": 0.0673, "step": 597950 }, { "epoch": 5.88, "grad_norm": 9.076690673828125, "learning_rate": 2.0820449422582344e-06, "loss": 0.0532, "step": 597975 }, { "epoch": 5.88, "grad_norm": 2.4004416465759277, "learning_rate": 2.081920819803986e-06, "loss": 0.0553, "step": 598000 }, { "epoch": 5.88, "grad_norm": 6.145872592926025, "learning_rate": 2.0817966973497377e-06, "loss": 0.0779, "step": 598025 }, { "epoch": 5.88, "grad_norm": 4.249569892883301, "learning_rate": 2.0816725748954893e-06, "loss": 0.0802, "step": 598050 }, { "epoch": 5.88, "grad_norm": 16.273794174194336, "learning_rate": 2.0815484524412405e-06, "loss": 0.0514, "step": 598075 }, { "epoch": 5.88, "grad_norm": 1.7921310663223267, "learning_rate": 2.081424329986992e-06, "loss": 0.0891, "step": 598100 }, { "epoch": 5.88, "grad_norm": 8.416499137878418, "learning_rate": 2.081300207532744e-06, "loss": 0.0546, "step": 598125 }, { "epoch": 5.88, "grad_norm": 4.175732612609863, "learning_rate": 2.081176085078495e-06, "loss": 0.081, "step": 598150 }, { "epoch": 5.88, "grad_norm": 9.11420726776123, "learning_rate": 2.0810519626242466e-06, "loss": 0.0566, "step": 598175 }, { "epoch": 5.88, "grad_norm": 0.3922713100910187, "learning_rate": 2.0809278401699983e-06, "loss": 0.0861, "step": 598200 }, { "epoch": 5.88, "grad_norm": 9.165748596191406, "learning_rate": 2.08080371771575e-06, "loss": 0.0445, "step": 598225 }, { "epoch": 5.88, "grad_norm": 0.13850943744182587, "learning_rate": 2.0806795952615016e-06, "loss": 0.1025, "step": 598250 }, { "epoch": 5.88, "grad_norm": 17.679248809814453, "learning_rate": 2.080555472807253e-06, "loss": 0.06, "step": 598275 }, { "epoch": 5.88, "grad_norm": 0.48321184515953064, "learning_rate": 2.0804313503530044e-06, "loss": 0.1054, "step": 598300 }, { "epoch": 5.88, "grad_norm": 7.756409168243408, "learning_rate": 2.080307227898756e-06, "loss": 0.0491, "step": 598325 }, { "epoch": 5.88, "grad_norm": 3.4490597248077393, "learning_rate": 2.0801831054445077e-06, "loss": 0.1065, "step": 598350 }, { "epoch": 5.88, "grad_norm": 7.0222859382629395, "learning_rate": 2.080058982990259e-06, "loss": 0.0501, "step": 598375 }, { "epoch": 5.88, "grad_norm": 0.6837593913078308, "learning_rate": 2.0799348605360105e-06, "loss": 0.0706, "step": 598400 }, { "epoch": 5.88, "grad_norm": 16.079370498657227, "learning_rate": 2.079810738081762e-06, "loss": 0.059, "step": 598425 }, { "epoch": 5.88, "grad_norm": 2.890300989151001, "learning_rate": 2.0796866156275138e-06, "loss": 0.1091, "step": 598450 }, { "epoch": 5.88, "grad_norm": 9.991893768310547, "learning_rate": 2.0795624931732654e-06, "loss": 0.066, "step": 598475 }, { "epoch": 5.88, "grad_norm": 1.849948763847351, "learning_rate": 2.0794383707190166e-06, "loss": 0.0691, "step": 598500 }, { "epoch": 5.88, "grad_norm": 0.6290813684463501, "learning_rate": 2.0793142482647683e-06, "loss": 0.041, "step": 598525 }, { "epoch": 5.89, "grad_norm": 4.872063159942627, "learning_rate": 2.07919012581052e-06, "loss": 0.0804, "step": 598550 }, { "epoch": 5.89, "grad_norm": 4.1991071701049805, "learning_rate": 2.079066003356271e-06, "loss": 0.0613, "step": 598575 }, { "epoch": 5.89, "grad_norm": 0.261209636926651, "learning_rate": 2.0789418809020227e-06, "loss": 0.0925, "step": 598600 }, { "epoch": 5.89, "grad_norm": 9.414607048034668, "learning_rate": 2.0788177584477744e-06, "loss": 0.0484, "step": 598625 }, { "epoch": 5.89, "grad_norm": 0.44296401739120483, "learning_rate": 2.078693635993526e-06, "loss": 0.0883, "step": 598650 }, { "epoch": 5.89, "grad_norm": 5.338807582855225, "learning_rate": 2.0785695135392777e-06, "loss": 0.0523, "step": 598675 }, { "epoch": 5.89, "grad_norm": 5.965001583099365, "learning_rate": 2.0784453910850293e-06, "loss": 0.101, "step": 598700 }, { "epoch": 5.89, "grad_norm": 9.87581729888916, "learning_rate": 2.0783212686307805e-06, "loss": 0.0494, "step": 598725 }, { "epoch": 5.89, "grad_norm": 6.34636116027832, "learning_rate": 2.078197146176532e-06, "loss": 0.0933, "step": 598750 }, { "epoch": 5.89, "grad_norm": 13.234004974365234, "learning_rate": 2.0780730237222838e-06, "loss": 0.0544, "step": 598775 }, { "epoch": 5.89, "grad_norm": 0.838882327079773, "learning_rate": 2.077948901268035e-06, "loss": 0.0688, "step": 598800 }, { "epoch": 5.89, "grad_norm": 15.274360656738281, "learning_rate": 2.0778247788137866e-06, "loss": 0.056, "step": 598825 }, { "epoch": 5.89, "grad_norm": 4.379507541656494, "learning_rate": 2.0777006563595382e-06, "loss": 0.0834, "step": 598850 }, { "epoch": 5.89, "grad_norm": 19.8227481842041, "learning_rate": 2.07757653390529e-06, "loss": 0.0629, "step": 598875 }, { "epoch": 5.89, "grad_norm": 3.5915379524230957, "learning_rate": 2.0774524114510415e-06, "loss": 0.102, "step": 598900 }, { "epoch": 5.89, "grad_norm": 25.16304588317871, "learning_rate": 2.0773282889967927e-06, "loss": 0.064, "step": 598925 }, { "epoch": 5.89, "grad_norm": 2.0781095027923584, "learning_rate": 2.0772041665425444e-06, "loss": 0.0969, "step": 598950 }, { "epoch": 5.89, "grad_norm": 6.608835697174072, "learning_rate": 2.077080044088296e-06, "loss": 0.044, "step": 598975 }, { "epoch": 5.89, "grad_norm": 0.14631856977939606, "learning_rate": 2.0769559216340472e-06, "loss": 0.0816, "step": 599000 }, { "epoch": 5.89, "grad_norm": 13.65629768371582, "learning_rate": 2.076831799179799e-06, "loss": 0.0483, "step": 599025 }, { "epoch": 5.89, "grad_norm": 0.37066009640693665, "learning_rate": 2.0767076767255505e-06, "loss": 0.0894, "step": 599050 }, { "epoch": 5.89, "grad_norm": 11.65739631652832, "learning_rate": 2.076583554271302e-06, "loss": 0.0321, "step": 599075 }, { "epoch": 5.89, "grad_norm": 5.616399765014648, "learning_rate": 2.0764594318170538e-06, "loss": 0.0821, "step": 599100 }, { "epoch": 5.89, "grad_norm": 9.787896156311035, "learning_rate": 2.0763353093628054e-06, "loss": 0.0372, "step": 599125 }, { "epoch": 5.89, "grad_norm": 6.5660223960876465, "learning_rate": 2.0762111869085566e-06, "loss": 0.0787, "step": 599150 }, { "epoch": 5.89, "grad_norm": 10.183918952941895, "learning_rate": 2.0760870644543082e-06, "loss": 0.0566, "step": 599175 }, { "epoch": 5.89, "grad_norm": 0.6436402797698975, "learning_rate": 2.07596294200006e-06, "loss": 0.0715, "step": 599200 }, { "epoch": 5.89, "grad_norm": 10.790522575378418, "learning_rate": 2.075838819545811e-06, "loss": 0.0438, "step": 599225 }, { "epoch": 5.89, "grad_norm": 0.05668405070900917, "learning_rate": 2.0757146970915627e-06, "loss": 0.0929, "step": 599250 }, { "epoch": 5.89, "grad_norm": 19.191572189331055, "learning_rate": 2.0755905746373143e-06, "loss": 0.0449, "step": 599275 }, { "epoch": 5.89, "grad_norm": 5.300939559936523, "learning_rate": 2.075466452183066e-06, "loss": 0.0726, "step": 599300 }, { "epoch": 5.89, "grad_norm": 2.8385088443756104, "learning_rate": 2.0753423297288176e-06, "loss": 0.0451, "step": 599325 }, { "epoch": 5.89, "grad_norm": 11.816255569458008, "learning_rate": 2.075218207274569e-06, "loss": 0.0854, "step": 599350 }, { "epoch": 5.89, "grad_norm": 5.8545966148376465, "learning_rate": 2.0750940848203205e-06, "loss": 0.0412, "step": 599375 }, { "epoch": 5.89, "grad_norm": 8.36596965789795, "learning_rate": 2.074969962366072e-06, "loss": 0.1095, "step": 599400 }, { "epoch": 5.89, "grad_norm": 7.605998992919922, "learning_rate": 2.0748458399118233e-06, "loss": 0.0465, "step": 599425 }, { "epoch": 5.89, "grad_norm": 0.27608028054237366, "learning_rate": 2.074721717457575e-06, "loss": 0.0697, "step": 599450 }, { "epoch": 5.89, "grad_norm": 5.83840799331665, "learning_rate": 2.0745975950033266e-06, "loss": 0.0602, "step": 599475 }, { "epoch": 5.89, "grad_norm": 0.2584044635295868, "learning_rate": 2.0744734725490782e-06, "loss": 0.0788, "step": 599500 }, { "epoch": 5.89, "grad_norm": 10.081137657165527, "learning_rate": 2.07434935009483e-06, "loss": 0.0575, "step": 599525 }, { "epoch": 5.89, "grad_norm": 7.225454807281494, "learning_rate": 2.0742252276405815e-06, "loss": 0.0753, "step": 599550 }, { "epoch": 5.9, "grad_norm": 4.592650890350342, "learning_rate": 2.0741011051863327e-06, "loss": 0.0699, "step": 599575 }, { "epoch": 5.9, "grad_norm": 1.514946699142456, "learning_rate": 2.0739769827320843e-06, "loss": 0.0752, "step": 599600 }, { "epoch": 5.9, "grad_norm": 7.139976501464844, "learning_rate": 2.073852860277836e-06, "loss": 0.0571, "step": 599625 }, { "epoch": 5.9, "grad_norm": 0.1848382204771042, "learning_rate": 2.073728737823587e-06, "loss": 0.0723, "step": 599650 }, { "epoch": 5.9, "grad_norm": 6.8436102867126465, "learning_rate": 2.073604615369339e-06, "loss": 0.076, "step": 599675 }, { "epoch": 5.9, "grad_norm": 5.4181013107299805, "learning_rate": 2.0734804929150905e-06, "loss": 0.0862, "step": 599700 }, { "epoch": 5.9, "grad_norm": 10.079463958740234, "learning_rate": 2.073356370460842e-06, "loss": 0.0632, "step": 599725 }, { "epoch": 5.9, "grad_norm": 2.2479794025421143, "learning_rate": 2.0732322480065937e-06, "loss": 0.0778, "step": 599750 }, { "epoch": 5.9, "grad_norm": 6.491302013397217, "learning_rate": 2.073108125552345e-06, "loss": 0.0542, "step": 599775 }, { "epoch": 5.9, "grad_norm": 4.193663597106934, "learning_rate": 2.0729840030980966e-06, "loss": 0.0717, "step": 599800 }, { "epoch": 5.9, "grad_norm": 17.816186904907227, "learning_rate": 2.072859880643848e-06, "loss": 0.0569, "step": 599825 }, { "epoch": 5.9, "grad_norm": 0.7438623309135437, "learning_rate": 2.0727357581895994e-06, "loss": 0.0928, "step": 599850 }, { "epoch": 5.9, "grad_norm": 9.204055786132812, "learning_rate": 2.072611635735351e-06, "loss": 0.0422, "step": 599875 }, { "epoch": 5.9, "grad_norm": 0.5268508791923523, "learning_rate": 2.0724875132811027e-06, "loss": 0.0902, "step": 599900 }, { "epoch": 5.9, "grad_norm": 10.16236686706543, "learning_rate": 2.0723633908268543e-06, "loss": 0.0589, "step": 599925 }, { "epoch": 5.9, "grad_norm": 4.840709686279297, "learning_rate": 2.072239268372606e-06, "loss": 0.0915, "step": 599950 }, { "epoch": 5.9, "grad_norm": 9.486553192138672, "learning_rate": 2.0721151459183576e-06, "loss": 0.0792, "step": 599975 }, { "epoch": 5.9, "grad_norm": 4.131954669952393, "learning_rate": 2.071991023464109e-06, "loss": 0.0794, "step": 600000 }, { "epoch": 5.9, "eval_loss": 0.7566215395927429, "eval_runtime": 6108.6262, "eval_samples_per_second": 1.55, "eval_steps_per_second": 0.194, "eval_wer": 0.11343591064185926, "step": 600000 }, { "epoch": 5.9, "grad_norm": 8.490426063537598, "learning_rate": 2.0718669010098604e-06, "loss": 0.0484, "step": 600025 }, { "epoch": 5.9, "grad_norm": 0.19908297061920166, "learning_rate": 2.071742778555612e-06, "loss": 0.0952, "step": 600050 }, { "epoch": 5.9, "grad_norm": 0.512880802154541, "learning_rate": 2.0716186561013633e-06, "loss": 0.058, "step": 600075 }, { "epoch": 5.9, "grad_norm": 4.032925605773926, "learning_rate": 2.0714994985452847e-06, "loss": 0.0923, "step": 600100 }, { "epoch": 5.9, "grad_norm": 10.87142276763916, "learning_rate": 2.0713753760910364e-06, "loss": 0.0751, "step": 600125 }, { "epoch": 5.9, "grad_norm": 4.025012969970703, "learning_rate": 2.071251253636788e-06, "loss": 0.077, "step": 600150 }, { "epoch": 5.9, "grad_norm": 8.868512153625488, "learning_rate": 2.0711271311825396e-06, "loss": 0.0573, "step": 600175 }, { "epoch": 5.9, "grad_norm": 3.3717286586761475, "learning_rate": 2.0710030087282913e-06, "loss": 0.0718, "step": 600200 }, { "epoch": 5.9, "grad_norm": 10.200309753417969, "learning_rate": 2.070878886274043e-06, "loss": 0.0513, "step": 600225 }, { "epoch": 5.9, "grad_norm": 0.1737966537475586, "learning_rate": 2.070754763819794e-06, "loss": 0.1008, "step": 600250 }, { "epoch": 5.9, "grad_norm": 13.27181339263916, "learning_rate": 2.0706306413655458e-06, "loss": 0.059, "step": 600275 }, { "epoch": 5.9, "grad_norm": 0.10228278487920761, "learning_rate": 2.070506518911297e-06, "loss": 0.0901, "step": 600300 }, { "epoch": 5.9, "grad_norm": 7.764076232910156, "learning_rate": 2.0703823964570486e-06, "loss": 0.0367, "step": 600325 }, { "epoch": 5.9, "grad_norm": 5.145415782928467, "learning_rate": 2.0702582740028002e-06, "loss": 0.0863, "step": 600350 }, { "epoch": 5.9, "grad_norm": 5.762113094329834, "learning_rate": 2.070134151548552e-06, "loss": 0.04, "step": 600375 }, { "epoch": 5.9, "grad_norm": 6.802370548248291, "learning_rate": 2.0700100290943035e-06, "loss": 0.0875, "step": 600400 }, { "epoch": 5.9, "grad_norm": 11.483717918395996, "learning_rate": 2.069885906640055e-06, "loss": 0.041, "step": 600425 }, { "epoch": 5.9, "grad_norm": 2.5701146125793457, "learning_rate": 2.0697617841858064e-06, "loss": 0.0746, "step": 600450 }, { "epoch": 5.9, "grad_norm": 22.849822998046875, "learning_rate": 2.069637661731558e-06, "loss": 0.0721, "step": 600475 }, { "epoch": 5.9, "grad_norm": 3.0274221897125244, "learning_rate": 2.0695135392773096e-06, "loss": 0.0836, "step": 600500 }, { "epoch": 5.9, "grad_norm": 8.77849006652832, "learning_rate": 2.0693894168230613e-06, "loss": 0.0688, "step": 600525 }, { "epoch": 5.9, "grad_norm": 15.779775619506836, "learning_rate": 2.069265294368813e-06, "loss": 0.0806, "step": 600550 }, { "epoch": 5.9, "grad_norm": 11.393092155456543, "learning_rate": 2.0691411719145645e-06, "loss": 0.0568, "step": 600575 }, { "epoch": 5.91, "grad_norm": 0.20123301446437836, "learning_rate": 2.0690170494603157e-06, "loss": 0.0749, "step": 600600 }, { "epoch": 5.91, "grad_norm": 11.66538143157959, "learning_rate": 2.0688929270060674e-06, "loss": 0.0458, "step": 600625 }, { "epoch": 5.91, "grad_norm": 3.3570737838745117, "learning_rate": 2.068768804551819e-06, "loss": 0.0873, "step": 600650 }, { "epoch": 5.91, "grad_norm": 15.389534950256348, "learning_rate": 2.0686446820975702e-06, "loss": 0.051, "step": 600675 }, { "epoch": 5.91, "grad_norm": 1.4310745000839233, "learning_rate": 2.068520559643322e-06, "loss": 0.0815, "step": 600700 }, { "epoch": 5.91, "grad_norm": 4.70588493347168, "learning_rate": 2.0683964371890735e-06, "loss": 0.0573, "step": 600725 }, { "epoch": 5.91, "grad_norm": 0.041684288531541824, "learning_rate": 2.068272314734825e-06, "loss": 0.0668, "step": 600750 }, { "epoch": 5.91, "grad_norm": 6.266854286193848, "learning_rate": 2.0681481922805768e-06, "loss": 0.0427, "step": 600775 }, { "epoch": 5.91, "grad_norm": 6.401613235473633, "learning_rate": 2.068024069826328e-06, "loss": 0.0815, "step": 600800 }, { "epoch": 5.91, "grad_norm": 12.200325965881348, "learning_rate": 2.0678999473720796e-06, "loss": 0.0621, "step": 600825 }, { "epoch": 5.91, "grad_norm": 7.465326309204102, "learning_rate": 2.0677758249178312e-06, "loss": 0.1008, "step": 600850 }, { "epoch": 5.91, "grad_norm": 6.621246337890625, "learning_rate": 2.0676517024635825e-06, "loss": 0.0327, "step": 600875 }, { "epoch": 5.91, "grad_norm": 7.045843124389648, "learning_rate": 2.067527580009334e-06, "loss": 0.1071, "step": 600900 }, { "epoch": 5.91, "grad_norm": 4.102765083312988, "learning_rate": 2.0674034575550857e-06, "loss": 0.0556, "step": 600925 }, { "epoch": 5.91, "grad_norm": 4.548943042755127, "learning_rate": 2.0672793351008374e-06, "loss": 0.0804, "step": 600950 }, { "epoch": 5.91, "grad_norm": 8.774331092834473, "learning_rate": 2.067155212646589e-06, "loss": 0.0365, "step": 600975 }, { "epoch": 5.91, "grad_norm": 4.571117877960205, "learning_rate": 2.0670310901923406e-06, "loss": 0.1067, "step": 601000 }, { "epoch": 5.91, "grad_norm": 16.911497116088867, "learning_rate": 2.066906967738092e-06, "loss": 0.058, "step": 601025 }, { "epoch": 5.91, "grad_norm": 0.821694016456604, "learning_rate": 2.0667828452838435e-06, "loss": 0.0669, "step": 601050 }, { "epoch": 5.91, "grad_norm": 12.08342170715332, "learning_rate": 2.066658722829595e-06, "loss": 0.0467, "step": 601075 }, { "epoch": 5.91, "grad_norm": 0.9624191522598267, "learning_rate": 2.0665346003753463e-06, "loss": 0.0707, "step": 601100 }, { "epoch": 5.91, "grad_norm": 15.619766235351562, "learning_rate": 2.066410477921098e-06, "loss": 0.0608, "step": 601125 }, { "epoch": 5.91, "grad_norm": 5.540173053741455, "learning_rate": 2.0662863554668496e-06, "loss": 0.0639, "step": 601150 }, { "epoch": 5.91, "grad_norm": 7.5626606941223145, "learning_rate": 2.0661622330126012e-06, "loss": 0.0448, "step": 601175 }, { "epoch": 5.91, "grad_norm": 5.41936731338501, "learning_rate": 2.066038110558353e-06, "loss": 0.0811, "step": 601200 }, { "epoch": 5.91, "grad_norm": 11.61925220489502, "learning_rate": 2.065913988104104e-06, "loss": 0.0548, "step": 601225 }, { "epoch": 5.91, "grad_norm": 2.5365583896636963, "learning_rate": 2.0657898656498557e-06, "loss": 0.1005, "step": 601250 }, { "epoch": 5.91, "grad_norm": 10.359390258789062, "learning_rate": 2.0656657431956073e-06, "loss": 0.0618, "step": 601275 }, { "epoch": 5.91, "grad_norm": 1.7052490711212158, "learning_rate": 2.0655416207413586e-06, "loss": 0.0978, "step": 601300 }, { "epoch": 5.91, "grad_norm": 6.727884769439697, "learning_rate": 2.06541749828711e-06, "loss": 0.045, "step": 601325 }, { "epoch": 5.91, "grad_norm": 1.3286691904067993, "learning_rate": 2.065293375832862e-06, "loss": 0.0795, "step": 601350 }, { "epoch": 5.91, "grad_norm": 8.300765991210938, "learning_rate": 2.0651692533786135e-06, "loss": 0.0465, "step": 601375 }, { "epoch": 5.91, "grad_norm": 1.5423654317855835, "learning_rate": 2.065045130924365e-06, "loss": 0.0917, "step": 601400 }, { "epoch": 5.91, "grad_norm": 18.10274887084961, "learning_rate": 2.0649210084701167e-06, "loss": 0.0367, "step": 601425 }, { "epoch": 5.91, "grad_norm": 15.323110580444336, "learning_rate": 2.064796886015868e-06, "loss": 0.1104, "step": 601450 }, { "epoch": 5.91, "grad_norm": 11.55394172668457, "learning_rate": 2.0646727635616196e-06, "loss": 0.0554, "step": 601475 }, { "epoch": 5.91, "grad_norm": 7.12364387512207, "learning_rate": 2.064548641107371e-06, "loss": 0.0893, "step": 601500 }, { "epoch": 5.91, "grad_norm": 15.307823181152344, "learning_rate": 2.0644245186531224e-06, "loss": 0.0719, "step": 601525 }, { "epoch": 5.91, "grad_norm": 0.10005129873752594, "learning_rate": 2.064300396198874e-06, "loss": 0.0915, "step": 601550 }, { "epoch": 5.91, "grad_norm": 7.5609917640686035, "learning_rate": 2.0641762737446257e-06, "loss": 0.0491, "step": 601575 }, { "epoch": 5.92, "grad_norm": 10.5794038772583, "learning_rate": 2.0640521512903773e-06, "loss": 0.0999, "step": 601600 }, { "epoch": 5.92, "grad_norm": 19.87599754333496, "learning_rate": 2.063928028836129e-06, "loss": 0.0421, "step": 601625 }, { "epoch": 5.92, "grad_norm": 2.3358027935028076, "learning_rate": 2.06380390638188e-06, "loss": 0.0841, "step": 601650 }, { "epoch": 5.92, "grad_norm": 3.214634418487549, "learning_rate": 2.063679783927632e-06, "loss": 0.0414, "step": 601675 }, { "epoch": 5.92, "grad_norm": 0.24971061944961548, "learning_rate": 2.0635556614733834e-06, "loss": 0.1138, "step": 601700 }, { "epoch": 5.92, "grad_norm": 1.3156304359436035, "learning_rate": 2.0634315390191347e-06, "loss": 0.0477, "step": 601725 }, { "epoch": 5.92, "grad_norm": 2.039884090423584, "learning_rate": 2.0633074165648863e-06, "loss": 0.1125, "step": 601750 }, { "epoch": 5.92, "grad_norm": 17.8240966796875, "learning_rate": 2.063183294110638e-06, "loss": 0.0586, "step": 601775 }, { "epoch": 5.92, "grad_norm": 3.4099578857421875, "learning_rate": 2.0630591716563896e-06, "loss": 0.0857, "step": 601800 }, { "epoch": 5.92, "grad_norm": 8.764625549316406, "learning_rate": 2.062935049202141e-06, "loss": 0.0662, "step": 601825 }, { "epoch": 5.92, "grad_norm": 4.796339988708496, "learning_rate": 2.062810926747893e-06, "loss": 0.0899, "step": 601850 }, { "epoch": 5.92, "grad_norm": 8.67066478729248, "learning_rate": 2.062686804293644e-06, "loss": 0.0644, "step": 601875 }, { "epoch": 5.92, "grad_norm": 6.123068809509277, "learning_rate": 2.0625626818393957e-06, "loss": 0.088, "step": 601900 }, { "epoch": 5.92, "grad_norm": 11.125951766967773, "learning_rate": 2.0624385593851473e-06, "loss": 0.0561, "step": 601925 }, { "epoch": 5.92, "grad_norm": 9.905641555786133, "learning_rate": 2.0623144369308985e-06, "loss": 0.0683, "step": 601950 }, { "epoch": 5.92, "grad_norm": 3.6980440616607666, "learning_rate": 2.06219031447665e-06, "loss": 0.0423, "step": 601975 }, { "epoch": 5.92, "grad_norm": 2.4920856952667236, "learning_rate": 2.062066192022402e-06, "loss": 0.0767, "step": 602000 }, { "epoch": 5.92, "grad_norm": 2.587484121322632, "learning_rate": 2.0619420695681534e-06, "loss": 0.0476, "step": 602025 }, { "epoch": 5.92, "grad_norm": 0.7932862043380737, "learning_rate": 2.061817947113905e-06, "loss": 0.0797, "step": 602050 }, { "epoch": 5.92, "grad_norm": 6.659818172454834, "learning_rate": 2.0616938246596563e-06, "loss": 0.036, "step": 602075 }, { "epoch": 5.92, "grad_norm": 2.938850164413452, "learning_rate": 2.061569702205408e-06, "loss": 0.0902, "step": 602100 }, { "epoch": 5.92, "grad_norm": 10.005578994750977, "learning_rate": 2.0614455797511595e-06, "loss": 0.0587, "step": 602125 }, { "epoch": 5.92, "grad_norm": 8.864160537719727, "learning_rate": 2.0613214572969108e-06, "loss": 0.1173, "step": 602150 }, { "epoch": 5.92, "grad_norm": 7.859665870666504, "learning_rate": 2.0611973348426624e-06, "loss": 0.0477, "step": 602175 }, { "epoch": 5.92, "grad_norm": 0.1206708773970604, "learning_rate": 2.061073212388414e-06, "loss": 0.0711, "step": 602200 }, { "epoch": 5.92, "grad_norm": 13.961910247802734, "learning_rate": 2.0609490899341657e-06, "loss": 0.0594, "step": 602225 }, { "epoch": 5.92, "grad_norm": 0.24794386327266693, "learning_rate": 2.0608249674799173e-06, "loss": 0.0659, "step": 602250 }, { "epoch": 5.92, "grad_norm": 13.123865127563477, "learning_rate": 2.060700845025669e-06, "loss": 0.036, "step": 602275 }, { "epoch": 5.92, "grad_norm": 0.006298006046563387, "learning_rate": 2.06057672257142e-06, "loss": 0.0579, "step": 602300 }, { "epoch": 5.92, "grad_norm": 9.883838653564453, "learning_rate": 2.0604526001171718e-06, "loss": 0.0585, "step": 602325 }, { "epoch": 5.92, "grad_norm": 4.294837951660156, "learning_rate": 2.0603284776629234e-06, "loss": 0.0887, "step": 602350 }, { "epoch": 5.92, "grad_norm": 8.939294815063477, "learning_rate": 2.0602043552086746e-06, "loss": 0.0527, "step": 602375 }, { "epoch": 5.92, "grad_norm": 0.16826272010803223, "learning_rate": 2.0600802327544263e-06, "loss": 0.0865, "step": 602400 }, { "epoch": 5.92, "grad_norm": 10.486604690551758, "learning_rate": 2.059956110300178e-06, "loss": 0.0483, "step": 602425 }, { "epoch": 5.92, "grad_norm": 2.9843971729278564, "learning_rate": 2.0598319878459295e-06, "loss": 0.0852, "step": 602450 }, { "epoch": 5.92, "grad_norm": 2.719597101211548, "learning_rate": 2.059707865391681e-06, "loss": 0.0442, "step": 602475 }, { "epoch": 5.92, "grad_norm": 4.250190734863281, "learning_rate": 2.0595837429374324e-06, "loss": 0.0824, "step": 602500 }, { "epoch": 5.92, "grad_norm": 13.41266918182373, "learning_rate": 2.0594645853813542e-06, "loss": 0.0565, "step": 602525 }, { "epoch": 5.92, "grad_norm": 1.304668664932251, "learning_rate": 2.0593404629271055e-06, "loss": 0.063, "step": 602550 }, { "epoch": 5.92, "grad_norm": 2.406409502029419, "learning_rate": 2.059216340472857e-06, "loss": 0.0359, "step": 602575 }, { "epoch": 5.92, "grad_norm": 6.25261926651001, "learning_rate": 2.0590922180186083e-06, "loss": 0.0845, "step": 602600 }, { "epoch": 5.93, "grad_norm": 11.636272430419922, "learning_rate": 2.05896809556436e-06, "loss": 0.0538, "step": 602625 }, { "epoch": 5.93, "grad_norm": 0.12999731302261353, "learning_rate": 2.0588439731101116e-06, "loss": 0.0916, "step": 602650 }, { "epoch": 5.93, "grad_norm": 5.242142200469971, "learning_rate": 2.058719850655863e-06, "loss": 0.0578, "step": 602675 }, { "epoch": 5.93, "grad_norm": 0.13352550566196442, "learning_rate": 2.058595728201615e-06, "loss": 0.0742, "step": 602700 }, { "epoch": 5.93, "grad_norm": 3.67126727104187, "learning_rate": 2.0584716057473665e-06, "loss": 0.0531, "step": 602725 }, { "epoch": 5.93, "grad_norm": 2.399291753768921, "learning_rate": 2.0583474832931177e-06, "loss": 0.1152, "step": 602750 }, { "epoch": 5.93, "grad_norm": 4.38016414642334, "learning_rate": 2.0582233608388693e-06, "loss": 0.0353, "step": 602775 }, { "epoch": 5.93, "grad_norm": 0.05888175219297409, "learning_rate": 2.058099238384621e-06, "loss": 0.0606, "step": 602800 }, { "epoch": 5.93, "grad_norm": 7.114405155181885, "learning_rate": 2.057975115930372e-06, "loss": 0.0319, "step": 602825 }, { "epoch": 5.93, "grad_norm": 0.045995619148015976, "learning_rate": 2.057850993476124e-06, "loss": 0.091, "step": 602850 }, { "epoch": 5.93, "grad_norm": 3.481297492980957, "learning_rate": 2.0577268710218754e-06, "loss": 0.0472, "step": 602875 }, { "epoch": 5.93, "grad_norm": 3.606862783432007, "learning_rate": 2.057602748567627e-06, "loss": 0.0642, "step": 602900 }, { "epoch": 5.93, "grad_norm": 10.329002380371094, "learning_rate": 2.0574786261133787e-06, "loss": 0.0568, "step": 602925 }, { "epoch": 5.93, "grad_norm": 1.293498158454895, "learning_rate": 2.0573545036591303e-06, "loss": 0.0759, "step": 602950 }, { "epoch": 5.93, "grad_norm": 11.70096206665039, "learning_rate": 2.0572303812048816e-06, "loss": 0.04, "step": 602975 }, { "epoch": 5.93, "grad_norm": 0.05520385131239891, "learning_rate": 2.057106258750633e-06, "loss": 0.0987, "step": 603000 }, { "epoch": 5.93, "grad_norm": 32.87519073486328, "learning_rate": 2.0569821362963844e-06, "loss": 0.067, "step": 603025 }, { "epoch": 5.93, "grad_norm": 0.5608901977539062, "learning_rate": 2.056858013842136e-06, "loss": 0.0862, "step": 603050 }, { "epoch": 5.93, "grad_norm": 8.00322151184082, "learning_rate": 2.0567338913878877e-06, "loss": 0.0507, "step": 603075 }, { "epoch": 5.93, "grad_norm": 0.03867639973759651, "learning_rate": 2.0566097689336393e-06, "loss": 0.0842, "step": 603100 }, { "epoch": 5.93, "grad_norm": 5.733193874359131, "learning_rate": 2.056485646479391e-06, "loss": 0.0346, "step": 603125 }, { "epoch": 5.93, "grad_norm": 3.261915445327759, "learning_rate": 2.0563615240251426e-06, "loss": 0.0643, "step": 603150 }, { "epoch": 5.93, "grad_norm": 5.846291542053223, "learning_rate": 2.056237401570894e-06, "loss": 0.0443, "step": 603175 }, { "epoch": 5.93, "grad_norm": 0.40266963839530945, "learning_rate": 2.0561132791166454e-06, "loss": 0.0687, "step": 603200 }, { "epoch": 5.93, "grad_norm": 15.842013359069824, "learning_rate": 2.055989156662397e-06, "loss": 0.0598, "step": 603225 }, { "epoch": 5.93, "grad_norm": 5.2162699699401855, "learning_rate": 2.0558650342081483e-06, "loss": 0.0927, "step": 603250 }, { "epoch": 5.93, "grad_norm": 7.3681488037109375, "learning_rate": 2.0557409117539e-06, "loss": 0.0416, "step": 603275 }, { "epoch": 5.93, "grad_norm": 7.531885623931885, "learning_rate": 2.0556167892996515e-06, "loss": 0.0834, "step": 603300 }, { "epoch": 5.93, "grad_norm": 7.80665397644043, "learning_rate": 2.055492666845403e-06, "loss": 0.0631, "step": 603325 }, { "epoch": 5.93, "grad_norm": 0.05929911509156227, "learning_rate": 2.055368544391155e-06, "loss": 0.0529, "step": 603350 }, { "epoch": 5.93, "grad_norm": 14.357108116149902, "learning_rate": 2.0552444219369064e-06, "loss": 0.0606, "step": 603375 }, { "epoch": 5.93, "grad_norm": 3.383122205734253, "learning_rate": 2.0551202994826577e-06, "loss": 0.0689, "step": 603400 }, { "epoch": 5.93, "grad_norm": 5.935855388641357, "learning_rate": 2.0549961770284093e-06, "loss": 0.0547, "step": 603425 }, { "epoch": 5.93, "grad_norm": 0.0532577745616436, "learning_rate": 2.054872054574161e-06, "loss": 0.0923, "step": 603450 }, { "epoch": 5.93, "grad_norm": 1.7921593189239502, "learning_rate": 2.0547479321199126e-06, "loss": 0.0402, "step": 603475 }, { "epoch": 5.93, "grad_norm": 6.6308979988098145, "learning_rate": 2.054623809665664e-06, "loss": 0.0926, "step": 603500 }, { "epoch": 5.93, "grad_norm": 9.637649536132812, "learning_rate": 2.0544996872114154e-06, "loss": 0.0514, "step": 603525 }, { "epoch": 5.93, "grad_norm": 1.6466937065124512, "learning_rate": 2.054375564757167e-06, "loss": 0.0896, "step": 603550 }, { "epoch": 5.93, "grad_norm": 11.558048248291016, "learning_rate": 2.0542514423029187e-06, "loss": 0.0438, "step": 603575 }, { "epoch": 5.93, "grad_norm": 5.40172815322876, "learning_rate": 2.05412731984867e-06, "loss": 0.0667, "step": 603600 }, { "epoch": 5.93, "grad_norm": 9.338376998901367, "learning_rate": 2.0540031973944215e-06, "loss": 0.0651, "step": 603625 }, { "epoch": 5.94, "grad_norm": 10.519464492797852, "learning_rate": 2.053879074940173e-06, "loss": 0.0885, "step": 603650 }, { "epoch": 5.94, "grad_norm": 14.779112815856934, "learning_rate": 2.053754952485925e-06, "loss": 0.0445, "step": 603675 }, { "epoch": 5.94, "grad_norm": 3.4929983615875244, "learning_rate": 2.0536308300316764e-06, "loss": 0.067, "step": 603700 }, { "epoch": 5.94, "grad_norm": 8.073248863220215, "learning_rate": 2.053506707577428e-06, "loss": 0.0387, "step": 603725 }, { "epoch": 5.94, "grad_norm": 0.6493954658508301, "learning_rate": 2.0533825851231793e-06, "loss": 0.1137, "step": 603750 }, { "epoch": 5.94, "grad_norm": 11.67344856262207, "learning_rate": 2.053258462668931e-06, "loss": 0.0429, "step": 603775 }, { "epoch": 5.94, "grad_norm": 3.352266788482666, "learning_rate": 2.0531343402146825e-06, "loss": 0.0579, "step": 603800 }, { "epoch": 5.94, "grad_norm": 12.62621021270752, "learning_rate": 2.0530102177604338e-06, "loss": 0.0635, "step": 603825 }, { "epoch": 5.94, "grad_norm": 1.5267307758331299, "learning_rate": 2.0528860953061854e-06, "loss": 0.0963, "step": 603850 }, { "epoch": 5.94, "grad_norm": 10.934894561767578, "learning_rate": 2.052761972851937e-06, "loss": 0.0427, "step": 603875 }, { "epoch": 5.94, "grad_norm": 2.599780559539795, "learning_rate": 2.0526378503976887e-06, "loss": 0.0896, "step": 603900 }, { "epoch": 5.94, "grad_norm": 3.3112142086029053, "learning_rate": 2.0525137279434403e-06, "loss": 0.0432, "step": 603925 }, { "epoch": 5.94, "grad_norm": 1.2004811763763428, "learning_rate": 2.0523896054891915e-06, "loss": 0.0882, "step": 603950 }, { "epoch": 5.94, "grad_norm": 8.486488342285156, "learning_rate": 2.052265483034943e-06, "loss": 0.072, "step": 603975 }, { "epoch": 5.94, "grad_norm": 1.0838221311569214, "learning_rate": 2.0521413605806948e-06, "loss": 0.0663, "step": 604000 }, { "epoch": 5.94, "grad_norm": 7.331155776977539, "learning_rate": 2.052017238126446e-06, "loss": 0.0411, "step": 604025 }, { "epoch": 5.94, "grad_norm": 5.247517108917236, "learning_rate": 2.0518931156721976e-06, "loss": 0.1365, "step": 604050 }, { "epoch": 5.94, "grad_norm": 13.599346160888672, "learning_rate": 2.0517689932179493e-06, "loss": 0.0701, "step": 604075 }, { "epoch": 5.94, "grad_norm": 0.13476933538913727, "learning_rate": 2.051644870763701e-06, "loss": 0.074, "step": 604100 }, { "epoch": 5.94, "grad_norm": 9.396625518798828, "learning_rate": 2.0515207483094525e-06, "loss": 0.0574, "step": 604125 }, { "epoch": 5.94, "grad_norm": 3.4925882816314697, "learning_rate": 2.051396625855204e-06, "loss": 0.101, "step": 604150 }, { "epoch": 5.94, "grad_norm": 3.404332160949707, "learning_rate": 2.0512725034009554e-06, "loss": 0.0498, "step": 604175 }, { "epoch": 5.94, "grad_norm": 3.5692250728607178, "learning_rate": 2.051148380946707e-06, "loss": 0.0677, "step": 604200 }, { "epoch": 5.94, "grad_norm": 12.705732345581055, "learning_rate": 2.0510242584924586e-06, "loss": 0.0368, "step": 604225 }, { "epoch": 5.94, "grad_norm": 0.049405746161937714, "learning_rate": 2.05090013603821e-06, "loss": 0.0964, "step": 604250 }, { "epoch": 5.94, "grad_norm": 11.739129066467285, "learning_rate": 2.0507760135839615e-06, "loss": 0.0753, "step": 604275 }, { "epoch": 5.94, "grad_norm": 0.047790560871362686, "learning_rate": 2.050651891129713e-06, "loss": 0.1033, "step": 604300 }, { "epoch": 5.94, "grad_norm": 6.964575290679932, "learning_rate": 2.0505277686754648e-06, "loss": 0.0514, "step": 604325 }, { "epoch": 5.94, "grad_norm": 2.062990665435791, "learning_rate": 2.0504036462212164e-06, "loss": 0.089, "step": 604350 }, { "epoch": 5.94, "grad_norm": 26.26314353942871, "learning_rate": 2.0502795237669676e-06, "loss": 0.059, "step": 604375 }, { "epoch": 5.94, "grad_norm": 0.009614381939172745, "learning_rate": 2.0501554013127192e-06, "loss": 0.083, "step": 604400 }, { "epoch": 5.94, "grad_norm": 4.326446533203125, "learning_rate": 2.050031278858471e-06, "loss": 0.0435, "step": 604425 }, { "epoch": 5.94, "grad_norm": 0.8185690641403198, "learning_rate": 2.049907156404222e-06, "loss": 0.0815, "step": 604450 }, { "epoch": 5.94, "grad_norm": 10.161033630371094, "learning_rate": 2.0497830339499737e-06, "loss": 0.0467, "step": 604475 }, { "epoch": 5.94, "grad_norm": 1.224367380142212, "learning_rate": 2.0496589114957254e-06, "loss": 0.0707, "step": 604500 }, { "epoch": 5.94, "grad_norm": 10.091718673706055, "learning_rate": 2.049534789041477e-06, "loss": 0.0466, "step": 604525 }, { "epoch": 5.94, "grad_norm": 2.5150978565216064, "learning_rate": 2.0494106665872286e-06, "loss": 0.0756, "step": 604550 }, { "epoch": 5.94, "grad_norm": 5.541525363922119, "learning_rate": 2.0492865441329803e-06, "loss": 0.0508, "step": 604575 }, { "epoch": 5.94, "grad_norm": 5.194073677062988, "learning_rate": 2.0491624216787315e-06, "loss": 0.0641, "step": 604600 }, { "epoch": 5.94, "grad_norm": 13.387619018554688, "learning_rate": 2.049038299224483e-06, "loss": 0.0474, "step": 604625 }, { "epoch": 5.95, "grad_norm": 0.016687730327248573, "learning_rate": 2.0489141767702347e-06, "loss": 0.092, "step": 604650 }, { "epoch": 5.95, "grad_norm": 11.812029838562012, "learning_rate": 2.048790054315986e-06, "loss": 0.0759, "step": 604675 }, { "epoch": 5.95, "grad_norm": 1.7921684980392456, "learning_rate": 2.0486659318617376e-06, "loss": 0.0994, "step": 604700 }, { "epoch": 5.95, "grad_norm": 7.612837791442871, "learning_rate": 2.0485418094074892e-06, "loss": 0.0436, "step": 604725 }, { "epoch": 5.95, "grad_norm": 0.43951961398124695, "learning_rate": 2.048417686953241e-06, "loss": 0.0906, "step": 604750 }, { "epoch": 5.95, "grad_norm": 8.58967399597168, "learning_rate": 2.0482935644989925e-06, "loss": 0.0518, "step": 604775 }, { "epoch": 5.95, "grad_norm": 1.4571279287338257, "learning_rate": 2.0481694420447437e-06, "loss": 0.0839, "step": 604800 }, { "epoch": 5.95, "grad_norm": 10.548981666564941, "learning_rate": 2.0480453195904953e-06, "loss": 0.04, "step": 604825 }, { "epoch": 5.95, "grad_norm": 0.8021336793899536, "learning_rate": 2.047921197136247e-06, "loss": 0.0749, "step": 604850 }, { "epoch": 5.95, "grad_norm": 7.104945659637451, "learning_rate": 2.047797074681998e-06, "loss": 0.0442, "step": 604875 }, { "epoch": 5.95, "grad_norm": 1.4185826778411865, "learning_rate": 2.04767295222775e-06, "loss": 0.088, "step": 604900 }, { "epoch": 5.95, "grad_norm": 10.253387451171875, "learning_rate": 2.0475488297735015e-06, "loss": 0.045, "step": 604925 }, { "epoch": 5.95, "grad_norm": 6.030256271362305, "learning_rate": 2.047424707319253e-06, "loss": 0.091, "step": 604950 }, { "epoch": 5.95, "grad_norm": 5.675756454467773, "learning_rate": 2.0473005848650047e-06, "loss": 0.0533, "step": 604975 }, { "epoch": 5.95, "grad_norm": 3.9818596839904785, "learning_rate": 2.0471764624107564e-06, "loss": 0.0773, "step": 605000 }, { "epoch": 5.95, "grad_norm": 9.876909255981445, "learning_rate": 2.0470523399565076e-06, "loss": 0.0475, "step": 605025 }, { "epoch": 5.95, "grad_norm": 0.17382986843585968, "learning_rate": 2.0469282175022592e-06, "loss": 0.0749, "step": 605050 }, { "epoch": 5.95, "grad_norm": 8.589738845825195, "learning_rate": 2.046804095048011e-06, "loss": 0.0707, "step": 605075 }, { "epoch": 5.95, "grad_norm": 6.005465030670166, "learning_rate": 2.046679972593762e-06, "loss": 0.0922, "step": 605100 }, { "epoch": 5.95, "grad_norm": 13.365321159362793, "learning_rate": 2.0465558501395137e-06, "loss": 0.0552, "step": 605125 }, { "epoch": 5.95, "grad_norm": 4.109635829925537, "learning_rate": 2.0464317276852653e-06, "loss": 0.0603, "step": 605150 }, { "epoch": 5.95, "grad_norm": 13.347887992858887, "learning_rate": 2.046307605231017e-06, "loss": 0.0443, "step": 605175 }, { "epoch": 5.95, "grad_norm": 0.95253586769104, "learning_rate": 2.0461834827767686e-06, "loss": 0.0609, "step": 605200 }, { "epoch": 5.95, "grad_norm": 7.058482646942139, "learning_rate": 2.04605936032252e-06, "loss": 0.0587, "step": 605225 }, { "epoch": 5.95, "grad_norm": 3.5974435806274414, "learning_rate": 2.0459352378682714e-06, "loss": 0.0902, "step": 605250 }, { "epoch": 5.95, "grad_norm": 11.711767196655273, "learning_rate": 2.045811115414023e-06, "loss": 0.0611, "step": 605275 }, { "epoch": 5.95, "grad_norm": 4.274115562438965, "learning_rate": 2.0456919578579445e-06, "loss": 0.0583, "step": 605300 }, { "epoch": 5.95, "grad_norm": 9.50115966796875, "learning_rate": 2.0455678354036957e-06, "loss": 0.077, "step": 605325 }, { "epoch": 5.95, "grad_norm": 0.05593060702085495, "learning_rate": 2.0454437129494474e-06, "loss": 0.0837, "step": 605350 }, { "epoch": 5.95, "grad_norm": 15.561759948730469, "learning_rate": 2.045319590495199e-06, "loss": 0.0657, "step": 605375 }, { "epoch": 5.95, "grad_norm": 7.140460014343262, "learning_rate": 2.0451954680409506e-06, "loss": 0.071, "step": 605400 }, { "epoch": 5.95, "grad_norm": 8.616446495056152, "learning_rate": 2.0450713455867023e-06, "loss": 0.0634, "step": 605425 }, { "epoch": 5.95, "grad_norm": 5.101889133453369, "learning_rate": 2.044947223132454e-06, "loss": 0.0881, "step": 605450 }, { "epoch": 5.95, "grad_norm": 6.720808506011963, "learning_rate": 2.044823100678205e-06, "loss": 0.0525, "step": 605475 }, { "epoch": 5.95, "grad_norm": 0.5614330172538757, "learning_rate": 2.0446989782239568e-06, "loss": 0.0824, "step": 605500 }, { "epoch": 5.95, "grad_norm": 11.734972953796387, "learning_rate": 2.0445748557697084e-06, "loss": 0.0597, "step": 605525 }, { "epoch": 5.95, "grad_norm": 2.476297378540039, "learning_rate": 2.0444507333154596e-06, "loss": 0.0971, "step": 605550 }, { "epoch": 5.95, "grad_norm": 7.624448776245117, "learning_rate": 2.0443266108612112e-06, "loss": 0.0343, "step": 605575 }, { "epoch": 5.95, "grad_norm": 3.431805372238159, "learning_rate": 2.044202488406963e-06, "loss": 0.0742, "step": 605600 }, { "epoch": 5.95, "grad_norm": 13.295109748840332, "learning_rate": 2.0440783659527145e-06, "loss": 0.0438, "step": 605625 }, { "epoch": 5.95, "grad_norm": 5.435818195343018, "learning_rate": 2.043954243498466e-06, "loss": 0.0944, "step": 605650 }, { "epoch": 5.96, "grad_norm": 72.79886627197266, "learning_rate": 2.0438301210442178e-06, "loss": 0.044, "step": 605675 }, { "epoch": 5.96, "grad_norm": 0.5092805624008179, "learning_rate": 2.043705998589969e-06, "loss": 0.0875, "step": 605700 }, { "epoch": 5.96, "grad_norm": 16.26643180847168, "learning_rate": 2.0435818761357206e-06, "loss": 0.0503, "step": 605725 }, { "epoch": 5.96, "grad_norm": 0.39479029178619385, "learning_rate": 2.043457753681472e-06, "loss": 0.1, "step": 605750 }, { "epoch": 5.96, "grad_norm": 10.530426979064941, "learning_rate": 2.0433336312272235e-06, "loss": 0.0539, "step": 605775 }, { "epoch": 5.96, "grad_norm": 1.2751717567443848, "learning_rate": 2.043209508772975e-06, "loss": 0.0748, "step": 605800 }, { "epoch": 5.96, "grad_norm": 11.478699684143066, "learning_rate": 2.0430853863187267e-06, "loss": 0.0376, "step": 605825 }, { "epoch": 5.96, "grad_norm": 0.8288210034370422, "learning_rate": 2.0429612638644784e-06, "loss": 0.0899, "step": 605850 }, { "epoch": 5.96, "grad_norm": 11.563952445983887, "learning_rate": 2.04283714141023e-06, "loss": 0.0486, "step": 605875 }, { "epoch": 5.96, "grad_norm": 0.23559343814849854, "learning_rate": 2.0427130189559812e-06, "loss": 0.0887, "step": 605900 }, { "epoch": 5.96, "grad_norm": 13.420589447021484, "learning_rate": 2.042588896501733e-06, "loss": 0.0378, "step": 605925 }, { "epoch": 5.96, "grad_norm": 1.2428632974624634, "learning_rate": 2.0424647740474845e-06, "loss": 0.1353, "step": 605950 }, { "epoch": 5.96, "grad_norm": 10.988520622253418, "learning_rate": 2.0423406515932357e-06, "loss": 0.0462, "step": 605975 }, { "epoch": 5.96, "grad_norm": 0.016421718522906303, "learning_rate": 2.0422165291389873e-06, "loss": 0.0776, "step": 606000 }, { "epoch": 5.96, "grad_norm": 14.039469718933105, "learning_rate": 2.042092406684739e-06, "loss": 0.0586, "step": 606025 }, { "epoch": 5.96, "grad_norm": 1.3614262342453003, "learning_rate": 2.0419682842304906e-06, "loss": 0.092, "step": 606050 }, { "epoch": 5.96, "grad_norm": 10.128474235534668, "learning_rate": 2.0418441617762423e-06, "loss": 0.0502, "step": 606075 }, { "epoch": 5.96, "grad_norm": 5.719412803649902, "learning_rate": 2.041720039321994e-06, "loss": 0.1036, "step": 606100 }, { "epoch": 5.96, "grad_norm": 9.615143775939941, "learning_rate": 2.041595916867745e-06, "loss": 0.0535, "step": 606125 }, { "epoch": 5.96, "grad_norm": 3.7776730060577393, "learning_rate": 2.0414717944134967e-06, "loss": 0.0894, "step": 606150 }, { "epoch": 5.96, "grad_norm": 9.497532844543457, "learning_rate": 2.041347671959248e-06, "loss": 0.047, "step": 606175 }, { "epoch": 5.96, "grad_norm": 4.2961835861206055, "learning_rate": 2.0412235495049996e-06, "loss": 0.0892, "step": 606200 }, { "epoch": 5.96, "grad_norm": 10.42002010345459, "learning_rate": 2.0410994270507512e-06, "loss": 0.0415, "step": 606225 }, { "epoch": 5.96, "grad_norm": 0.11353439837694168, "learning_rate": 2.040975304596503e-06, "loss": 0.0751, "step": 606250 }, { "epoch": 5.96, "grad_norm": 13.407142639160156, "learning_rate": 2.0408511821422545e-06, "loss": 0.0486, "step": 606275 }, { "epoch": 5.96, "grad_norm": 2.9519143104553223, "learning_rate": 2.040727059688006e-06, "loss": 0.1012, "step": 606300 }, { "epoch": 5.96, "grad_norm": 14.448080062866211, "learning_rate": 2.0406029372337573e-06, "loss": 0.0575, "step": 606325 }, { "epoch": 5.96, "grad_norm": 0.05296707525849342, "learning_rate": 2.040478814779509e-06, "loss": 0.0701, "step": 606350 }, { "epoch": 5.96, "grad_norm": 15.583961486816406, "learning_rate": 2.0403546923252606e-06, "loss": 0.0646, "step": 606375 }, { "epoch": 5.96, "grad_norm": 5.306076526641846, "learning_rate": 2.0402305698710122e-06, "loss": 0.0934, "step": 606400 }, { "epoch": 5.96, "grad_norm": 10.780126571655273, "learning_rate": 2.040106447416764e-06, "loss": 0.0453, "step": 606425 }, { "epoch": 5.96, "grad_norm": 6.204289436340332, "learning_rate": 2.0399823249625155e-06, "loss": 0.0944, "step": 606450 }, { "epoch": 5.96, "grad_norm": 2.4774563312530518, "learning_rate": 2.0398582025082667e-06, "loss": 0.0541, "step": 606475 }, { "epoch": 5.96, "grad_norm": 0.04648619517683983, "learning_rate": 2.0397340800540184e-06, "loss": 0.0769, "step": 606500 }, { "epoch": 5.96, "grad_norm": 4.006213665008545, "learning_rate": 2.03960995759977e-06, "loss": 0.0711, "step": 606525 }, { "epoch": 5.96, "grad_norm": 10.204054832458496, "learning_rate": 2.039485835145521e-06, "loss": 0.0758, "step": 606550 }, { "epoch": 5.96, "grad_norm": 4.497591018676758, "learning_rate": 2.039361712691273e-06, "loss": 0.0389, "step": 606575 }, { "epoch": 5.96, "grad_norm": 0.16084852814674377, "learning_rate": 2.0392375902370245e-06, "loss": 0.093, "step": 606600 }, { "epoch": 5.96, "grad_norm": 16.282451629638672, "learning_rate": 2.039113467782776e-06, "loss": 0.0488, "step": 606625 }, { "epoch": 5.96, "grad_norm": 0.08987986296415329, "learning_rate": 2.0389893453285277e-06, "loss": 0.0857, "step": 606650 }, { "epoch": 5.96, "grad_norm": 13.446245193481445, "learning_rate": 2.038865222874279e-06, "loss": 0.0591, "step": 606675 }, { "epoch": 5.97, "grad_norm": 3.221827745437622, "learning_rate": 2.0387411004200306e-06, "loss": 0.0891, "step": 606700 }, { "epoch": 5.97, "grad_norm": 12.903398513793945, "learning_rate": 2.0386169779657822e-06, "loss": 0.0464, "step": 606725 }, { "epoch": 5.97, "grad_norm": 1.8591266870498657, "learning_rate": 2.0384928555115334e-06, "loss": 0.0859, "step": 606750 }, { "epoch": 5.97, "grad_norm": 10.909024238586426, "learning_rate": 2.038368733057285e-06, "loss": 0.0476, "step": 606775 }, { "epoch": 5.97, "grad_norm": 1.8065024614334106, "learning_rate": 2.0382446106030367e-06, "loss": 0.0989, "step": 606800 }, { "epoch": 5.97, "grad_norm": 11.0897216796875, "learning_rate": 2.0381204881487883e-06, "loss": 0.0408, "step": 606825 }, { "epoch": 5.97, "grad_norm": 2.0648703575134277, "learning_rate": 2.03799636569454e-06, "loss": 0.067, "step": 606850 }, { "epoch": 5.97, "grad_norm": 15.248651504516602, "learning_rate": 2.0378722432402916e-06, "loss": 0.0538, "step": 606875 }, { "epoch": 5.97, "grad_norm": 2.4426558017730713, "learning_rate": 2.037748120786043e-06, "loss": 0.0808, "step": 606900 }, { "epoch": 5.97, "grad_norm": 10.148575782775879, "learning_rate": 2.0376239983317945e-06, "loss": 0.0579, "step": 606925 }, { "epoch": 5.97, "grad_norm": 0.8862553238868713, "learning_rate": 2.037499875877546e-06, "loss": 0.0764, "step": 606950 }, { "epoch": 5.97, "grad_norm": 17.16661834716797, "learning_rate": 2.0373757534232973e-06, "loss": 0.0486, "step": 606975 }, { "epoch": 5.97, "grad_norm": 2.9355287551879883, "learning_rate": 2.037251630969049e-06, "loss": 0.09, "step": 607000 }, { "epoch": 5.97, "grad_norm": 18.06290626525879, "learning_rate": 2.0371275085148006e-06, "loss": 0.0496, "step": 607025 }, { "epoch": 5.97, "grad_norm": 1.339890956878662, "learning_rate": 2.037003386060552e-06, "loss": 0.0823, "step": 607050 }, { "epoch": 5.97, "grad_norm": 15.818923950195312, "learning_rate": 2.036879263606304e-06, "loss": 0.0464, "step": 607075 }, { "epoch": 5.97, "grad_norm": 1.5932503938674927, "learning_rate": 2.036755141152055e-06, "loss": 0.0608, "step": 607100 }, { "epoch": 5.97, "grad_norm": 7.506938457489014, "learning_rate": 2.0366310186978067e-06, "loss": 0.0402, "step": 607125 }, { "epoch": 5.97, "grad_norm": 0.09051351994276047, "learning_rate": 2.0365068962435583e-06, "loss": 0.0878, "step": 607150 }, { "epoch": 5.97, "grad_norm": 3.9827451705932617, "learning_rate": 2.0363827737893095e-06, "loss": 0.0513, "step": 607175 }, { "epoch": 5.97, "grad_norm": 0.8460562229156494, "learning_rate": 2.036258651335061e-06, "loss": 0.0659, "step": 607200 }, { "epoch": 5.97, "grad_norm": 10.030207633972168, "learning_rate": 2.036134528880813e-06, "loss": 0.0542, "step": 607225 }, { "epoch": 5.97, "grad_norm": 10.968202590942383, "learning_rate": 2.0360104064265644e-06, "loss": 0.066, "step": 607250 }, { "epoch": 5.97, "grad_norm": 9.257974624633789, "learning_rate": 2.035886283972316e-06, "loss": 0.056, "step": 607275 }, { "epoch": 5.97, "grad_norm": 2.8749032020568848, "learning_rate": 2.0357621615180677e-06, "loss": 0.0752, "step": 607300 }, { "epoch": 5.97, "grad_norm": 6.5966033935546875, "learning_rate": 2.035638039063819e-06, "loss": 0.0464, "step": 607325 }, { "epoch": 5.97, "grad_norm": 1.0525296926498413, "learning_rate": 2.0355139166095706e-06, "loss": 0.0765, "step": 607350 }, { "epoch": 5.97, "grad_norm": 10.139813423156738, "learning_rate": 2.035389794155322e-06, "loss": 0.0365, "step": 607375 }, { "epoch": 5.97, "grad_norm": 2.13427472114563, "learning_rate": 2.0352656717010734e-06, "loss": 0.1044, "step": 607400 }, { "epoch": 5.97, "grad_norm": 9.53736400604248, "learning_rate": 2.035141549246825e-06, "loss": 0.0408, "step": 607425 }, { "epoch": 5.97, "grad_norm": 1.4352543354034424, "learning_rate": 2.0350174267925767e-06, "loss": 0.1152, "step": 607450 }, { "epoch": 5.97, "grad_norm": 2.7220966815948486, "learning_rate": 2.0348933043383283e-06, "loss": 0.0534, "step": 607475 }, { "epoch": 5.97, "grad_norm": 9.176227569580078, "learning_rate": 2.03476918188408e-06, "loss": 0.0789, "step": 607500 }, { "epoch": 5.97, "grad_norm": 1.0867148637771606, "learning_rate": 2.034645059429831e-06, "loss": 0.0358, "step": 607525 }, { "epoch": 5.97, "grad_norm": 3.75216007232666, "learning_rate": 2.0345209369755828e-06, "loss": 0.0806, "step": 607550 }, { "epoch": 5.97, "grad_norm": 16.502716064453125, "learning_rate": 2.0343968145213344e-06, "loss": 0.0873, "step": 607575 }, { "epoch": 5.97, "grad_norm": 1.4646800756454468, "learning_rate": 2.0342726920670856e-06, "loss": 0.0959, "step": 607600 }, { "epoch": 5.97, "grad_norm": 6.453151226043701, "learning_rate": 2.0341485696128373e-06, "loss": 0.0577, "step": 607625 }, { "epoch": 5.97, "grad_norm": 0.00403933459892869, "learning_rate": 2.034024447158589e-06, "loss": 0.0914, "step": 607650 }, { "epoch": 5.97, "grad_norm": 9.350887298583984, "learning_rate": 2.0339003247043405e-06, "loss": 0.0552, "step": 607675 }, { "epoch": 5.98, "grad_norm": 23.150793075561523, "learning_rate": 2.033781167148262e-06, "loss": 0.1058, "step": 607700 }, { "epoch": 5.98, "grad_norm": 10.507308006286621, "learning_rate": 2.0336570446940136e-06, "loss": 0.0503, "step": 607725 }, { "epoch": 5.98, "grad_norm": 0.2533170282840729, "learning_rate": 2.0335329222397653e-06, "loss": 0.0699, "step": 607750 }, { "epoch": 5.98, "grad_norm": 5.8710036277771, "learning_rate": 2.0334087997855165e-06, "loss": 0.0358, "step": 607775 }, { "epoch": 5.98, "grad_norm": 1.4307137727737427, "learning_rate": 2.033284677331268e-06, "loss": 0.0808, "step": 607800 }, { "epoch": 5.98, "grad_norm": 11.639320373535156, "learning_rate": 2.0331605548770197e-06, "loss": 0.0516, "step": 607825 }, { "epoch": 5.98, "grad_norm": 1.7757489681243896, "learning_rate": 2.033036432422771e-06, "loss": 0.0935, "step": 607850 }, { "epoch": 5.98, "grad_norm": 12.423788070678711, "learning_rate": 2.0329123099685226e-06, "loss": 0.0459, "step": 607875 }, { "epoch": 5.98, "grad_norm": 1.7892075777053833, "learning_rate": 2.0327881875142742e-06, "loss": 0.0837, "step": 607900 }, { "epoch": 5.98, "grad_norm": 2.4142489433288574, "learning_rate": 2.032664065060026e-06, "loss": 0.0612, "step": 607925 }, { "epoch": 5.98, "grad_norm": 0.43801000714302063, "learning_rate": 2.0325399426057775e-06, "loss": 0.0686, "step": 607950 }, { "epoch": 5.98, "grad_norm": 12.411314010620117, "learning_rate": 2.032415820151529e-06, "loss": 0.0419, "step": 607975 }, { "epoch": 5.98, "grad_norm": 1.3113064765930176, "learning_rate": 2.0322916976972803e-06, "loss": 0.0631, "step": 608000 }, { "epoch": 5.98, "grad_norm": 11.202526092529297, "learning_rate": 2.032167575243032e-06, "loss": 0.0413, "step": 608025 }, { "epoch": 5.98, "grad_norm": 3.064183235168457, "learning_rate": 2.032043452788783e-06, "loss": 0.1256, "step": 608050 }, { "epoch": 5.98, "grad_norm": 10.196636199951172, "learning_rate": 2.031919330334535e-06, "loss": 0.0432, "step": 608075 }, { "epoch": 5.98, "grad_norm": 0.024933604523539543, "learning_rate": 2.0317952078802865e-06, "loss": 0.09, "step": 608100 }, { "epoch": 5.98, "grad_norm": 12.629061698913574, "learning_rate": 2.031671085426038e-06, "loss": 0.0464, "step": 608125 }, { "epoch": 5.98, "grad_norm": 5.169040679931641, "learning_rate": 2.0315469629717897e-06, "loss": 0.0822, "step": 608150 }, { "epoch": 5.98, "grad_norm": 12.36281967163086, "learning_rate": 2.0314228405175414e-06, "loss": 0.0503, "step": 608175 }, { "epoch": 5.98, "grad_norm": 0.24037492275238037, "learning_rate": 2.0312987180632926e-06, "loss": 0.0889, "step": 608200 }, { "epoch": 5.98, "grad_norm": 10.23967456817627, "learning_rate": 2.031174595609044e-06, "loss": 0.0372, "step": 608225 }, { "epoch": 5.98, "grad_norm": 2.996666669845581, "learning_rate": 2.031050473154796e-06, "loss": 0.0956, "step": 608250 }, { "epoch": 5.98, "grad_norm": 16.803136825561523, "learning_rate": 2.030926350700547e-06, "loss": 0.0612, "step": 608275 }, { "epoch": 5.98, "grad_norm": 0.3796885013580322, "learning_rate": 2.0308022282462987e-06, "loss": 0.0794, "step": 608300 }, { "epoch": 5.98, "grad_norm": 11.858017921447754, "learning_rate": 2.0306781057920503e-06, "loss": 0.046, "step": 608325 }, { "epoch": 5.98, "grad_norm": 3.5021772384643555, "learning_rate": 2.030553983337802e-06, "loss": 0.0854, "step": 608350 }, { "epoch": 5.98, "grad_norm": 7.127371311187744, "learning_rate": 2.0304298608835536e-06, "loss": 0.0404, "step": 608375 }, { "epoch": 5.98, "grad_norm": 7.718769550323486, "learning_rate": 2.0303057384293052e-06, "loss": 0.072, "step": 608400 }, { "epoch": 5.98, "grad_norm": 12.105175018310547, "learning_rate": 2.0301816159750564e-06, "loss": 0.04, "step": 608425 }, { "epoch": 5.98, "grad_norm": 6.256403923034668, "learning_rate": 2.030057493520808e-06, "loss": 0.0855, "step": 608450 }, { "epoch": 5.98, "grad_norm": 8.478754997253418, "learning_rate": 2.0299333710665593e-06, "loss": 0.0444, "step": 608475 }, { "epoch": 5.98, "grad_norm": 3.7266993522644043, "learning_rate": 2.029809248612311e-06, "loss": 0.0761, "step": 608500 }, { "epoch": 5.98, "grad_norm": 7.821841716766357, "learning_rate": 2.0296851261580626e-06, "loss": 0.0596, "step": 608525 }, { "epoch": 5.98, "grad_norm": 15.067462921142578, "learning_rate": 2.029561003703814e-06, "loss": 0.0849, "step": 608550 }, { "epoch": 5.98, "grad_norm": 16.284135818481445, "learning_rate": 2.029436881249566e-06, "loss": 0.0496, "step": 608575 }, { "epoch": 5.98, "grad_norm": 7.232449531555176, "learning_rate": 2.0293127587953175e-06, "loss": 0.0782, "step": 608600 }, { "epoch": 5.98, "grad_norm": 15.366479873657227, "learning_rate": 2.0291886363410687e-06, "loss": 0.0522, "step": 608625 }, { "epoch": 5.98, "grad_norm": 2.8641164302825928, "learning_rate": 2.0290645138868203e-06, "loss": 0.0851, "step": 608650 }, { "epoch": 5.98, "grad_norm": 1.2187557220458984, "learning_rate": 2.028940391432572e-06, "loss": 0.0395, "step": 608675 }, { "epoch": 5.98, "grad_norm": 0.04048273712396622, "learning_rate": 2.028816268978323e-06, "loss": 0.0996, "step": 608700 }, { "epoch": 5.99, "grad_norm": 0.5165396928787231, "learning_rate": 2.0286921465240748e-06, "loss": 0.0458, "step": 608725 }, { "epoch": 5.99, "grad_norm": 0.08177485316991806, "learning_rate": 2.0285680240698264e-06, "loss": 0.0726, "step": 608750 }, { "epoch": 5.99, "grad_norm": 23.792068481445312, "learning_rate": 2.028443901615578e-06, "loss": 0.0344, "step": 608775 }, { "epoch": 5.99, "grad_norm": 0.9425113201141357, "learning_rate": 2.0283197791613297e-06, "loss": 0.0897, "step": 608800 }, { "epoch": 5.99, "grad_norm": 3.620117664337158, "learning_rate": 2.0281956567070813e-06, "loss": 0.0425, "step": 608825 }, { "epoch": 5.99, "grad_norm": 0.7578600645065308, "learning_rate": 2.0280715342528325e-06, "loss": 0.079, "step": 608850 }, { "epoch": 5.99, "grad_norm": 11.813165664672852, "learning_rate": 2.027947411798584e-06, "loss": 0.053, "step": 608875 }, { "epoch": 5.99, "grad_norm": 0.02571178413927555, "learning_rate": 2.0278232893443354e-06, "loss": 0.1013, "step": 608900 }, { "epoch": 5.99, "grad_norm": 3.8665547370910645, "learning_rate": 2.027699166890087e-06, "loss": 0.038, "step": 608925 }, { "epoch": 5.99, "grad_norm": 0.5208176970481873, "learning_rate": 2.0275750444358387e-06, "loss": 0.055, "step": 608950 }, { "epoch": 5.99, "grad_norm": 6.40305233001709, "learning_rate": 2.0274509219815903e-06, "loss": 0.0585, "step": 608975 }, { "epoch": 5.99, "grad_norm": 1.4631524085998535, "learning_rate": 2.027326799527342e-06, "loss": 0.0797, "step": 609000 }, { "epoch": 5.99, "grad_norm": 3.6863186359405518, "learning_rate": 2.0272026770730936e-06, "loss": 0.0529, "step": 609025 }, { "epoch": 5.99, "grad_norm": 0.7735620737075806, "learning_rate": 2.0270785546188448e-06, "loss": 0.0723, "step": 609050 }, { "epoch": 5.99, "grad_norm": 14.143558502197266, "learning_rate": 2.0269544321645964e-06, "loss": 0.0341, "step": 609075 }, { "epoch": 5.99, "grad_norm": 3.208954095840454, "learning_rate": 2.026830309710348e-06, "loss": 0.1011, "step": 609100 }, { "epoch": 5.99, "grad_norm": 16.280811309814453, "learning_rate": 2.0267061872560997e-06, "loss": 0.0484, "step": 609125 }, { "epoch": 5.99, "grad_norm": 0.048316892236471176, "learning_rate": 2.0265820648018513e-06, "loss": 0.0801, "step": 609150 }, { "epoch": 5.99, "grad_norm": 9.45865535736084, "learning_rate": 2.026457942347603e-06, "loss": 0.0512, "step": 609175 }, { "epoch": 5.99, "grad_norm": 4.251277923583984, "learning_rate": 2.026333819893354e-06, "loss": 0.0801, "step": 609200 }, { "epoch": 5.99, "grad_norm": 16.660703659057617, "learning_rate": 2.026209697439106e-06, "loss": 0.0504, "step": 609225 }, { "epoch": 5.99, "grad_norm": 0.9796248078346252, "learning_rate": 2.0260855749848574e-06, "loss": 0.0691, "step": 609250 }, { "epoch": 5.99, "grad_norm": 8.913481712341309, "learning_rate": 2.0259614525306086e-06, "loss": 0.0358, "step": 609275 }, { "epoch": 5.99, "grad_norm": 4.762571334838867, "learning_rate": 2.0258373300763603e-06, "loss": 0.0789, "step": 609300 }, { "epoch": 5.99, "grad_norm": 15.133654594421387, "learning_rate": 2.025713207622112e-06, "loss": 0.0582, "step": 609325 }, { "epoch": 5.99, "grad_norm": 0.08450864255428314, "learning_rate": 2.0255890851678635e-06, "loss": 0.0738, "step": 609350 }, { "epoch": 5.99, "grad_norm": 8.44473648071289, "learning_rate": 2.025464962713615e-06, "loss": 0.0297, "step": 609375 }, { "epoch": 5.99, "grad_norm": 3.9888298511505127, "learning_rate": 2.0253408402593664e-06, "loss": 0.0785, "step": 609400 }, { "epoch": 5.99, "grad_norm": 1.2610341310501099, "learning_rate": 2.025216717805118e-06, "loss": 0.0613, "step": 609425 }, { "epoch": 5.99, "grad_norm": 0.8605770468711853, "learning_rate": 2.0250925953508697e-06, "loss": 0.0844, "step": 609450 }, { "epoch": 5.99, "grad_norm": 7.837457656860352, "learning_rate": 2.024968472896621e-06, "loss": 0.0429, "step": 609475 }, { "epoch": 5.99, "grad_norm": 0.26763954758644104, "learning_rate": 2.0248443504423725e-06, "loss": 0.0718, "step": 609500 }, { "epoch": 5.99, "grad_norm": 14.184279441833496, "learning_rate": 2.024720227988124e-06, "loss": 0.055, "step": 609525 }, { "epoch": 5.99, "grad_norm": 2.5592446327209473, "learning_rate": 2.0245961055338758e-06, "loss": 0.072, "step": 609550 }, { "epoch": 5.99, "grad_norm": 17.31806755065918, "learning_rate": 2.0244719830796274e-06, "loss": 0.0513, "step": 609575 }, { "epoch": 5.99, "grad_norm": 0.00279072648845613, "learning_rate": 2.024347860625379e-06, "loss": 0.0825, "step": 609600 }, { "epoch": 5.99, "grad_norm": 20.623598098754883, "learning_rate": 2.0242237381711303e-06, "loss": 0.0469, "step": 609625 }, { "epoch": 5.99, "grad_norm": 4.572418212890625, "learning_rate": 2.024099615716882e-06, "loss": 0.0778, "step": 609650 }, { "epoch": 5.99, "grad_norm": 11.82997989654541, "learning_rate": 2.0239754932626335e-06, "loss": 0.0482, "step": 609675 }, { "epoch": 5.99, "grad_norm": 1.1633086204528809, "learning_rate": 2.0238513708083847e-06, "loss": 0.0969, "step": 609700 }, { "epoch": 5.99, "grad_norm": 16.200862884521484, "learning_rate": 2.0237272483541364e-06, "loss": 0.0394, "step": 609725 }, { "epoch": 6.0, "grad_norm": 4.128814697265625, "learning_rate": 2.023603125899888e-06, "loss": 0.0727, "step": 609750 }, { "epoch": 6.0, "grad_norm": 10.244974136352539, "learning_rate": 2.0234790034456396e-06, "loss": 0.0504, "step": 609775 }, { "epoch": 6.0, "grad_norm": 5.693626880645752, "learning_rate": 2.023359845889561e-06, "loss": 0.099, "step": 609800 }, { "epoch": 6.0, "grad_norm": 5.808016300201416, "learning_rate": 2.0232357234353127e-06, "loss": 0.0558, "step": 609825 }, { "epoch": 6.0, "grad_norm": 2.9808759689331055, "learning_rate": 2.023111600981064e-06, "loss": 0.0846, "step": 609850 }, { "epoch": 6.0, "grad_norm": 9.310148239135742, "learning_rate": 2.0229874785268156e-06, "loss": 0.0817, "step": 609875 }, { "epoch": 6.0, "grad_norm": 0.2710941433906555, "learning_rate": 2.0228633560725672e-06, "loss": 0.079, "step": 609900 }, { "epoch": 6.0, "grad_norm": 14.613987922668457, "learning_rate": 2.0227392336183184e-06, "loss": 0.0439, "step": 609925 }, { "epoch": 6.0, "grad_norm": 0.0964021235704422, "learning_rate": 2.02261511116407e-06, "loss": 0.0929, "step": 609950 }, { "epoch": 6.0, "grad_norm": 8.77426528930664, "learning_rate": 2.0224909887098217e-06, "loss": 0.0544, "step": 609975 }, { "epoch": 6.0, "grad_norm": 5.421733856201172, "learning_rate": 2.0223668662555733e-06, "loss": 0.0646, "step": 610000 }, { "epoch": 6.0, "grad_norm": 9.821860313415527, "learning_rate": 2.022242743801325e-06, "loss": 0.0532, "step": 610025 }, { "epoch": 6.0, "grad_norm": 5.127973556518555, "learning_rate": 2.0221186213470766e-06, "loss": 0.0622, "step": 610050 }, { "epoch": 6.0, "grad_norm": 9.460684776306152, "learning_rate": 2.021994498892828e-06, "loss": 0.0519, "step": 610075 }, { "epoch": 6.0, "grad_norm": 0.356625497341156, "learning_rate": 2.0218703764385794e-06, "loss": 0.0911, "step": 610100 }, { "epoch": 6.0, "grad_norm": 9.39409065246582, "learning_rate": 2.021746253984331e-06, "loss": 0.0623, "step": 610125 }, { "epoch": 6.0, "grad_norm": 1.064351201057434, "learning_rate": 2.0216221315300823e-06, "loss": 0.0767, "step": 610150 }, { "epoch": 6.0, "grad_norm": 6.863601207733154, "learning_rate": 2.021498009075834e-06, "loss": 0.0456, "step": 610175 }, { "epoch": 6.0, "grad_norm": 2.7401351928710938, "learning_rate": 2.0213738866215856e-06, "loss": 0.1031, "step": 610200 }, { "epoch": 6.0, "grad_norm": 18.00157356262207, "learning_rate": 2.021249764167337e-06, "loss": 0.0462, "step": 610225 }, { "epoch": 6.0, "grad_norm": 0.4084848463535309, "learning_rate": 2.021125641713089e-06, "loss": 0.1115, "step": 610250 }, { "epoch": 6.0, "grad_norm": 5.9847731590271, "learning_rate": 2.02100151925884e-06, "loss": 0.0252, "step": 610275 }, { "epoch": 6.0, "grad_norm": 1.931051254272461, "learning_rate": 2.0208773968045917e-06, "loss": 0.0665, "step": 610300 }, { "epoch": 6.0, "grad_norm": 25.921049118041992, "learning_rate": 2.0207532743503433e-06, "loss": 0.0162, "step": 610325 }, { "epoch": 6.0, "grad_norm": 2.8730056285858154, "learning_rate": 2.0206291518960945e-06, "loss": 0.0662, "step": 610350 }, { "epoch": 6.0, "grad_norm": 0.5120272040367126, "learning_rate": 2.020505029441846e-06, "loss": 0.0173, "step": 610375 }, { "epoch": 6.0, "grad_norm": 6.173841953277588, "learning_rate": 2.020380906987598e-06, "loss": 0.0855, "step": 610400 }, { "epoch": 6.0, "grad_norm": 12.471724510192871, "learning_rate": 2.0202567845333494e-06, "loss": 0.0302, "step": 610425 }, { "epoch": 6.0, "grad_norm": 1.1426225900650024, "learning_rate": 2.020132662079101e-06, "loss": 0.0738, "step": 610450 }, { "epoch": 6.0, "grad_norm": 9.753569602966309, "learning_rate": 2.0200085396248527e-06, "loss": 0.017, "step": 610475 }, { "epoch": 6.0, "grad_norm": 8.31364917755127, "learning_rate": 2.019884417170604e-06, "loss": 0.0654, "step": 610500 }, { "epoch": 6.0, "grad_norm": 4.21627950668335, "learning_rate": 2.0197602947163555e-06, "loss": 0.0157, "step": 610525 }, { "epoch": 6.0, "grad_norm": 1.281401515007019, "learning_rate": 2.019636172262107e-06, "loss": 0.0713, "step": 610550 }, { "epoch": 6.0, "grad_norm": 4.870802402496338, "learning_rate": 2.0195120498078584e-06, "loss": 0.0251, "step": 610575 }, { "epoch": 6.0, "grad_norm": 0.3111019730567932, "learning_rate": 2.01938792735361e-06, "loss": 0.0691, "step": 610600 }, { "epoch": 6.0, "grad_norm": 9.915952682495117, "learning_rate": 2.0192638048993617e-06, "loss": 0.0288, "step": 610625 }, { "epoch": 6.0, "grad_norm": 0.44656217098236084, "learning_rate": 2.0191396824451133e-06, "loss": 0.0647, "step": 610650 }, { "epoch": 6.0, "grad_norm": 6.5218353271484375, "learning_rate": 2.019015559990865e-06, "loss": 0.0278, "step": 610675 }, { "epoch": 6.0, "grad_norm": 2.3719780445098877, "learning_rate": 2.0188914375366166e-06, "loss": 0.0682, "step": 610700 }, { "epoch": 6.0, "grad_norm": 5.788640022277832, "learning_rate": 2.0187673150823678e-06, "loss": 0.0138, "step": 610725 }, { "epoch": 6.0, "grad_norm": 6.231290340423584, "learning_rate": 2.0186431926281194e-06, "loss": 0.0619, "step": 610750 }, { "epoch": 6.01, "grad_norm": 1.8872977495193481, "learning_rate": 2.0185190701738706e-06, "loss": 0.0159, "step": 610775 }, { "epoch": 6.01, "grad_norm": 4.061426162719727, "learning_rate": 2.0183949477196223e-06, "loss": 0.0751, "step": 610800 }, { "epoch": 6.01, "grad_norm": 7.956246852874756, "learning_rate": 2.018270825265374e-06, "loss": 0.0266, "step": 610825 }, { "epoch": 6.01, "grad_norm": 4.321599006652832, "learning_rate": 2.0181467028111255e-06, "loss": 0.0622, "step": 610850 }, { "epoch": 6.01, "grad_norm": 10.303898811340332, "learning_rate": 2.018022580356877e-06, "loss": 0.0268, "step": 610875 }, { "epoch": 6.01, "grad_norm": 0.8863802552223206, "learning_rate": 2.017898457902629e-06, "loss": 0.0521, "step": 610900 }, { "epoch": 6.01, "grad_norm": 1.2790580987930298, "learning_rate": 2.01777433544838e-06, "loss": 0.0182, "step": 610925 }, { "epoch": 6.01, "grad_norm": 8.708395957946777, "learning_rate": 2.0176502129941316e-06, "loss": 0.0608, "step": 610950 }, { "epoch": 6.01, "grad_norm": 7.3582868576049805, "learning_rate": 2.0175260905398833e-06, "loss": 0.0174, "step": 610975 }, { "epoch": 6.01, "grad_norm": 0.3160983622074127, "learning_rate": 2.0174019680856345e-06, "loss": 0.0756, "step": 611000 }, { "epoch": 6.01, "grad_norm": 15.959242820739746, "learning_rate": 2.017277845631386e-06, "loss": 0.0267, "step": 611025 }, { "epoch": 6.01, "grad_norm": 5.25950813293457, "learning_rate": 2.0171537231771378e-06, "loss": 0.0629, "step": 611050 }, { "epoch": 6.01, "grad_norm": 0.4883864223957062, "learning_rate": 2.0170296007228894e-06, "loss": 0.0185, "step": 611075 }, { "epoch": 6.01, "grad_norm": 4.464822769165039, "learning_rate": 2.016905478268641e-06, "loss": 0.0808, "step": 611100 }, { "epoch": 6.01, "grad_norm": 0.8982642889022827, "learning_rate": 2.0167813558143927e-06, "loss": 0.0207, "step": 611125 }, { "epoch": 6.01, "grad_norm": 1.605162262916565, "learning_rate": 2.016657233360144e-06, "loss": 0.0627, "step": 611150 }, { "epoch": 6.01, "grad_norm": 1.2480411529541016, "learning_rate": 2.0165331109058955e-06, "loss": 0.0143, "step": 611175 }, { "epoch": 6.01, "grad_norm": 1.7797493934631348, "learning_rate": 2.0164089884516467e-06, "loss": 0.0542, "step": 611200 }, { "epoch": 6.01, "grad_norm": 10.836359977722168, "learning_rate": 2.0162848659973984e-06, "loss": 0.0271, "step": 611225 }, { "epoch": 6.01, "grad_norm": 3.2600924968719482, "learning_rate": 2.01616074354315e-06, "loss": 0.0676, "step": 611250 }, { "epoch": 6.01, "grad_norm": 2.534317970275879, "learning_rate": 2.0160366210889016e-06, "loss": 0.0265, "step": 611275 }, { "epoch": 6.01, "grad_norm": 0.38439857959747314, "learning_rate": 2.0159124986346533e-06, "loss": 0.0841, "step": 611300 }, { "epoch": 6.01, "grad_norm": 4.197981834411621, "learning_rate": 2.015788376180405e-06, "loss": 0.0107, "step": 611325 }, { "epoch": 6.01, "grad_norm": 5.745488166809082, "learning_rate": 2.015664253726156e-06, "loss": 0.05, "step": 611350 }, { "epoch": 6.01, "grad_norm": 2.8251569271087646, "learning_rate": 2.0155401312719077e-06, "loss": 0.027, "step": 611375 }, { "epoch": 6.01, "grad_norm": 1.8594300746917725, "learning_rate": 2.0154160088176594e-06, "loss": 0.0966, "step": 611400 }, { "epoch": 6.01, "grad_norm": 2.01531720161438, "learning_rate": 2.0152918863634106e-06, "loss": 0.0132, "step": 611425 }, { "epoch": 6.01, "grad_norm": 3.002706527709961, "learning_rate": 2.0151677639091622e-06, "loss": 0.1084, "step": 611450 }, { "epoch": 6.01, "grad_norm": 6.065701484680176, "learning_rate": 2.015043641454914e-06, "loss": 0.0273, "step": 611475 }, { "epoch": 6.01, "grad_norm": 0.780526340007782, "learning_rate": 2.0149195190006655e-06, "loss": 0.0917, "step": 611500 }, { "epoch": 6.01, "grad_norm": 8.52247428894043, "learning_rate": 2.014795396546417e-06, "loss": 0.0473, "step": 611525 }, { "epoch": 6.01, "grad_norm": 9.499750137329102, "learning_rate": 2.0146712740921688e-06, "loss": 0.0721, "step": 611550 }, { "epoch": 6.01, "grad_norm": 5.432517051696777, "learning_rate": 2.01454715163792e-06, "loss": 0.0099, "step": 611575 }, { "epoch": 6.01, "grad_norm": 0.7116190195083618, "learning_rate": 2.0144230291836716e-06, "loss": 0.0755, "step": 611600 }, { "epoch": 6.01, "grad_norm": 7.7953715324401855, "learning_rate": 2.014298906729423e-06, "loss": 0.0204, "step": 611625 }, { "epoch": 6.01, "grad_norm": 0.4019816219806671, "learning_rate": 2.0141747842751745e-06, "loss": 0.0719, "step": 611650 }, { "epoch": 6.01, "grad_norm": 8.74420166015625, "learning_rate": 2.014050661820926e-06, "loss": 0.0284, "step": 611675 }, { "epoch": 6.01, "grad_norm": 0.2569851279258728, "learning_rate": 2.0139265393666777e-06, "loss": 0.0845, "step": 611700 }, { "epoch": 6.01, "grad_norm": 12.76273250579834, "learning_rate": 2.0138024169124294e-06, "loss": 0.0198, "step": 611725 }, { "epoch": 6.01, "grad_norm": 4.004491329193115, "learning_rate": 2.013678294458181e-06, "loss": 0.0866, "step": 611750 }, { "epoch": 6.02, "grad_norm": 6.528497219085693, "learning_rate": 2.0135541720039322e-06, "loss": 0.0292, "step": 611775 }, { "epoch": 6.02, "grad_norm": 0.2717702090740204, "learning_rate": 2.0134350144478537e-06, "loss": 0.0478, "step": 611800 }, { "epoch": 6.02, "grad_norm": 2.1628830432891846, "learning_rate": 2.0133108919936053e-06, "loss": 0.0227, "step": 611825 }, { "epoch": 6.02, "grad_norm": 6.959001541137695, "learning_rate": 2.013186769539357e-06, "loss": 0.0733, "step": 611850 }, { "epoch": 6.02, "grad_norm": 0.5914803743362427, "learning_rate": 2.013062647085108e-06, "loss": 0.0387, "step": 611875 }, { "epoch": 6.02, "grad_norm": 2.4848196506500244, "learning_rate": 2.0129385246308598e-06, "loss": 0.0901, "step": 611900 }, { "epoch": 6.02, "grad_norm": 2.3743062019348145, "learning_rate": 2.0128144021766114e-06, "loss": 0.0199, "step": 611925 }, { "epoch": 6.02, "grad_norm": 7.157012939453125, "learning_rate": 2.012690279722363e-06, "loss": 0.0705, "step": 611950 }, { "epoch": 6.02, "grad_norm": 1.5323398113250732, "learning_rate": 2.0125661572681147e-06, "loss": 0.0201, "step": 611975 }, { "epoch": 6.02, "grad_norm": 1.800382137298584, "learning_rate": 2.0124420348138663e-06, "loss": 0.0596, "step": 612000 }, { "epoch": 6.02, "grad_norm": 3.1497154235839844, "learning_rate": 2.0123179123596175e-06, "loss": 0.0194, "step": 612025 }, { "epoch": 6.02, "grad_norm": 0.344115287065506, "learning_rate": 2.012193789905369e-06, "loss": 0.0623, "step": 612050 }, { "epoch": 6.02, "grad_norm": 4.552411079406738, "learning_rate": 2.012069667451121e-06, "loss": 0.0118, "step": 612075 }, { "epoch": 6.02, "grad_norm": 0.452406108379364, "learning_rate": 2.011945544996872e-06, "loss": 0.0701, "step": 612100 }, { "epoch": 6.02, "grad_norm": 6.154345989227295, "learning_rate": 2.0118214225426236e-06, "loss": 0.0212, "step": 612125 }, { "epoch": 6.02, "grad_norm": 1.1463161706924438, "learning_rate": 2.0116973000883753e-06, "loss": 0.0667, "step": 612150 }, { "epoch": 6.02, "grad_norm": 1.3485186100006104, "learning_rate": 2.011573177634127e-06, "loss": 0.0156, "step": 612175 }, { "epoch": 6.02, "grad_norm": 1.788007378578186, "learning_rate": 2.0114490551798786e-06, "loss": 0.0644, "step": 612200 }, { "epoch": 6.02, "grad_norm": 8.187055587768555, "learning_rate": 2.0113249327256298e-06, "loss": 0.0228, "step": 612225 }, { "epoch": 6.02, "grad_norm": 6.341457366943359, "learning_rate": 2.0112008102713814e-06, "loss": 0.0621, "step": 612250 }, { "epoch": 6.02, "grad_norm": 1.9754934310913086, "learning_rate": 2.011076687817133e-06, "loss": 0.0133, "step": 612275 }, { "epoch": 6.02, "grad_norm": 3.3621461391448975, "learning_rate": 2.0109525653628847e-06, "loss": 0.0723, "step": 612300 }, { "epoch": 6.02, "grad_norm": 2.446404218673706, "learning_rate": 2.0108284429086363e-06, "loss": 0.0351, "step": 612325 }, { "epoch": 6.02, "grad_norm": 6.573886394500732, "learning_rate": 2.010704320454388e-06, "loss": 0.0805, "step": 612350 }, { "epoch": 6.02, "grad_norm": 12.131911277770996, "learning_rate": 2.010580198000139e-06, "loss": 0.024, "step": 612375 }, { "epoch": 6.02, "grad_norm": 1.5990427732467651, "learning_rate": 2.0104560755458908e-06, "loss": 0.0692, "step": 612400 }, { "epoch": 6.02, "grad_norm": 2.6073572635650635, "learning_rate": 2.0103319530916424e-06, "loss": 0.0157, "step": 612425 }, { "epoch": 6.02, "grad_norm": 0.361287385225296, "learning_rate": 2.0102078306373936e-06, "loss": 0.0591, "step": 612450 }, { "epoch": 6.02, "grad_norm": 4.9469804763793945, "learning_rate": 2.0100837081831453e-06, "loss": 0.0282, "step": 612475 }, { "epoch": 6.02, "grad_norm": 4.005712032318115, "learning_rate": 2.009959585728897e-06, "loss": 0.0569, "step": 612500 }, { "epoch": 6.02, "grad_norm": 4.777402400970459, "learning_rate": 2.0098354632746485e-06, "loss": 0.0227, "step": 612525 }, { "epoch": 6.02, "grad_norm": 0.5427104830741882, "learning_rate": 2.0097113408204e-06, "loss": 0.0548, "step": 612550 }, { "epoch": 6.02, "grad_norm": 9.906270027160645, "learning_rate": 2.0095872183661514e-06, "loss": 0.0297, "step": 612575 }, { "epoch": 6.02, "grad_norm": 2.442373037338257, "learning_rate": 2.009463095911903e-06, "loss": 0.0698, "step": 612600 }, { "epoch": 6.02, "grad_norm": 4.721402168273926, "learning_rate": 2.0093389734576547e-06, "loss": 0.0266, "step": 612625 }, { "epoch": 6.02, "grad_norm": 2.8429408073425293, "learning_rate": 2.009214851003406e-06, "loss": 0.0853, "step": 612650 }, { "epoch": 6.02, "grad_norm": 7.48730993270874, "learning_rate": 2.0090907285491575e-06, "loss": 0.0185, "step": 612675 }, { "epoch": 6.02, "grad_norm": 6.103190898895264, "learning_rate": 2.008966606094909e-06, "loss": 0.0586, "step": 612700 }, { "epoch": 6.02, "grad_norm": 4.980559825897217, "learning_rate": 2.0088424836406608e-06, "loss": 0.0145, "step": 612725 }, { "epoch": 6.02, "grad_norm": 3.5178749561309814, "learning_rate": 2.0087183611864124e-06, "loss": 0.0844, "step": 612750 }, { "epoch": 6.02, "grad_norm": 1.6041923761367798, "learning_rate": 2.008594238732164e-06, "loss": 0.0184, "step": 612775 }, { "epoch": 6.03, "grad_norm": 2.533909559249878, "learning_rate": 2.0084701162779153e-06, "loss": 0.0686, "step": 612800 }, { "epoch": 6.03, "grad_norm": 2.977295398712158, "learning_rate": 2.008345993823667e-06, "loss": 0.0207, "step": 612825 }, { "epoch": 6.03, "grad_norm": 1.8175461292266846, "learning_rate": 2.0082218713694185e-06, "loss": 0.0956, "step": 612850 }, { "epoch": 6.03, "grad_norm": 5.939431190490723, "learning_rate": 2.0080977489151697e-06, "loss": 0.0157, "step": 612875 }, { "epoch": 6.03, "grad_norm": 0.41240349411964417, "learning_rate": 2.0079736264609214e-06, "loss": 0.0676, "step": 612900 }, { "epoch": 6.03, "grad_norm": 12.405257225036621, "learning_rate": 2.007849504006673e-06, "loss": 0.0211, "step": 612925 }, { "epoch": 6.03, "grad_norm": 1.5503109693527222, "learning_rate": 2.0077253815524246e-06, "loss": 0.0933, "step": 612950 }, { "epoch": 6.03, "grad_norm": 10.029423713684082, "learning_rate": 2.0076012590981763e-06, "loss": 0.0179, "step": 612975 }, { "epoch": 6.03, "grad_norm": 0.7645732760429382, "learning_rate": 2.0074771366439275e-06, "loss": 0.0718, "step": 613000 }, { "epoch": 6.03, "grad_norm": 8.804137229919434, "learning_rate": 2.007353014189679e-06, "loss": 0.0169, "step": 613025 }, { "epoch": 6.03, "grad_norm": 8.80970287322998, "learning_rate": 2.0072288917354308e-06, "loss": 0.0547, "step": 613050 }, { "epoch": 6.03, "grad_norm": 4.2823591232299805, "learning_rate": 2.007104769281182e-06, "loss": 0.0159, "step": 613075 }, { "epoch": 6.03, "grad_norm": 1.1280516386032104, "learning_rate": 2.0069806468269336e-06, "loss": 0.0746, "step": 613100 }, { "epoch": 6.03, "grad_norm": 0.5632532238960266, "learning_rate": 2.0068565243726852e-06, "loss": 0.0378, "step": 613125 }, { "epoch": 6.03, "grad_norm": 0.556403636932373, "learning_rate": 2.006732401918437e-06, "loss": 0.0515, "step": 613150 }, { "epoch": 6.03, "grad_norm": 10.559931755065918, "learning_rate": 2.0066082794641885e-06, "loss": 0.0278, "step": 613175 }, { "epoch": 6.03, "grad_norm": 3.0097713470458984, "learning_rate": 2.00648415700994e-06, "loss": 0.0553, "step": 613200 }, { "epoch": 6.03, "grad_norm": 11.348124504089355, "learning_rate": 2.0063600345556914e-06, "loss": 0.0218, "step": 613225 }, { "epoch": 6.03, "grad_norm": 2.6501476764678955, "learning_rate": 2.006235912101443e-06, "loss": 0.0724, "step": 613250 }, { "epoch": 6.03, "grad_norm": 5.302865982055664, "learning_rate": 2.0061117896471946e-06, "loss": 0.0323, "step": 613275 }, { "epoch": 6.03, "grad_norm": 2.4132652282714844, "learning_rate": 2.005987667192946e-06, "loss": 0.0727, "step": 613300 }, { "epoch": 6.03, "grad_norm": 5.885538578033447, "learning_rate": 2.0058635447386975e-06, "loss": 0.0221, "step": 613325 }, { "epoch": 6.03, "grad_norm": 4.851393222808838, "learning_rate": 2.005739422284449e-06, "loss": 0.0644, "step": 613350 }, { "epoch": 6.03, "grad_norm": 2.635432243347168, "learning_rate": 2.0056152998302007e-06, "loss": 0.0178, "step": 613375 }, { "epoch": 6.03, "grad_norm": 2.469280242919922, "learning_rate": 2.0054911773759524e-06, "loss": 0.0659, "step": 613400 }, { "epoch": 6.03, "grad_norm": 7.814918041229248, "learning_rate": 2.0053670549217036e-06, "loss": 0.0179, "step": 613425 }, { "epoch": 6.03, "grad_norm": 6.2746381759643555, "learning_rate": 2.0052429324674552e-06, "loss": 0.0776, "step": 613450 }, { "epoch": 6.03, "grad_norm": 0.5536075830459595, "learning_rate": 2.005118810013207e-06, "loss": 0.0306, "step": 613475 }, { "epoch": 6.03, "grad_norm": 0.48671606183052063, "learning_rate": 2.004994687558958e-06, "loss": 0.0758, "step": 613500 }, { "epoch": 6.03, "grad_norm": 6.491940021514893, "learning_rate": 2.0048705651047097e-06, "loss": 0.0229, "step": 613525 }, { "epoch": 6.03, "grad_norm": 3.3101515769958496, "learning_rate": 2.0047464426504613e-06, "loss": 0.0953, "step": 613550 }, { "epoch": 6.03, "grad_norm": 9.762770652770996, "learning_rate": 2.004622320196213e-06, "loss": 0.0311, "step": 613575 }, { "epoch": 6.03, "grad_norm": 0.43384605646133423, "learning_rate": 2.0044981977419646e-06, "loss": 0.0669, "step": 613600 }, { "epoch": 6.03, "grad_norm": 9.560781478881836, "learning_rate": 2.0043740752877162e-06, "loss": 0.0294, "step": 613625 }, { "epoch": 6.03, "grad_norm": 6.11430549621582, "learning_rate": 2.0042499528334675e-06, "loss": 0.0714, "step": 613650 }, { "epoch": 6.03, "grad_norm": 9.244165420532227, "learning_rate": 2.004125830379219e-06, "loss": 0.0139, "step": 613675 }, { "epoch": 6.03, "grad_norm": 7.308234691619873, "learning_rate": 2.0040017079249707e-06, "loss": 0.0892, "step": 613700 }, { "epoch": 6.03, "grad_norm": 9.71699333190918, "learning_rate": 2.003877585470722e-06, "loss": 0.011, "step": 613725 }, { "epoch": 6.03, "grad_norm": 0.17230656743049622, "learning_rate": 2.0037534630164736e-06, "loss": 0.0703, "step": 613750 }, { "epoch": 6.03, "grad_norm": 8.609002113342285, "learning_rate": 2.003629340562225e-06, "loss": 0.034, "step": 613775 }, { "epoch": 6.03, "grad_norm": 0.13558760285377502, "learning_rate": 2.003505218107977e-06, "loss": 0.0691, "step": 613800 }, { "epoch": 6.04, "grad_norm": 6.073998928070068, "learning_rate": 2.0033810956537285e-06, "loss": 0.0288, "step": 613825 }, { "epoch": 6.04, "grad_norm": 0.527130126953125, "learning_rate": 2.00326193809765e-06, "loss": 0.0596, "step": 613850 }, { "epoch": 6.04, "grad_norm": 7.220605850219727, "learning_rate": 2.0031378156434016e-06, "loss": 0.0247, "step": 613875 }, { "epoch": 6.04, "grad_norm": 2.0819790363311768, "learning_rate": 2.0030136931891528e-06, "loss": 0.0724, "step": 613900 }, { "epoch": 6.04, "grad_norm": 3.4148223400115967, "learning_rate": 2.0028895707349044e-06, "loss": 0.0251, "step": 613925 }, { "epoch": 6.04, "grad_norm": 2.56819486618042, "learning_rate": 2.002765448280656e-06, "loss": 0.0779, "step": 613950 }, { "epoch": 6.04, "grad_norm": 0.48483043909072876, "learning_rate": 2.0026413258264073e-06, "loss": 0.0127, "step": 613975 }, { "epoch": 6.04, "grad_norm": 1.3349363803863525, "learning_rate": 2.002517203372159e-06, "loss": 0.0757, "step": 614000 }, { "epoch": 6.04, "grad_norm": 0.772955596446991, "learning_rate": 2.0023930809179105e-06, "loss": 0.0152, "step": 614025 }, { "epoch": 6.04, "grad_norm": 4.237107753753662, "learning_rate": 2.002268958463662e-06, "loss": 0.0712, "step": 614050 }, { "epoch": 6.04, "grad_norm": 8.50062370300293, "learning_rate": 2.002144836009414e-06, "loss": 0.0209, "step": 614075 }, { "epoch": 6.04, "grad_norm": 0.6054694056510925, "learning_rate": 2.002020713555165e-06, "loss": 0.0739, "step": 614100 }, { "epoch": 6.04, "grad_norm": 19.504304885864258, "learning_rate": 2.0018965911009166e-06, "loss": 0.0277, "step": 614125 }, { "epoch": 6.04, "grad_norm": 1.3853874206542969, "learning_rate": 2.0017724686466683e-06, "loss": 0.0817, "step": 614150 }, { "epoch": 6.04, "grad_norm": 2.0986785888671875, "learning_rate": 2.0016483461924195e-06, "loss": 0.021, "step": 614175 }, { "epoch": 6.04, "grad_norm": 1.4481431245803833, "learning_rate": 2.001524223738171e-06, "loss": 0.0874, "step": 614200 }, { "epoch": 6.04, "grad_norm": 15.817474365234375, "learning_rate": 2.0014001012839228e-06, "loss": 0.0282, "step": 614225 }, { "epoch": 6.04, "grad_norm": 2.791794776916504, "learning_rate": 2.0012759788296744e-06, "loss": 0.0792, "step": 614250 }, { "epoch": 6.04, "grad_norm": 11.153180122375488, "learning_rate": 2.001151856375426e-06, "loss": 0.0285, "step": 614275 }, { "epoch": 6.04, "grad_norm": 0.32130104303359985, "learning_rate": 2.0010277339211777e-06, "loss": 0.0844, "step": 614300 }, { "epoch": 6.04, "grad_norm": 7.759958267211914, "learning_rate": 2.000903611466929e-06, "loss": 0.0215, "step": 614325 }, { "epoch": 6.04, "grad_norm": 6.034566879272461, "learning_rate": 2.0007794890126805e-06, "loss": 0.0686, "step": 614350 }, { "epoch": 6.04, "grad_norm": 4.038305759429932, "learning_rate": 2.000655366558432e-06, "loss": 0.0241, "step": 614375 }, { "epoch": 6.04, "grad_norm": 0.25533801317214966, "learning_rate": 2.0005312441041834e-06, "loss": 0.0564, "step": 614400 }, { "epoch": 6.04, "grad_norm": 3.8486878871917725, "learning_rate": 2.000407121649935e-06, "loss": 0.0218, "step": 614425 }, { "epoch": 6.04, "grad_norm": 13.558938980102539, "learning_rate": 2.0002829991956866e-06, "loss": 0.0765, "step": 614450 }, { "epoch": 6.04, "grad_norm": 4.378809928894043, "learning_rate": 2.0001588767414383e-06, "loss": 0.0223, "step": 614475 }, { "epoch": 6.04, "grad_norm": 0.15825040638446808, "learning_rate": 2.00003475428719e-06, "loss": 0.0619, "step": 614500 }, { "epoch": 6.04, "grad_norm": 8.389923095703125, "learning_rate": 1.999910631832941e-06, "loss": 0.0201, "step": 614525 }, { "epoch": 6.04, "grad_norm": 1.7164554595947266, "learning_rate": 1.9997865093786927e-06, "loss": 0.072, "step": 614550 }, { "epoch": 6.04, "grad_norm": 10.902166366577148, "learning_rate": 1.9996623869244444e-06, "loss": 0.021, "step": 614575 }, { "epoch": 6.04, "grad_norm": 2.8046629428863525, "learning_rate": 1.9995382644701956e-06, "loss": 0.0946, "step": 614600 }, { "epoch": 6.04, "grad_norm": 5.014628887176514, "learning_rate": 1.9994141420159472e-06, "loss": 0.0144, "step": 614625 }, { "epoch": 6.04, "grad_norm": 1.224932074546814, "learning_rate": 1.999290019561699e-06, "loss": 0.0613, "step": 614650 }, { "epoch": 6.04, "grad_norm": 0.3598478436470032, "learning_rate": 1.9991658971074505e-06, "loss": 0.023, "step": 614675 }, { "epoch": 6.04, "grad_norm": 14.338706016540527, "learning_rate": 1.999041774653202e-06, "loss": 0.0971, "step": 614700 }, { "epoch": 6.04, "grad_norm": 9.806295394897461, "learning_rate": 1.9989176521989538e-06, "loss": 0.0214, "step": 614725 }, { "epoch": 6.04, "grad_norm": 7.648878574371338, "learning_rate": 1.998793529744705e-06, "loss": 0.078, "step": 614750 }, { "epoch": 6.04, "grad_norm": 12.77213191986084, "learning_rate": 1.9986694072904566e-06, "loss": 0.0223, "step": 614775 }, { "epoch": 6.04, "grad_norm": 7.535845756530762, "learning_rate": 1.9985452848362082e-06, "loss": 0.0827, "step": 614800 }, { "epoch": 6.05, "grad_norm": 8.611527442932129, "learning_rate": 1.9984211623819595e-06, "loss": 0.026, "step": 614825 }, { "epoch": 6.05, "grad_norm": 2.285618782043457, "learning_rate": 1.998297039927711e-06, "loss": 0.0895, "step": 614850 }, { "epoch": 6.05, "grad_norm": 11.264291763305664, "learning_rate": 1.9981729174734627e-06, "loss": 0.0201, "step": 614875 }, { "epoch": 6.05, "grad_norm": 1.170265555381775, "learning_rate": 1.9980487950192144e-06, "loss": 0.0576, "step": 614900 }, { "epoch": 6.05, "grad_norm": 14.620466232299805, "learning_rate": 1.997924672564966e-06, "loss": 0.027, "step": 614925 }, { "epoch": 6.05, "grad_norm": 7.509016036987305, "learning_rate": 1.997800550110717e-06, "loss": 0.0642, "step": 614950 }, { "epoch": 6.05, "grad_norm": 2.6185598373413086, "learning_rate": 1.997676427656469e-06, "loss": 0.0238, "step": 614975 }, { "epoch": 6.05, "grad_norm": 1.0308172702789307, "learning_rate": 1.9975523052022205e-06, "loss": 0.0571, "step": 615000 }, { "epoch": 6.05, "grad_norm": 8.545241355895996, "learning_rate": 1.997428182747972e-06, "loss": 0.0332, "step": 615025 }, { "epoch": 6.05, "grad_norm": 3.2652268409729004, "learning_rate": 1.9973040602937237e-06, "loss": 0.0753, "step": 615050 }, { "epoch": 6.05, "grad_norm": 5.413517475128174, "learning_rate": 1.9971799378394754e-06, "loss": 0.0141, "step": 615075 }, { "epoch": 6.05, "grad_norm": 3.962651252746582, "learning_rate": 1.9970558153852266e-06, "loss": 0.0602, "step": 615100 }, { "epoch": 6.05, "grad_norm": 5.316922187805176, "learning_rate": 1.9969316929309782e-06, "loss": 0.0224, "step": 615125 }, { "epoch": 6.05, "grad_norm": 0.024342026561498642, "learning_rate": 1.99680757047673e-06, "loss": 0.0729, "step": 615150 }, { "epoch": 6.05, "grad_norm": 5.943282604217529, "learning_rate": 1.996683448022481e-06, "loss": 0.0266, "step": 615175 }, { "epoch": 6.05, "grad_norm": 1.1984152793884277, "learning_rate": 1.9965593255682327e-06, "loss": 0.0729, "step": 615200 }, { "epoch": 6.05, "grad_norm": 10.980619430541992, "learning_rate": 1.9964352031139843e-06, "loss": 0.0277, "step": 615225 }, { "epoch": 6.05, "grad_norm": 1.0759360790252686, "learning_rate": 1.996311080659736e-06, "loss": 0.0805, "step": 615250 }, { "epoch": 6.05, "grad_norm": 11.512960433959961, "learning_rate": 1.9961869582054876e-06, "loss": 0.0351, "step": 615275 }, { "epoch": 6.05, "grad_norm": 1.0086100101470947, "learning_rate": 1.996062835751239e-06, "loss": 0.0581, "step": 615300 }, { "epoch": 6.05, "grad_norm": 0.1633489578962326, "learning_rate": 1.9959387132969905e-06, "loss": 0.0187, "step": 615325 }, { "epoch": 6.05, "grad_norm": 1.7580171823501587, "learning_rate": 1.995814590842742e-06, "loss": 0.0544, "step": 615350 }, { "epoch": 6.05, "grad_norm": 3.449481725692749, "learning_rate": 1.9956904683884933e-06, "loss": 0.0243, "step": 615375 }, { "epoch": 6.05, "grad_norm": 3.217893362045288, "learning_rate": 1.995566345934245e-06, "loss": 0.0722, "step": 615400 }, { "epoch": 6.05, "grad_norm": 7.495262622833252, "learning_rate": 1.9954422234799966e-06, "loss": 0.0263, "step": 615425 }, { "epoch": 6.05, "grad_norm": 1.4458953142166138, "learning_rate": 1.995318101025748e-06, "loss": 0.0733, "step": 615450 }, { "epoch": 6.05, "grad_norm": 3.1624739170074463, "learning_rate": 1.9951939785715e-06, "loss": 0.0293, "step": 615475 }, { "epoch": 6.05, "grad_norm": 1.2009538412094116, "learning_rate": 1.9950698561172515e-06, "loss": 0.088, "step": 615500 }, { "epoch": 6.05, "grad_norm": 1.16280198097229, "learning_rate": 1.9949457336630027e-06, "loss": 0.0202, "step": 615525 }, { "epoch": 6.05, "grad_norm": 0.21649572253227234, "learning_rate": 1.9948216112087543e-06, "loss": 0.0553, "step": 615550 }, { "epoch": 6.05, "grad_norm": 5.1815314292907715, "learning_rate": 1.994697488754506e-06, "loss": 0.0186, "step": 615575 }, { "epoch": 6.05, "grad_norm": 0.1747850775718689, "learning_rate": 1.994573366300257e-06, "loss": 0.0712, "step": 615600 }, { "epoch": 6.05, "grad_norm": 6.197912693023682, "learning_rate": 1.994449243846009e-06, "loss": 0.017, "step": 615625 }, { "epoch": 6.05, "grad_norm": 6.057983875274658, "learning_rate": 1.9943251213917604e-06, "loss": 0.0704, "step": 615650 }, { "epoch": 6.05, "grad_norm": 2.334714651107788, "learning_rate": 1.994200998937512e-06, "loss": 0.0204, "step": 615675 }, { "epoch": 6.05, "grad_norm": 5.56907844543457, "learning_rate": 1.9940768764832637e-06, "loss": 0.0765, "step": 615700 }, { "epoch": 6.05, "grad_norm": 2.7819879055023193, "learning_rate": 1.993952754029015e-06, "loss": 0.0354, "step": 615725 }, { "epoch": 6.05, "grad_norm": 1.2927497625350952, "learning_rate": 1.9938286315747666e-06, "loss": 0.0785, "step": 615750 }, { "epoch": 6.05, "grad_norm": 8.982437133789062, "learning_rate": 1.993704509120518e-06, "loss": 0.0388, "step": 615775 }, { "epoch": 6.05, "grad_norm": 1.0901556015014648, "learning_rate": 1.9935803866662694e-06, "loss": 0.0872, "step": 615800 }, { "epoch": 6.05, "grad_norm": 0.9835096597671509, "learning_rate": 1.993456264212021e-06, "loss": 0.0255, "step": 615825 }, { "epoch": 6.06, "grad_norm": 8.006169319152832, "learning_rate": 1.9933321417577727e-06, "loss": 0.0639, "step": 615850 }, { "epoch": 6.06, "grad_norm": 8.795560836791992, "learning_rate": 1.9932080193035243e-06, "loss": 0.0248, "step": 615875 }, { "epoch": 6.06, "grad_norm": 0.869967520236969, "learning_rate": 1.993083896849276e-06, "loss": 0.0791, "step": 615900 }, { "epoch": 6.06, "grad_norm": 1.2104308605194092, "learning_rate": 1.9929597743950276e-06, "loss": 0.0125, "step": 615925 }, { "epoch": 6.06, "grad_norm": 2.215973138809204, "learning_rate": 1.992835651940779e-06, "loss": 0.0588, "step": 615950 }, { "epoch": 6.06, "grad_norm": 5.173393726348877, "learning_rate": 1.9927115294865304e-06, "loss": 0.0123, "step": 615975 }, { "epoch": 6.06, "grad_norm": 1.261182188987732, "learning_rate": 1.992587407032282e-06, "loss": 0.0502, "step": 616000 }, { "epoch": 6.06, "grad_norm": 7.142642498016357, "learning_rate": 1.9924632845780333e-06, "loss": 0.0201, "step": 616025 }, { "epoch": 6.06, "grad_norm": 1.0246831178665161, "learning_rate": 1.992339162123785e-06, "loss": 0.0611, "step": 616050 }, { "epoch": 6.06, "grad_norm": 1.6637839078903198, "learning_rate": 1.9922150396695365e-06, "loss": 0.0245, "step": 616075 }, { "epoch": 6.06, "grad_norm": 1.1486554145812988, "learning_rate": 1.992090917215288e-06, "loss": 0.0592, "step": 616100 }, { "epoch": 6.06, "grad_norm": 5.566523551940918, "learning_rate": 1.99196679476104e-06, "loss": 0.0224, "step": 616125 }, { "epoch": 6.06, "grad_norm": 2.53511118888855, "learning_rate": 1.991842672306791e-06, "loss": 0.0504, "step": 616150 }, { "epoch": 6.06, "grad_norm": 7.021163463592529, "learning_rate": 1.9917185498525427e-06, "loss": 0.0303, "step": 616175 }, { "epoch": 6.06, "grad_norm": 1.4109477996826172, "learning_rate": 1.9915944273982943e-06, "loss": 0.0845, "step": 616200 }, { "epoch": 6.06, "grad_norm": 3.0414915084838867, "learning_rate": 1.9914703049440455e-06, "loss": 0.0147, "step": 616225 }, { "epoch": 6.06, "grad_norm": 1.6721168756484985, "learning_rate": 1.9913511473879674e-06, "loss": 0.1119, "step": 616250 }, { "epoch": 6.06, "grad_norm": 0.5609210729598999, "learning_rate": 1.9912270249337186e-06, "loss": 0.0125, "step": 616275 }, { "epoch": 6.06, "grad_norm": 1.8321415185928345, "learning_rate": 1.9911029024794702e-06, "loss": 0.0847, "step": 616300 }, { "epoch": 6.06, "grad_norm": 9.66445541381836, "learning_rate": 1.990978780025222e-06, "loss": 0.0226, "step": 616325 }, { "epoch": 6.06, "grad_norm": 0.21206340193748474, "learning_rate": 1.9908546575709735e-06, "loss": 0.085, "step": 616350 }, { "epoch": 6.06, "grad_norm": 6.690197944641113, "learning_rate": 1.990730535116725e-06, "loss": 0.0367, "step": 616375 }, { "epoch": 6.06, "grad_norm": 0.3513423800468445, "learning_rate": 1.9906064126624763e-06, "loss": 0.0578, "step": 616400 }, { "epoch": 6.06, "grad_norm": 5.699488639831543, "learning_rate": 1.990482290208228e-06, "loss": 0.0298, "step": 616425 }, { "epoch": 6.06, "grad_norm": 0.8344139456748962, "learning_rate": 1.9903581677539796e-06, "loss": 0.0948, "step": 616450 }, { "epoch": 6.06, "grad_norm": 3.390442132949829, "learning_rate": 1.990234045299731e-06, "loss": 0.0229, "step": 616475 }, { "epoch": 6.06, "grad_norm": 6.687310218811035, "learning_rate": 1.9901099228454825e-06, "loss": 0.1037, "step": 616500 }, { "epoch": 6.06, "grad_norm": 15.443205833435059, "learning_rate": 1.989985800391234e-06, "loss": 0.0338, "step": 616525 }, { "epoch": 6.06, "grad_norm": 4.980728626251221, "learning_rate": 1.9898616779369857e-06, "loss": 0.087, "step": 616550 }, { "epoch": 6.06, "grad_norm": 1.0804896354675293, "learning_rate": 1.9897375554827374e-06, "loss": 0.0208, "step": 616575 }, { "epoch": 6.06, "grad_norm": 0.8853244185447693, "learning_rate": 1.989613433028489e-06, "loss": 0.0743, "step": 616600 }, { "epoch": 6.06, "grad_norm": 3.506486654281616, "learning_rate": 1.98948931057424e-06, "loss": 0.017, "step": 616625 }, { "epoch": 6.06, "grad_norm": 0.6714814305305481, "learning_rate": 1.989365188119992e-06, "loss": 0.0754, "step": 616650 }, { "epoch": 6.06, "grad_norm": 10.094493865966797, "learning_rate": 1.9892410656657435e-06, "loss": 0.0316, "step": 616675 }, { "epoch": 6.06, "grad_norm": 2.6186323165893555, "learning_rate": 1.9891169432114947e-06, "loss": 0.0615, "step": 616700 }, { "epoch": 6.06, "grad_norm": 1.9834715127944946, "learning_rate": 1.9889928207572463e-06, "loss": 0.0139, "step": 616725 }, { "epoch": 6.06, "grad_norm": 0.8014566898345947, "learning_rate": 1.988868698302998e-06, "loss": 0.0867, "step": 616750 }, { "epoch": 6.06, "grad_norm": 3.363448143005371, "learning_rate": 1.9887445758487496e-06, "loss": 0.0325, "step": 616775 }, { "epoch": 6.06, "grad_norm": 3.2149910926818848, "learning_rate": 1.9886204533945012e-06, "loss": 0.092, "step": 616800 }, { "epoch": 6.06, "grad_norm": 1.0203301906585693, "learning_rate": 1.9884963309402524e-06, "loss": 0.031, "step": 616825 }, { "epoch": 6.06, "grad_norm": 0.9330154657363892, "learning_rate": 1.988372208486004e-06, "loss": 0.0967, "step": 616850 }, { "epoch": 6.07, "grad_norm": 8.130773544311523, "learning_rate": 1.9882480860317557e-06, "loss": 0.0218, "step": 616875 }, { "epoch": 6.07, "grad_norm": 1.4480973482131958, "learning_rate": 1.988123963577507e-06, "loss": 0.0776, "step": 616900 }, { "epoch": 6.07, "grad_norm": 5.610330104827881, "learning_rate": 1.9879998411232586e-06, "loss": 0.03, "step": 616925 }, { "epoch": 6.07, "grad_norm": 2.316697120666504, "learning_rate": 1.98787571866901e-06, "loss": 0.0557, "step": 616950 }, { "epoch": 6.07, "grad_norm": 6.548624038696289, "learning_rate": 1.987751596214762e-06, "loss": 0.017, "step": 616975 }, { "epoch": 6.07, "grad_norm": 0.651544451713562, "learning_rate": 1.9876274737605135e-06, "loss": 0.0614, "step": 617000 }, { "epoch": 6.07, "grad_norm": 6.571836948394775, "learning_rate": 1.987503351306265e-06, "loss": 0.0155, "step": 617025 }, { "epoch": 6.07, "grad_norm": 2.9164228439331055, "learning_rate": 1.9873792288520163e-06, "loss": 0.0622, "step": 617050 }, { "epoch": 6.07, "grad_norm": 4.946361064910889, "learning_rate": 1.987255106397768e-06, "loss": 0.0276, "step": 617075 }, { "epoch": 6.07, "grad_norm": 0.794331431388855, "learning_rate": 1.9871309839435196e-06, "loss": 0.0935, "step": 617100 }, { "epoch": 6.07, "grad_norm": 13.072694778442383, "learning_rate": 1.987006861489271e-06, "loss": 0.0113, "step": 617125 }, { "epoch": 6.07, "grad_norm": 7.747666358947754, "learning_rate": 1.9868827390350224e-06, "loss": 0.0594, "step": 617150 }, { "epoch": 6.07, "grad_norm": 7.010401725769043, "learning_rate": 1.986758616580774e-06, "loss": 0.0302, "step": 617175 }, { "epoch": 6.07, "grad_norm": 1.5566545724868774, "learning_rate": 1.9866344941265257e-06, "loss": 0.075, "step": 617200 }, { "epoch": 6.07, "grad_norm": 1.7650736570358276, "learning_rate": 1.9865103716722773e-06, "loss": 0.0163, "step": 617225 }, { "epoch": 6.07, "grad_norm": 0.24706590175628662, "learning_rate": 1.9863862492180285e-06, "loss": 0.0849, "step": 617250 }, { "epoch": 6.07, "grad_norm": 5.506739139556885, "learning_rate": 1.98626212676378e-06, "loss": 0.0228, "step": 617275 }, { "epoch": 6.07, "grad_norm": 2.8041069507598877, "learning_rate": 1.986138004309532e-06, "loss": 0.0862, "step": 617300 }, { "epoch": 6.07, "grad_norm": 5.869273662567139, "learning_rate": 1.986013881855283e-06, "loss": 0.0244, "step": 617325 }, { "epoch": 6.07, "grad_norm": 1.2909817695617676, "learning_rate": 1.9858897594010347e-06, "loss": 0.0873, "step": 617350 }, { "epoch": 6.07, "grad_norm": 4.513141632080078, "learning_rate": 1.9857656369467863e-06, "loss": 0.0177, "step": 617375 }, { "epoch": 6.07, "grad_norm": 1.7477080821990967, "learning_rate": 1.985641514492538e-06, "loss": 0.0962, "step": 617400 }, { "epoch": 6.07, "grad_norm": 1.6361234188079834, "learning_rate": 1.9855173920382896e-06, "loss": 0.0138, "step": 617425 }, { "epoch": 6.07, "grad_norm": 4.071207523345947, "learning_rate": 1.985393269584041e-06, "loss": 0.0699, "step": 617450 }, { "epoch": 6.07, "grad_norm": 13.588486671447754, "learning_rate": 1.9852691471297924e-06, "loss": 0.0198, "step": 617475 }, { "epoch": 6.07, "grad_norm": 3.278701066970825, "learning_rate": 1.985145024675544e-06, "loss": 0.1081, "step": 617500 }, { "epoch": 6.07, "grad_norm": 2.567464590072632, "learning_rate": 1.9850209022212957e-06, "loss": 0.0126, "step": 617525 }, { "epoch": 6.07, "grad_norm": 2.4775004386901855, "learning_rate": 1.984896779767047e-06, "loss": 0.0716, "step": 617550 }, { "epoch": 6.07, "grad_norm": 7.664440155029297, "learning_rate": 1.9847726573127985e-06, "loss": 0.0214, "step": 617575 }, { "epoch": 6.07, "grad_norm": 0.9259445667266846, "learning_rate": 1.98464853485855e-06, "loss": 0.0743, "step": 617600 }, { "epoch": 6.07, "grad_norm": 7.044600009918213, "learning_rate": 1.984524412404302e-06, "loss": 0.0167, "step": 617625 }, { "epoch": 6.07, "grad_norm": 4.028160572052002, "learning_rate": 1.9844002899500534e-06, "loss": 0.0752, "step": 617650 }, { "epoch": 6.07, "grad_norm": 2.240058183670044, "learning_rate": 1.9842761674958046e-06, "loss": 0.0155, "step": 617675 }, { "epoch": 6.07, "grad_norm": 2.849925994873047, "learning_rate": 1.9841520450415563e-06, "loss": 0.0889, "step": 617700 }, { "epoch": 6.07, "grad_norm": 3.2869651317596436, "learning_rate": 1.984027922587308e-06, "loss": 0.0132, "step": 617725 }, { "epoch": 6.07, "grad_norm": 1.2846057415008545, "learning_rate": 1.983903800133059e-06, "loss": 0.0653, "step": 617750 }, { "epoch": 6.07, "grad_norm": 4.839213848114014, "learning_rate": 1.9837796776788108e-06, "loss": 0.0233, "step": 617775 }, { "epoch": 6.07, "grad_norm": 0.759442150592804, "learning_rate": 1.9836555552245624e-06, "loss": 0.0676, "step": 617800 }, { "epoch": 6.07, "grad_norm": 0.13741745054721832, "learning_rate": 1.983531432770314e-06, "loss": 0.0137, "step": 617825 }, { "epoch": 6.07, "grad_norm": 1.2558730840682983, "learning_rate": 1.9834073103160657e-06, "loss": 0.0635, "step": 617850 }, { "epoch": 6.08, "grad_norm": 10.347036361694336, "learning_rate": 1.9832831878618173e-06, "loss": 0.0244, "step": 617875 }, { "epoch": 6.08, "grad_norm": 1.0459905862808228, "learning_rate": 1.9831590654075685e-06, "loss": 0.0694, "step": 617900 }, { "epoch": 6.08, "grad_norm": 4.016430854797363, "learning_rate": 1.98303494295332e-06, "loss": 0.021, "step": 617925 }, { "epoch": 6.08, "grad_norm": 3.99798583984375, "learning_rate": 1.9829108204990718e-06, "loss": 0.088, "step": 617950 }, { "epoch": 6.08, "grad_norm": 6.859579086303711, "learning_rate": 1.9827866980448234e-06, "loss": 0.029, "step": 617975 }, { "epoch": 6.08, "grad_norm": 2.8273861408233643, "learning_rate": 1.982662575590575e-06, "loss": 0.0602, "step": 618000 }, { "epoch": 6.08, "grad_norm": 8.160112380981445, "learning_rate": 1.9825384531363263e-06, "loss": 0.0436, "step": 618025 }, { "epoch": 6.08, "grad_norm": 0.6808872818946838, "learning_rate": 1.982414330682078e-06, "loss": 0.0709, "step": 618050 }, { "epoch": 6.08, "grad_norm": 11.370429992675781, "learning_rate": 1.9822902082278295e-06, "loss": 0.0183, "step": 618075 }, { "epoch": 6.08, "grad_norm": 3.71799898147583, "learning_rate": 1.9821660857735807e-06, "loss": 0.0788, "step": 618100 }, { "epoch": 6.08, "grad_norm": 1.8430064916610718, "learning_rate": 1.9820419633193324e-06, "loss": 0.0184, "step": 618125 }, { "epoch": 6.08, "grad_norm": 1.5704925060272217, "learning_rate": 1.981917840865084e-06, "loss": 0.0752, "step": 618150 }, { "epoch": 6.08, "grad_norm": 3.4917850494384766, "learning_rate": 1.9817937184108356e-06, "loss": 0.027, "step": 618175 }, { "epoch": 6.08, "grad_norm": 2.2439277172088623, "learning_rate": 1.9816695959565873e-06, "loss": 0.0651, "step": 618200 }, { "epoch": 6.08, "grad_norm": 13.899992942810059, "learning_rate": 1.981545473502339e-06, "loss": 0.0307, "step": 618225 }, { "epoch": 6.08, "grad_norm": 4.532923698425293, "learning_rate": 1.98142135104809e-06, "loss": 0.0688, "step": 618250 }, { "epoch": 6.08, "grad_norm": 10.41390323638916, "learning_rate": 1.9812972285938418e-06, "loss": 0.0164, "step": 618275 }, { "epoch": 6.08, "grad_norm": 0.10336416214704514, "learning_rate": 1.9811731061395934e-06, "loss": 0.0652, "step": 618300 }, { "epoch": 6.08, "grad_norm": 11.796004295349121, "learning_rate": 1.9810489836853446e-06, "loss": 0.0293, "step": 618325 }, { "epoch": 6.08, "grad_norm": 3.4070663452148438, "learning_rate": 1.9809248612310962e-06, "loss": 0.0974, "step": 618350 }, { "epoch": 6.08, "grad_norm": 3.7166357040405273, "learning_rate": 1.980800738776848e-06, "loss": 0.0344, "step": 618375 }, { "epoch": 6.08, "grad_norm": 1.806605339050293, "learning_rate": 1.9806766163225995e-06, "loss": 0.0879, "step": 618400 }, { "epoch": 6.08, "grad_norm": 8.074806213378906, "learning_rate": 1.980552493868351e-06, "loss": 0.0081, "step": 618425 }, { "epoch": 6.08, "grad_norm": 6.259034633636475, "learning_rate": 1.9804283714141024e-06, "loss": 0.1219, "step": 618450 }, { "epoch": 6.08, "grad_norm": 5.615640640258789, "learning_rate": 1.980304248959854e-06, "loss": 0.0192, "step": 618475 }, { "epoch": 6.08, "grad_norm": 4.516208648681641, "learning_rate": 1.9801801265056056e-06, "loss": 0.0906, "step": 618500 }, { "epoch": 6.08, "grad_norm": 8.59037971496582, "learning_rate": 1.980056004051357e-06, "loss": 0.0168, "step": 618525 }, { "epoch": 6.08, "grad_norm": 6.114771366119385, "learning_rate": 1.9799318815971085e-06, "loss": 0.0998, "step": 618550 }, { "epoch": 6.08, "grad_norm": 11.101630210876465, "learning_rate": 1.97980775914286e-06, "loss": 0.0195, "step": 618575 }, { "epoch": 6.08, "grad_norm": 3.0655910968780518, "learning_rate": 1.9796836366886118e-06, "loss": 0.0661, "step": 618600 }, { "epoch": 6.08, "grad_norm": 0.8756665587425232, "learning_rate": 1.9795595142343634e-06, "loss": 0.017, "step": 618625 }, { "epoch": 6.08, "grad_norm": 1.6702302694320679, "learning_rate": 1.979440356678285e-06, "loss": 0.083, "step": 618650 }, { "epoch": 6.08, "grad_norm": 6.948999404907227, "learning_rate": 1.9793162342240365e-06, "loss": 0.0247, "step": 618675 }, { "epoch": 6.08, "grad_norm": 0.5609880089759827, "learning_rate": 1.9791921117697877e-06, "loss": 0.0812, "step": 618700 }, { "epoch": 6.08, "grad_norm": 8.603240966796875, "learning_rate": 1.9790679893155393e-06, "loss": 0.0286, "step": 618725 }, { "epoch": 6.08, "grad_norm": 0.33140262961387634, "learning_rate": 1.978943866861291e-06, "loss": 0.0675, "step": 618750 }, { "epoch": 6.08, "grad_norm": 8.481741905212402, "learning_rate": 1.978819744407042e-06, "loss": 0.0218, "step": 618775 }, { "epoch": 6.08, "grad_norm": 1.0688209533691406, "learning_rate": 1.978695621952794e-06, "loss": 0.063, "step": 618800 }, { "epoch": 6.08, "grad_norm": 1.3691802024841309, "learning_rate": 1.9785714994985454e-06, "loss": 0.0255, "step": 618825 }, { "epoch": 6.08, "grad_norm": 4.250197410583496, "learning_rate": 1.978447377044297e-06, "loss": 0.0925, "step": 618850 }, { "epoch": 6.08, "grad_norm": 17.948348999023438, "learning_rate": 1.9783232545900487e-06, "loss": 0.0373, "step": 618875 }, { "epoch": 6.09, "grad_norm": 4.9042840003967285, "learning_rate": 1.9781991321358003e-06, "loss": 0.0859, "step": 618900 }, { "epoch": 6.09, "grad_norm": 8.456536293029785, "learning_rate": 1.9780750096815516e-06, "loss": 0.0244, "step": 618925 }, { "epoch": 6.09, "grad_norm": 0.3622528910636902, "learning_rate": 1.977950887227303e-06, "loss": 0.0453, "step": 618950 }, { "epoch": 6.09, "grad_norm": 10.261970520019531, "learning_rate": 1.977826764773055e-06, "loss": 0.0291, "step": 618975 }, { "epoch": 6.09, "grad_norm": 0.7476431131362915, "learning_rate": 1.977702642318806e-06, "loss": 0.0515, "step": 619000 }, { "epoch": 6.09, "grad_norm": 12.689682960510254, "learning_rate": 1.9775785198645577e-06, "loss": 0.0301, "step": 619025 }, { "epoch": 6.09, "grad_norm": 2.5828707218170166, "learning_rate": 1.9774543974103093e-06, "loss": 0.0481, "step": 619050 }, { "epoch": 6.09, "grad_norm": 14.361285209655762, "learning_rate": 1.977330274956061e-06, "loss": 0.0334, "step": 619075 }, { "epoch": 6.09, "grad_norm": 2.677934408187866, "learning_rate": 1.9772061525018126e-06, "loss": 0.0794, "step": 619100 }, { "epoch": 6.09, "grad_norm": 3.3704285621643066, "learning_rate": 1.9770820300475638e-06, "loss": 0.0231, "step": 619125 }, { "epoch": 6.09, "grad_norm": 11.125053405761719, "learning_rate": 1.9769579075933154e-06, "loss": 0.1141, "step": 619150 }, { "epoch": 6.09, "grad_norm": 3.148437738418579, "learning_rate": 1.976833785139067e-06, "loss": 0.0278, "step": 619175 }, { "epoch": 6.09, "grad_norm": 3.201284408569336, "learning_rate": 1.9767096626848183e-06, "loss": 0.0724, "step": 619200 }, { "epoch": 6.09, "grad_norm": 5.979758262634277, "learning_rate": 1.97658554023057e-06, "loss": 0.0284, "step": 619225 }, { "epoch": 6.09, "grad_norm": 0.3400760293006897, "learning_rate": 1.9764614177763215e-06, "loss": 0.0746, "step": 619250 }, { "epoch": 6.09, "grad_norm": 7.976446151733398, "learning_rate": 1.976337295322073e-06, "loss": 0.0168, "step": 619275 }, { "epoch": 6.09, "grad_norm": 7.390224933624268, "learning_rate": 1.976213172867825e-06, "loss": 0.0692, "step": 619300 }, { "epoch": 6.09, "grad_norm": 2.907151699066162, "learning_rate": 1.9760890504135764e-06, "loss": 0.0202, "step": 619325 }, { "epoch": 6.09, "grad_norm": 2.2340705394744873, "learning_rate": 1.9759649279593277e-06, "loss": 0.0562, "step": 619350 }, { "epoch": 6.09, "grad_norm": 2.6079699993133545, "learning_rate": 1.9758408055050793e-06, "loss": 0.0183, "step": 619375 }, { "epoch": 6.09, "grad_norm": 1.0104479789733887, "learning_rate": 1.975716683050831e-06, "loss": 0.081, "step": 619400 }, { "epoch": 6.09, "grad_norm": 3.1643850803375244, "learning_rate": 1.975592560596582e-06, "loss": 0.0323, "step": 619425 }, { "epoch": 6.09, "grad_norm": 3.6535017490386963, "learning_rate": 1.9754684381423338e-06, "loss": 0.0603, "step": 619450 }, { "epoch": 6.09, "grad_norm": 9.523959159851074, "learning_rate": 1.9753443156880854e-06, "loss": 0.0304, "step": 619475 }, { "epoch": 6.09, "grad_norm": 2.628319025039673, "learning_rate": 1.975220193233837e-06, "loss": 0.0626, "step": 619500 }, { "epoch": 6.09, "grad_norm": 3.35404896736145, "learning_rate": 1.9750960707795887e-06, "loss": 0.0167, "step": 619525 }, { "epoch": 6.09, "grad_norm": 1.025381326675415, "learning_rate": 1.97497194832534e-06, "loss": 0.0949, "step": 619550 }, { "epoch": 6.09, "grad_norm": 0.16148284077644348, "learning_rate": 1.9748478258710915e-06, "loss": 0.0294, "step": 619575 }, { "epoch": 6.09, "grad_norm": 7.26089334487915, "learning_rate": 1.974723703416843e-06, "loss": 0.0588, "step": 619600 }, { "epoch": 6.09, "grad_norm": 4.685480117797852, "learning_rate": 1.9745995809625944e-06, "loss": 0.0222, "step": 619625 }, { "epoch": 6.09, "grad_norm": 2.7980287075042725, "learning_rate": 1.974475458508346e-06, "loss": 0.0718, "step": 619650 }, { "epoch": 6.09, "grad_norm": 0.49718981981277466, "learning_rate": 1.9743513360540976e-06, "loss": 0.0221, "step": 619675 }, { "epoch": 6.09, "grad_norm": 4.386083602905273, "learning_rate": 1.9742272135998493e-06, "loss": 0.0677, "step": 619700 }, { "epoch": 6.09, "grad_norm": 2.4032845497131348, "learning_rate": 1.974103091145601e-06, "loss": 0.0172, "step": 619725 }, { "epoch": 6.09, "grad_norm": 0.8443692922592163, "learning_rate": 1.9739789686913525e-06, "loss": 0.06, "step": 619750 }, { "epoch": 6.09, "grad_norm": 6.252532005310059, "learning_rate": 1.9738548462371038e-06, "loss": 0.0264, "step": 619775 }, { "epoch": 6.09, "grad_norm": 1.850817084312439, "learning_rate": 1.9737307237828554e-06, "loss": 0.0489, "step": 619800 }, { "epoch": 6.09, "grad_norm": 4.927425861358643, "learning_rate": 1.973606601328607e-06, "loss": 0.0196, "step": 619825 }, { "epoch": 6.09, "grad_norm": 1.493181586265564, "learning_rate": 1.9734824788743582e-06, "loss": 0.0995, "step": 619850 }, { "epoch": 6.09, "grad_norm": 7.8182806968688965, "learning_rate": 1.97335835642011e-06, "loss": 0.0203, "step": 619875 }, { "epoch": 6.09, "grad_norm": 1.4922263622283936, "learning_rate": 1.9732342339658615e-06, "loss": 0.0831, "step": 619900 }, { "epoch": 6.1, "grad_norm": 3.900688648223877, "learning_rate": 1.973110111511613e-06, "loss": 0.0444, "step": 619925 }, { "epoch": 6.1, "grad_norm": 3.865307092666626, "learning_rate": 1.9729859890573648e-06, "loss": 0.0832, "step": 619950 }, { "epoch": 6.1, "grad_norm": 6.421402931213379, "learning_rate": 1.972861866603116e-06, "loss": 0.0132, "step": 619975 }, { "epoch": 6.1, "grad_norm": 4.74878454208374, "learning_rate": 1.9727377441488676e-06, "loss": 0.0907, "step": 620000 }, { "epoch": 6.1, "eval_loss": 0.8151510953903198, "eval_runtime": 6091.1557, "eval_samples_per_second": 1.554, "eval_steps_per_second": 0.194, "eval_wer": 0.11470739715444538, "step": 620000 }, { "epoch": 6.1, "grad_norm": 12.560850143432617, "learning_rate": 1.9726136216946193e-06, "loss": 0.033, "step": 620025 }, { "epoch": 6.1, "grad_norm": 3.8079752922058105, "learning_rate": 1.9724894992403705e-06, "loss": 0.0629, "step": 620050 }, { "epoch": 6.1, "grad_norm": 6.914117813110352, "learning_rate": 1.972365376786122e-06, "loss": 0.025, "step": 620075 }, { "epoch": 6.1, "grad_norm": 7.349538326263428, "learning_rate": 1.9722412543318737e-06, "loss": 0.0861, "step": 620100 }, { "epoch": 6.1, "grad_norm": 9.70942497253418, "learning_rate": 1.9721171318776254e-06, "loss": 0.04, "step": 620125 }, { "epoch": 6.1, "grad_norm": 5.3947224617004395, "learning_rate": 1.971993009423377e-06, "loss": 0.0808, "step": 620150 }, { "epoch": 6.1, "grad_norm": 11.915226936340332, "learning_rate": 1.9718688869691286e-06, "loss": 0.0273, "step": 620175 }, { "epoch": 6.1, "grad_norm": 0.3716924786567688, "learning_rate": 1.97174476451488e-06, "loss": 0.0582, "step": 620200 }, { "epoch": 6.1, "grad_norm": 4.982781887054443, "learning_rate": 1.9716206420606315e-06, "loss": 0.0262, "step": 620225 }, { "epoch": 6.1, "grad_norm": 0.1457076221704483, "learning_rate": 1.971496519606383e-06, "loss": 0.0979, "step": 620250 }, { "epoch": 6.1, "grad_norm": 3.9251022338867188, "learning_rate": 1.9713723971521343e-06, "loss": 0.0189, "step": 620275 }, { "epoch": 6.1, "grad_norm": 0.22464461624622345, "learning_rate": 1.971248274697886e-06, "loss": 0.0751, "step": 620300 }, { "epoch": 6.1, "grad_norm": 0.07297424226999283, "learning_rate": 1.9711241522436376e-06, "loss": 0.0294, "step": 620325 }, { "epoch": 6.1, "grad_norm": 3.3960423469543457, "learning_rate": 1.9710000297893892e-06, "loss": 0.0867, "step": 620350 }, { "epoch": 6.1, "grad_norm": 7.309767246246338, "learning_rate": 1.970875907335141e-06, "loss": 0.0214, "step": 620375 }, { "epoch": 6.1, "grad_norm": 0.0511779710650444, "learning_rate": 1.970751784880892e-06, "loss": 0.0549, "step": 620400 }, { "epoch": 6.1, "grad_norm": 5.970822811126709, "learning_rate": 1.9706276624266437e-06, "loss": 0.0142, "step": 620425 }, { "epoch": 6.1, "grad_norm": 5.0648932456970215, "learning_rate": 1.9705035399723954e-06, "loss": 0.1089, "step": 620450 }, { "epoch": 6.1, "grad_norm": 2.4587957859039307, "learning_rate": 1.9703794175181466e-06, "loss": 0.024, "step": 620475 }, { "epoch": 6.1, "grad_norm": 2.0449914932250977, "learning_rate": 1.970255295063898e-06, "loss": 0.0556, "step": 620500 }, { "epoch": 6.1, "grad_norm": 8.54996109008789, "learning_rate": 1.97013117260965e-06, "loss": 0.0302, "step": 620525 }, { "epoch": 6.1, "grad_norm": 2.8631319999694824, "learning_rate": 1.9700070501554015e-06, "loss": 0.0721, "step": 620550 }, { "epoch": 6.1, "grad_norm": 10.84986686706543, "learning_rate": 1.969882927701153e-06, "loss": 0.0154, "step": 620575 }, { "epoch": 6.1, "grad_norm": 1.833173394203186, "learning_rate": 1.9697588052469047e-06, "loss": 0.082, "step": 620600 }, { "epoch": 6.1, "grad_norm": 4.010613918304443, "learning_rate": 1.969634682792656e-06, "loss": 0.0106, "step": 620625 }, { "epoch": 6.1, "grad_norm": 2.7947542667388916, "learning_rate": 1.9695105603384076e-06, "loss": 0.0977, "step": 620650 }, { "epoch": 6.1, "grad_norm": 10.183749198913574, "learning_rate": 1.9693864378841592e-06, "loss": 0.0163, "step": 620675 }, { "epoch": 6.1, "grad_norm": 3.45289945602417, "learning_rate": 1.9692623154299104e-06, "loss": 0.0535, "step": 620700 }, { "epoch": 6.1, "grad_norm": 4.644749641418457, "learning_rate": 1.969138192975662e-06, "loss": 0.0124, "step": 620725 }, { "epoch": 6.1, "grad_norm": 1.5525084733963013, "learning_rate": 1.9690140705214137e-06, "loss": 0.0549, "step": 620750 }, { "epoch": 6.1, "grad_norm": 6.562616348266602, "learning_rate": 1.9688899480671653e-06, "loss": 0.0273, "step": 620775 }, { "epoch": 6.1, "grad_norm": 1.226812720298767, "learning_rate": 1.968765825612917e-06, "loss": 0.0579, "step": 620800 }, { "epoch": 6.1, "grad_norm": 4.189621448516846, "learning_rate": 1.968641703158668e-06, "loss": 0.0217, "step": 620825 }, { "epoch": 6.1, "grad_norm": 0.2128704935312271, "learning_rate": 1.96851758070442e-06, "loss": 0.0607, "step": 620850 }, { "epoch": 6.1, "grad_norm": 2.4203755855560303, "learning_rate": 1.9683934582501715e-06, "loss": 0.0258, "step": 620875 }, { "epoch": 6.1, "grad_norm": 6.650636196136475, "learning_rate": 1.968269335795923e-06, "loss": 0.0831, "step": 620900 }, { "epoch": 6.11, "grad_norm": 5.287735462188721, "learning_rate": 1.9681452133416747e-06, "loss": 0.0264, "step": 620925 }, { "epoch": 6.11, "grad_norm": 0.054864201694726944, "learning_rate": 1.9680210908874264e-06, "loss": 0.0694, "step": 620950 }, { "epoch": 6.11, "grad_norm": 2.308039426803589, "learning_rate": 1.9678969684331776e-06, "loss": 0.0133, "step": 620975 }, { "epoch": 6.11, "grad_norm": 5.254667282104492, "learning_rate": 1.967772845978929e-06, "loss": 0.0873, "step": 621000 }, { "epoch": 6.11, "grad_norm": 0.8936827778816223, "learning_rate": 1.967648723524681e-06, "loss": 0.0116, "step": 621025 }, { "epoch": 6.11, "grad_norm": 4.916175365447998, "learning_rate": 1.967524601070432e-06, "loss": 0.0783, "step": 621050 }, { "epoch": 6.11, "grad_norm": 9.71308422088623, "learning_rate": 1.9674004786161837e-06, "loss": 0.0345, "step": 621075 }, { "epoch": 6.11, "grad_norm": 4.560272693634033, "learning_rate": 1.9672763561619353e-06, "loss": 0.0648, "step": 621100 }, { "epoch": 6.11, "grad_norm": 5.534503936767578, "learning_rate": 1.967152233707687e-06, "loss": 0.0221, "step": 621125 }, { "epoch": 6.11, "grad_norm": 10.223312377929688, "learning_rate": 1.9670281112534386e-06, "loss": 0.0701, "step": 621150 }, { "epoch": 6.11, "grad_norm": 3.870030164718628, "learning_rate": 1.96690398879919e-06, "loss": 0.0169, "step": 621175 }, { "epoch": 6.11, "grad_norm": 1.036674976348877, "learning_rate": 1.9667798663449414e-06, "loss": 0.0925, "step": 621200 }, { "epoch": 6.11, "grad_norm": 8.858999252319336, "learning_rate": 1.966655743890693e-06, "loss": 0.029, "step": 621225 }, { "epoch": 6.11, "grad_norm": 1.3422867059707642, "learning_rate": 1.9665316214364443e-06, "loss": 0.0908, "step": 621250 }, { "epoch": 6.11, "grad_norm": 6.2675580978393555, "learning_rate": 1.966407498982196e-06, "loss": 0.0216, "step": 621275 }, { "epoch": 6.11, "grad_norm": 2.9130871295928955, "learning_rate": 1.9662833765279476e-06, "loss": 0.0856, "step": 621300 }, { "epoch": 6.11, "grad_norm": 0.7209635376930237, "learning_rate": 1.966159254073699e-06, "loss": 0.0172, "step": 621325 }, { "epoch": 6.11, "grad_norm": 1.5464730262756348, "learning_rate": 1.966035131619451e-06, "loss": 0.0644, "step": 621350 }, { "epoch": 6.11, "grad_norm": 12.419205665588379, "learning_rate": 1.9659110091652025e-06, "loss": 0.0339, "step": 621375 }, { "epoch": 6.11, "grad_norm": 5.099044322967529, "learning_rate": 1.9657868867109537e-06, "loss": 0.0599, "step": 621400 }, { "epoch": 6.11, "grad_norm": 13.098167419433594, "learning_rate": 1.9656627642567053e-06, "loss": 0.0349, "step": 621425 }, { "epoch": 6.11, "grad_norm": 3.469970703125, "learning_rate": 1.965538641802457e-06, "loss": 0.0997, "step": 621450 }, { "epoch": 6.11, "grad_norm": 6.859419822692871, "learning_rate": 1.965414519348208e-06, "loss": 0.0226, "step": 621475 }, { "epoch": 6.11, "grad_norm": 3.104811429977417, "learning_rate": 1.96529039689396e-06, "loss": 0.0727, "step": 621500 }, { "epoch": 6.11, "grad_norm": 19.63581085205078, "learning_rate": 1.9651662744397114e-06, "loss": 0.0331, "step": 621525 }, { "epoch": 6.11, "grad_norm": 0.6409021615982056, "learning_rate": 1.965042151985463e-06, "loss": 0.0719, "step": 621550 }, { "epoch": 6.11, "grad_norm": 32.071468353271484, "learning_rate": 1.9649180295312147e-06, "loss": 0.0315, "step": 621575 }, { "epoch": 6.11, "grad_norm": 1.8711093664169312, "learning_rate": 1.964793907076966e-06, "loss": 0.084, "step": 621600 }, { "epoch": 6.11, "grad_norm": 5.422794818878174, "learning_rate": 1.9646697846227175e-06, "loss": 0.0219, "step": 621625 }, { "epoch": 6.11, "grad_norm": 3.000852346420288, "learning_rate": 1.964545662168469e-06, "loss": 0.0807, "step": 621650 }, { "epoch": 6.11, "grad_norm": 10.243616104125977, "learning_rate": 1.9644215397142204e-06, "loss": 0.0235, "step": 621675 }, { "epoch": 6.11, "grad_norm": 0.7044113278388977, "learning_rate": 1.964297417259972e-06, "loss": 0.0604, "step": 621700 }, { "epoch": 6.11, "grad_norm": 2.943356990814209, "learning_rate": 1.9641732948057237e-06, "loss": 0.0175, "step": 621725 }, { "epoch": 6.11, "grad_norm": 1.2075698375701904, "learning_rate": 1.9640491723514753e-06, "loss": 0.0646, "step": 621750 }, { "epoch": 6.11, "grad_norm": 12.547981262207031, "learning_rate": 1.963925049897227e-06, "loss": 0.0215, "step": 621775 }, { "epoch": 6.11, "grad_norm": 0.07030917704105377, "learning_rate": 1.9638009274429786e-06, "loss": 0.0549, "step": 621800 }, { "epoch": 6.11, "grad_norm": 4.60889196395874, "learning_rate": 1.9636768049887298e-06, "loss": 0.0158, "step": 621825 }, { "epoch": 6.11, "grad_norm": 0.1714032143354416, "learning_rate": 1.9635526825344814e-06, "loss": 0.0798, "step": 621850 }, { "epoch": 6.11, "grad_norm": 11.209778785705566, "learning_rate": 1.963428560080233e-06, "loss": 0.0252, "step": 621875 }, { "epoch": 6.11, "grad_norm": 2.6720046997070312, "learning_rate": 1.9633044376259843e-06, "loss": 0.0728, "step": 621900 }, { "epoch": 6.11, "grad_norm": 2.8814949989318848, "learning_rate": 1.963180315171736e-06, "loss": 0.0178, "step": 621925 }, { "epoch": 6.12, "grad_norm": 3.8710930347442627, "learning_rate": 1.9630561927174875e-06, "loss": 0.069, "step": 621950 }, { "epoch": 6.12, "grad_norm": 6.101563930511475, "learning_rate": 1.962932070263239e-06, "loss": 0.0415, "step": 621975 }, { "epoch": 6.12, "grad_norm": 8.859846115112305, "learning_rate": 1.962807947808991e-06, "loss": 0.093, "step": 622000 }, { "epoch": 6.12, "grad_norm": 7.848760604858398, "learning_rate": 1.962683825354742e-06, "loss": 0.031, "step": 622025 }, { "epoch": 6.12, "grad_norm": 0.6614089608192444, "learning_rate": 1.9625597029004936e-06, "loss": 0.0766, "step": 622050 }, { "epoch": 6.12, "grad_norm": 5.185592174530029, "learning_rate": 1.9624355804462453e-06, "loss": 0.0144, "step": 622075 }, { "epoch": 6.12, "grad_norm": 4.899442195892334, "learning_rate": 1.9623114579919965e-06, "loss": 0.0569, "step": 622100 }, { "epoch": 6.12, "grad_norm": 5.445735931396484, "learning_rate": 1.962187335537748e-06, "loss": 0.0316, "step": 622125 }, { "epoch": 6.12, "grad_norm": 5.172496795654297, "learning_rate": 1.9620632130834998e-06, "loss": 0.0747, "step": 622150 }, { "epoch": 6.12, "grad_norm": 1.988784909248352, "learning_rate": 1.9619390906292514e-06, "loss": 0.021, "step": 622175 }, { "epoch": 6.12, "grad_norm": 0.19932089745998383, "learning_rate": 1.961819933073173e-06, "loss": 0.0744, "step": 622200 }, { "epoch": 6.12, "grad_norm": 4.69207763671875, "learning_rate": 1.9616958106189245e-06, "loss": 0.0312, "step": 622225 }, { "epoch": 6.12, "grad_norm": 0.8866084814071655, "learning_rate": 1.961571688164676e-06, "loss": 0.0634, "step": 622250 }, { "epoch": 6.12, "grad_norm": 8.76425838470459, "learning_rate": 1.9614475657104273e-06, "loss": 0.0235, "step": 622275 }, { "epoch": 6.12, "grad_norm": 4.770357131958008, "learning_rate": 1.961323443256179e-06, "loss": 0.0625, "step": 622300 }, { "epoch": 6.12, "grad_norm": 13.367816925048828, "learning_rate": 1.9611993208019306e-06, "loss": 0.0202, "step": 622325 }, { "epoch": 6.12, "grad_norm": 2.303895950317383, "learning_rate": 1.961075198347682e-06, "loss": 0.0673, "step": 622350 }, { "epoch": 6.12, "grad_norm": 4.708495616912842, "learning_rate": 1.9609510758934334e-06, "loss": 0.0262, "step": 622375 }, { "epoch": 6.12, "grad_norm": 4.322575092315674, "learning_rate": 1.960826953439185e-06, "loss": 0.0602, "step": 622400 }, { "epoch": 6.12, "grad_norm": 0.953647255897522, "learning_rate": 1.9607028309849367e-06, "loss": 0.0207, "step": 622425 }, { "epoch": 6.12, "grad_norm": 1.5679049491882324, "learning_rate": 1.9605787085306883e-06, "loss": 0.0474, "step": 622450 }, { "epoch": 6.12, "grad_norm": 1.1940288543701172, "learning_rate": 1.96045458607644e-06, "loss": 0.0123, "step": 622475 }, { "epoch": 6.12, "grad_norm": 2.530956983566284, "learning_rate": 1.960330463622191e-06, "loss": 0.0594, "step": 622500 }, { "epoch": 6.12, "grad_norm": 13.540185928344727, "learning_rate": 1.960206341167943e-06, "loss": 0.0194, "step": 622525 }, { "epoch": 6.12, "grad_norm": 5.593521595001221, "learning_rate": 1.9600822187136945e-06, "loss": 0.0612, "step": 622550 }, { "epoch": 6.12, "grad_norm": 9.404718399047852, "learning_rate": 1.9599580962594457e-06, "loss": 0.0237, "step": 622575 }, { "epoch": 6.12, "grad_norm": 8.768081665039062, "learning_rate": 1.9598339738051973e-06, "loss": 0.0702, "step": 622600 }, { "epoch": 6.12, "grad_norm": 0.5867996215820312, "learning_rate": 1.959709851350949e-06, "loss": 0.0126, "step": 622625 }, { "epoch": 6.12, "grad_norm": 4.9628005027771, "learning_rate": 1.9595857288967006e-06, "loss": 0.0858, "step": 622650 }, { "epoch": 6.12, "grad_norm": 13.76004695892334, "learning_rate": 1.9594616064424522e-06, "loss": 0.0205, "step": 622675 }, { "epoch": 6.12, "grad_norm": 1.5018116235733032, "learning_rate": 1.9593374839882034e-06, "loss": 0.0772, "step": 622700 }, { "epoch": 6.12, "grad_norm": 7.957839012145996, "learning_rate": 1.959213361533955e-06, "loss": 0.0219, "step": 622725 }, { "epoch": 6.12, "grad_norm": 1.7081875801086426, "learning_rate": 1.9590892390797067e-06, "loss": 0.0715, "step": 622750 }, { "epoch": 6.12, "grad_norm": 6.922004699707031, "learning_rate": 1.958965116625458e-06, "loss": 0.0209, "step": 622775 }, { "epoch": 6.12, "grad_norm": 1.126718521118164, "learning_rate": 1.9588409941712095e-06, "loss": 0.0879, "step": 622800 }, { "epoch": 6.12, "grad_norm": 6.518133640289307, "learning_rate": 1.958716871716961e-06, "loss": 0.0225, "step": 622825 }, { "epoch": 6.12, "grad_norm": 7.370561599731445, "learning_rate": 1.958592749262713e-06, "loss": 0.0953, "step": 622850 }, { "epoch": 6.12, "grad_norm": 10.37374496459961, "learning_rate": 1.9584686268084644e-06, "loss": 0.0202, "step": 622875 }, { "epoch": 6.12, "grad_norm": 0.7806327939033508, "learning_rate": 1.958344504354216e-06, "loss": 0.0991, "step": 622900 }, { "epoch": 6.12, "grad_norm": 12.598448753356934, "learning_rate": 1.9582203818999673e-06, "loss": 0.0324, "step": 622925 }, { "epoch": 6.12, "grad_norm": 3.102358818054199, "learning_rate": 1.958096259445719e-06, "loss": 0.0875, "step": 622950 }, { "epoch": 6.13, "grad_norm": 6.476619243621826, "learning_rate": 1.9579721369914706e-06, "loss": 0.0255, "step": 622975 }, { "epoch": 6.13, "grad_norm": 2.459975004196167, "learning_rate": 1.9578480145372218e-06, "loss": 0.0639, "step": 623000 }, { "epoch": 6.13, "grad_norm": 10.494465827941895, "learning_rate": 1.9577238920829734e-06, "loss": 0.0236, "step": 623025 }, { "epoch": 6.13, "grad_norm": 7.260723114013672, "learning_rate": 1.957599769628725e-06, "loss": 0.0784, "step": 623050 }, { "epoch": 6.13, "grad_norm": 5.140078067779541, "learning_rate": 1.9574756471744767e-06, "loss": 0.0223, "step": 623075 }, { "epoch": 6.13, "grad_norm": 7.154035568237305, "learning_rate": 1.9573515247202283e-06, "loss": 0.0675, "step": 623100 }, { "epoch": 6.13, "grad_norm": 4.5188093185424805, "learning_rate": 1.9572274022659795e-06, "loss": 0.0251, "step": 623125 }, { "epoch": 6.13, "grad_norm": 5.528555870056152, "learning_rate": 1.957103279811731e-06, "loss": 0.0792, "step": 623150 }, { "epoch": 6.13, "grad_norm": 5.610260009765625, "learning_rate": 1.956979157357483e-06, "loss": 0.022, "step": 623175 }, { "epoch": 6.13, "grad_norm": 4.904993534088135, "learning_rate": 1.956855034903234e-06, "loss": 0.1035, "step": 623200 }, { "epoch": 6.13, "grad_norm": 9.745131492614746, "learning_rate": 1.9567309124489856e-06, "loss": 0.0188, "step": 623225 }, { "epoch": 6.13, "grad_norm": 1.8175595998764038, "learning_rate": 1.9566067899947373e-06, "loss": 0.0664, "step": 623250 }, { "epoch": 6.13, "grad_norm": 0.7081828117370605, "learning_rate": 1.956482667540489e-06, "loss": 0.0204, "step": 623275 }, { "epoch": 6.13, "grad_norm": 3.5513200759887695, "learning_rate": 1.9563585450862405e-06, "loss": 0.0797, "step": 623300 }, { "epoch": 6.13, "grad_norm": 6.913177490234375, "learning_rate": 1.956234422631992e-06, "loss": 0.0203, "step": 623325 }, { "epoch": 6.13, "grad_norm": 2.836738348007202, "learning_rate": 1.9561103001777434e-06, "loss": 0.087, "step": 623350 }, { "epoch": 6.13, "grad_norm": 9.22732162475586, "learning_rate": 1.955986177723495e-06, "loss": 0.0184, "step": 623375 }, { "epoch": 6.13, "grad_norm": 2.0232436656951904, "learning_rate": 1.9558620552692467e-06, "loss": 0.0568, "step": 623400 }, { "epoch": 6.13, "grad_norm": 0.23420317471027374, "learning_rate": 1.955737932814998e-06, "loss": 0.0154, "step": 623425 }, { "epoch": 6.13, "grad_norm": 2.062279224395752, "learning_rate": 1.9556138103607495e-06, "loss": 0.0817, "step": 623450 }, { "epoch": 6.13, "grad_norm": 8.102754592895508, "learning_rate": 1.955489687906501e-06, "loss": 0.0266, "step": 623475 }, { "epoch": 6.13, "grad_norm": 3.1809377670288086, "learning_rate": 1.9553655654522528e-06, "loss": 0.0801, "step": 623500 }, { "epoch": 6.13, "grad_norm": 12.163457870483398, "learning_rate": 1.9552414429980044e-06, "loss": 0.0227, "step": 623525 }, { "epoch": 6.13, "grad_norm": 1.3680771589279175, "learning_rate": 1.9551173205437556e-06, "loss": 0.0831, "step": 623550 }, { "epoch": 6.13, "grad_norm": 12.001426696777344, "learning_rate": 1.9549931980895073e-06, "loss": 0.0243, "step": 623575 }, { "epoch": 6.13, "grad_norm": 3.5262980461120605, "learning_rate": 1.954869075635259e-06, "loss": 0.1033, "step": 623600 }, { "epoch": 6.13, "grad_norm": 13.089552879333496, "learning_rate": 1.95474495318101e-06, "loss": 0.0388, "step": 623625 }, { "epoch": 6.13, "grad_norm": 1.2738144397735596, "learning_rate": 1.9546208307267617e-06, "loss": 0.0819, "step": 623650 }, { "epoch": 6.13, "grad_norm": 14.153951644897461, "learning_rate": 1.9544967082725134e-06, "loss": 0.0259, "step": 623675 }, { "epoch": 6.13, "grad_norm": 1.263067364692688, "learning_rate": 1.954372585818265e-06, "loss": 0.0757, "step": 623700 }, { "epoch": 6.13, "grad_norm": 8.686616897583008, "learning_rate": 1.9542484633640166e-06, "loss": 0.0203, "step": 623725 }, { "epoch": 6.13, "grad_norm": 3.273693799972534, "learning_rate": 1.9541243409097683e-06, "loss": 0.0955, "step": 623750 }, { "epoch": 6.13, "grad_norm": 2.8757107257843018, "learning_rate": 1.9540002184555195e-06, "loss": 0.0163, "step": 623775 }, { "epoch": 6.13, "grad_norm": 0.39000073075294495, "learning_rate": 1.953876096001271e-06, "loss": 0.0761, "step": 623800 }, { "epoch": 6.13, "grad_norm": 8.493856430053711, "learning_rate": 1.9537519735470228e-06, "loss": 0.0171, "step": 623825 }, { "epoch": 6.13, "grad_norm": 1.9618940353393555, "learning_rate": 1.9536278510927744e-06, "loss": 0.0688, "step": 623850 }, { "epoch": 6.13, "grad_norm": 5.339994430541992, "learning_rate": 1.953503728638526e-06, "loss": 0.034, "step": 623875 }, { "epoch": 6.13, "grad_norm": 5.34360408782959, "learning_rate": 1.9533796061842772e-06, "loss": 0.0783, "step": 623900 }, { "epoch": 6.13, "grad_norm": 3.2738869190216064, "learning_rate": 1.953255483730029e-06, "loss": 0.0154, "step": 623925 }, { "epoch": 6.13, "grad_norm": 7.3796586990356445, "learning_rate": 1.9531313612757805e-06, "loss": 0.0685, "step": 623950 }, { "epoch": 6.14, "grad_norm": 7.104101181030273, "learning_rate": 1.9530072388215317e-06, "loss": 0.0279, "step": 623975 }, { "epoch": 6.14, "grad_norm": 1.1620144844055176, "learning_rate": 1.9528831163672834e-06, "loss": 0.067, "step": 624000 }, { "epoch": 6.14, "grad_norm": 6.2252631187438965, "learning_rate": 1.952758993913035e-06, "loss": 0.0256, "step": 624025 }, { "epoch": 6.14, "grad_norm": 7.796166896820068, "learning_rate": 1.9526348714587866e-06, "loss": 0.0608, "step": 624050 }, { "epoch": 6.14, "grad_norm": 4.810455799102783, "learning_rate": 1.9525107490045383e-06, "loss": 0.023, "step": 624075 }, { "epoch": 6.14, "grad_norm": 3.3608903884887695, "learning_rate": 1.95238662655029e-06, "loss": 0.0602, "step": 624100 }, { "epoch": 6.14, "grad_norm": 9.514087677001953, "learning_rate": 1.952262504096041e-06, "loss": 0.0423, "step": 624125 }, { "epoch": 6.14, "grad_norm": 2.016812801361084, "learning_rate": 1.9521383816417927e-06, "loss": 0.0754, "step": 624150 }, { "epoch": 6.14, "grad_norm": 2.7370731830596924, "learning_rate": 1.9520142591875444e-06, "loss": 0.029, "step": 624175 }, { "epoch": 6.14, "grad_norm": 1.5589228868484497, "learning_rate": 1.9518901367332956e-06, "loss": 0.0728, "step": 624200 }, { "epoch": 6.14, "grad_norm": 8.0449800491333, "learning_rate": 1.9517660142790472e-06, "loss": 0.0149, "step": 624225 }, { "epoch": 6.14, "grad_norm": 1.9616438150405884, "learning_rate": 1.951641891824799e-06, "loss": 0.0672, "step": 624250 }, { "epoch": 6.14, "grad_norm": 6.485616683959961, "learning_rate": 1.9515177693705505e-06, "loss": 0.0278, "step": 624275 }, { "epoch": 6.14, "grad_norm": 0.4143863022327423, "learning_rate": 1.951393646916302e-06, "loss": 0.0696, "step": 624300 }, { "epoch": 6.14, "grad_norm": 3.8154866695404053, "learning_rate": 1.9512695244620533e-06, "loss": 0.0302, "step": 624325 }, { "epoch": 6.14, "grad_norm": 2.5544369220733643, "learning_rate": 1.951145402007805e-06, "loss": 0.0609, "step": 624350 }, { "epoch": 6.14, "grad_norm": 7.320345878601074, "learning_rate": 1.9510212795535566e-06, "loss": 0.0201, "step": 624375 }, { "epoch": 6.14, "grad_norm": 0.9484624266624451, "learning_rate": 1.950902121997478e-06, "loss": 0.0531, "step": 624400 }, { "epoch": 6.14, "grad_norm": 1.176392912864685, "learning_rate": 1.9507779995432297e-06, "loss": 0.019, "step": 624425 }, { "epoch": 6.14, "grad_norm": 8.64206600189209, "learning_rate": 1.950653877088981e-06, "loss": 0.0658, "step": 624450 }, { "epoch": 6.14, "grad_norm": 10.920034408569336, "learning_rate": 1.9505297546347325e-06, "loss": 0.0282, "step": 624475 }, { "epoch": 6.14, "grad_norm": 1.9952000379562378, "learning_rate": 1.950405632180484e-06, "loss": 0.0658, "step": 624500 }, { "epoch": 6.14, "grad_norm": 1.0490939617156982, "learning_rate": 1.950281509726236e-06, "loss": 0.0222, "step": 624525 }, { "epoch": 6.14, "grad_norm": 2.615724563598633, "learning_rate": 1.9501573872719875e-06, "loss": 0.0586, "step": 624550 }, { "epoch": 6.14, "grad_norm": 0.9580134153366089, "learning_rate": 1.9500332648177387e-06, "loss": 0.0254, "step": 624575 }, { "epoch": 6.14, "grad_norm": 5.542570114135742, "learning_rate": 1.9499091423634903e-06, "loss": 0.0946, "step": 624600 }, { "epoch": 6.14, "grad_norm": 9.138680458068848, "learning_rate": 1.949785019909242e-06, "loss": 0.0354, "step": 624625 }, { "epoch": 6.14, "grad_norm": 3.677539587020874, "learning_rate": 1.949660897454993e-06, "loss": 0.0714, "step": 624650 }, { "epoch": 6.14, "grad_norm": 7.610756874084473, "learning_rate": 1.9495367750007448e-06, "loss": 0.0251, "step": 624675 }, { "epoch": 6.14, "grad_norm": 3.9740169048309326, "learning_rate": 1.9494126525464964e-06, "loss": 0.1014, "step": 624700 }, { "epoch": 6.14, "grad_norm": 3.747382164001465, "learning_rate": 1.949288530092248e-06, "loss": 0.0237, "step": 624725 }, { "epoch": 6.14, "grad_norm": 2.73696231842041, "learning_rate": 1.9491644076379997e-06, "loss": 0.0545, "step": 624750 }, { "epoch": 6.14, "grad_norm": 7.168440818786621, "learning_rate": 1.9490402851837513e-06, "loss": 0.0257, "step": 624775 }, { "epoch": 6.14, "grad_norm": 3.1201975345611572, "learning_rate": 1.9489161627295025e-06, "loss": 0.0779, "step": 624800 }, { "epoch": 6.14, "grad_norm": 6.407354354858398, "learning_rate": 1.948792040275254e-06, "loss": 0.0226, "step": 624825 }, { "epoch": 6.14, "grad_norm": 3.2148549556732178, "learning_rate": 1.948667917821006e-06, "loss": 0.0745, "step": 624850 }, { "epoch": 6.14, "grad_norm": 5.607579231262207, "learning_rate": 1.948543795366757e-06, "loss": 0.0177, "step": 624875 }, { "epoch": 6.14, "grad_norm": 4.591522216796875, "learning_rate": 1.9484196729125086e-06, "loss": 0.07, "step": 624900 }, { "epoch": 6.14, "grad_norm": 14.764464378356934, "learning_rate": 1.9482955504582603e-06, "loss": 0.0254, "step": 624925 }, { "epoch": 6.14, "grad_norm": 0.3979169428348541, "learning_rate": 1.948171428004012e-06, "loss": 0.0765, "step": 624950 }, { "epoch": 6.14, "grad_norm": 2.8014512062072754, "learning_rate": 1.9480473055497636e-06, "loss": 0.0155, "step": 624975 }, { "epoch": 6.15, "grad_norm": 2.631648540496826, "learning_rate": 1.9479231830955148e-06, "loss": 0.093, "step": 625000 }, { "epoch": 6.15, "grad_norm": 3.7525932788848877, "learning_rate": 1.9477990606412664e-06, "loss": 0.0262, "step": 625025 }, { "epoch": 6.15, "grad_norm": 2.042160749435425, "learning_rate": 1.947674938187018e-06, "loss": 0.0692, "step": 625050 }, { "epoch": 6.15, "grad_norm": 6.1457014083862305, "learning_rate": 1.9475508157327692e-06, "loss": 0.0199, "step": 625075 }, { "epoch": 6.15, "grad_norm": 0.22131460905075073, "learning_rate": 1.947426693278521e-06, "loss": 0.0865, "step": 625100 }, { "epoch": 6.15, "grad_norm": 10.516215324401855, "learning_rate": 1.9473025708242725e-06, "loss": 0.0202, "step": 625125 }, { "epoch": 6.15, "grad_norm": 3.6568803787231445, "learning_rate": 1.947178448370024e-06, "loss": 0.0588, "step": 625150 }, { "epoch": 6.15, "grad_norm": 2.7310791015625, "learning_rate": 1.9470543259157758e-06, "loss": 0.0185, "step": 625175 }, { "epoch": 6.15, "grad_norm": 0.8418000340461731, "learning_rate": 1.9469302034615274e-06, "loss": 0.0906, "step": 625200 }, { "epoch": 6.15, "grad_norm": 1.6233402490615845, "learning_rate": 1.9468060810072786e-06, "loss": 0.026, "step": 625225 }, { "epoch": 6.15, "grad_norm": 4.851064205169678, "learning_rate": 1.9466819585530303e-06, "loss": 0.0596, "step": 625250 }, { "epoch": 6.15, "grad_norm": 8.495920181274414, "learning_rate": 1.946557836098782e-06, "loss": 0.0185, "step": 625275 }, { "epoch": 6.15, "grad_norm": 1.17899489402771, "learning_rate": 1.946433713644533e-06, "loss": 0.0614, "step": 625300 }, { "epoch": 6.15, "grad_norm": 4.939852714538574, "learning_rate": 1.9463095911902847e-06, "loss": 0.0186, "step": 625325 }, { "epoch": 6.15, "grad_norm": 6.161129474639893, "learning_rate": 1.9461854687360364e-06, "loss": 0.074, "step": 625350 }, { "epoch": 6.15, "grad_norm": 13.907381057739258, "learning_rate": 1.946061346281788e-06, "loss": 0.0267, "step": 625375 }, { "epoch": 6.15, "grad_norm": 5.409311294555664, "learning_rate": 1.9459372238275397e-06, "loss": 0.0793, "step": 625400 }, { "epoch": 6.15, "grad_norm": 4.41512393951416, "learning_rate": 1.945813101373291e-06, "loss": 0.0247, "step": 625425 }, { "epoch": 6.15, "grad_norm": 2.5200116634368896, "learning_rate": 1.9456889789190425e-06, "loss": 0.0604, "step": 625450 }, { "epoch": 6.15, "grad_norm": 1.3472541570663452, "learning_rate": 1.945564856464794e-06, "loss": 0.0229, "step": 625475 }, { "epoch": 6.15, "grad_norm": 4.304481506347656, "learning_rate": 1.9454407340105453e-06, "loss": 0.0681, "step": 625500 }, { "epoch": 6.15, "grad_norm": 0.2779295742511749, "learning_rate": 1.945316611556297e-06, "loss": 0.0193, "step": 625525 }, { "epoch": 6.15, "grad_norm": 0.3671624958515167, "learning_rate": 1.9451924891020486e-06, "loss": 0.0601, "step": 625550 }, { "epoch": 6.15, "grad_norm": 6.601337909698486, "learning_rate": 1.9450683666478003e-06, "loss": 0.0251, "step": 625575 }, { "epoch": 6.15, "grad_norm": 1.1188735961914062, "learning_rate": 1.944944244193552e-06, "loss": 0.0676, "step": 625600 }, { "epoch": 6.15, "grad_norm": 11.408279418945312, "learning_rate": 1.9448201217393035e-06, "loss": 0.0213, "step": 625625 }, { "epoch": 6.15, "grad_norm": 3.0103983879089355, "learning_rate": 1.9446959992850547e-06, "loss": 0.0933, "step": 625650 }, { "epoch": 6.15, "grad_norm": 10.015783309936523, "learning_rate": 1.9445718768308064e-06, "loss": 0.0205, "step": 625675 }, { "epoch": 6.15, "grad_norm": 8.200364112854004, "learning_rate": 1.944447754376558e-06, "loss": 0.0796, "step": 625700 }, { "epoch": 6.15, "grad_norm": 2.9046144485473633, "learning_rate": 1.9443236319223092e-06, "loss": 0.016, "step": 625725 }, { "epoch": 6.15, "grad_norm": 8.182046890258789, "learning_rate": 1.944199509468061e-06, "loss": 0.0799, "step": 625750 }, { "epoch": 6.15, "grad_norm": 10.145477294921875, "learning_rate": 1.9440753870138125e-06, "loss": 0.0176, "step": 625775 }, { "epoch": 6.15, "grad_norm": 2.3973350524902344, "learning_rate": 1.943951264559564e-06, "loss": 0.053, "step": 625800 }, { "epoch": 6.15, "grad_norm": 4.482271671295166, "learning_rate": 1.9438271421053158e-06, "loss": 0.0165, "step": 625825 }, { "epoch": 6.15, "grad_norm": 0.9396129846572876, "learning_rate": 1.943703019651067e-06, "loss": 0.0792, "step": 625850 }, { "epoch": 6.15, "grad_norm": 7.0702290534973145, "learning_rate": 1.9435788971968186e-06, "loss": 0.0137, "step": 625875 }, { "epoch": 6.15, "grad_norm": 0.20244374871253967, "learning_rate": 1.9434547747425702e-06, "loss": 0.0556, "step": 625900 }, { "epoch": 6.15, "grad_norm": 14.643843650817871, "learning_rate": 1.9433306522883214e-06, "loss": 0.0335, "step": 625925 }, { "epoch": 6.15, "grad_norm": 3.7977921962738037, "learning_rate": 1.943206529834073e-06, "loss": 0.0661, "step": 625950 }, { "epoch": 6.15, "grad_norm": 1.3895145654678345, "learning_rate": 1.9430824073798247e-06, "loss": 0.035, "step": 625975 }, { "epoch": 6.15, "grad_norm": 1.6913245916366577, "learning_rate": 1.9429582849255764e-06, "loss": 0.0979, "step": 626000 }, { "epoch": 6.16, "grad_norm": 7.543750286102295, "learning_rate": 1.942834162471328e-06, "loss": 0.0245, "step": 626025 }, { "epoch": 6.16, "grad_norm": 2.0541861057281494, "learning_rate": 1.9427100400170796e-06, "loss": 0.0551, "step": 626050 }, { "epoch": 6.16, "grad_norm": 4.260375022888184, "learning_rate": 1.942585917562831e-06, "loss": 0.0178, "step": 626075 }, { "epoch": 6.16, "grad_norm": 4.460152626037598, "learning_rate": 1.9424617951085825e-06, "loss": 0.0823, "step": 626100 }, { "epoch": 6.16, "grad_norm": 9.723570823669434, "learning_rate": 1.942337672654334e-06, "loss": 0.014, "step": 626125 }, { "epoch": 6.16, "grad_norm": 0.529546320438385, "learning_rate": 1.9422135502000853e-06, "loss": 0.0825, "step": 626150 }, { "epoch": 6.16, "grad_norm": 3.275625705718994, "learning_rate": 1.942089427745837e-06, "loss": 0.0258, "step": 626175 }, { "epoch": 6.16, "grad_norm": 5.432260513305664, "learning_rate": 1.9419653052915886e-06, "loss": 0.0611, "step": 626200 }, { "epoch": 6.16, "grad_norm": 10.422518730163574, "learning_rate": 1.9418411828373402e-06, "loss": 0.0329, "step": 626225 }, { "epoch": 6.16, "grad_norm": 0.012538663111627102, "learning_rate": 1.941717060383092e-06, "loss": 0.0552, "step": 626250 }, { "epoch": 6.16, "grad_norm": 7.862965106964111, "learning_rate": 1.941592937928843e-06, "loss": 0.0182, "step": 626275 }, { "epoch": 6.16, "grad_norm": 2.1336894035339355, "learning_rate": 1.9414688154745947e-06, "loss": 0.0644, "step": 626300 }, { "epoch": 6.16, "grad_norm": 0.10104513168334961, "learning_rate": 1.9413446930203463e-06, "loss": 0.012, "step": 626325 }, { "epoch": 6.16, "grad_norm": 0.690362274646759, "learning_rate": 1.9412205705660975e-06, "loss": 0.0646, "step": 626350 }, { "epoch": 6.16, "grad_norm": 12.466696739196777, "learning_rate": 1.941096448111849e-06, "loss": 0.0194, "step": 626375 }, { "epoch": 6.16, "grad_norm": 1.8619794845581055, "learning_rate": 1.940972325657601e-06, "loss": 0.0927, "step": 626400 }, { "epoch": 6.16, "grad_norm": 4.592111110687256, "learning_rate": 1.9408482032033525e-06, "loss": 0.0168, "step": 626425 }, { "epoch": 6.16, "grad_norm": 2.943938970565796, "learning_rate": 1.940724080749104e-06, "loss": 0.0875, "step": 626450 }, { "epoch": 6.16, "grad_norm": 6.122430324554443, "learning_rate": 1.9405999582948557e-06, "loss": 0.0301, "step": 626475 }, { "epoch": 6.16, "grad_norm": 4.971103191375732, "learning_rate": 1.940475835840607e-06, "loss": 0.079, "step": 626500 }, { "epoch": 6.16, "grad_norm": 1.4720059633255005, "learning_rate": 1.9403517133863586e-06, "loss": 0.0269, "step": 626525 }, { "epoch": 6.16, "grad_norm": 2.9852192401885986, "learning_rate": 1.94022759093211e-06, "loss": 0.086, "step": 626550 }, { "epoch": 6.16, "grad_norm": 3.6022255420684814, "learning_rate": 1.940103468477862e-06, "loss": 0.0118, "step": 626575 }, { "epoch": 6.16, "grad_norm": 1.209063172340393, "learning_rate": 1.9399793460236135e-06, "loss": 0.0865, "step": 626600 }, { "epoch": 6.16, "grad_norm": 1.5833468437194824, "learning_rate": 1.9398552235693647e-06, "loss": 0.0184, "step": 626625 }, { "epoch": 6.16, "grad_norm": 0.5178943872451782, "learning_rate": 1.9397311011151163e-06, "loss": 0.0896, "step": 626650 }, { "epoch": 6.16, "grad_norm": 2.301090717315674, "learning_rate": 1.939606978660868e-06, "loss": 0.0149, "step": 626675 }, { "epoch": 6.16, "grad_norm": 1.547744631767273, "learning_rate": 1.939482856206619e-06, "loss": 0.0745, "step": 626700 }, { "epoch": 6.16, "grad_norm": 1.6367855072021484, "learning_rate": 1.939358733752371e-06, "loss": 0.0226, "step": 626725 }, { "epoch": 6.16, "grad_norm": 3.9419562816619873, "learning_rate": 1.9392346112981224e-06, "loss": 0.0976, "step": 626750 }, { "epoch": 6.16, "grad_norm": 4.0316033363342285, "learning_rate": 1.939110488843874e-06, "loss": 0.0207, "step": 626775 }, { "epoch": 6.16, "grad_norm": 8.297937393188477, "learning_rate": 1.9389863663896257e-06, "loss": 0.0666, "step": 626800 }, { "epoch": 6.16, "grad_norm": 0.581298291683197, "learning_rate": 1.9388622439353773e-06, "loss": 0.0299, "step": 626825 }, { "epoch": 6.16, "grad_norm": 3.176765203475952, "learning_rate": 1.9387381214811286e-06, "loss": 0.1035, "step": 626850 }, { "epoch": 6.16, "grad_norm": 6.83653450012207, "learning_rate": 1.93861399902688e-06, "loss": 0.0168, "step": 626875 }, { "epoch": 6.16, "grad_norm": 4.493647575378418, "learning_rate": 1.938489876572632e-06, "loss": 0.0529, "step": 626900 }, { "epoch": 6.16, "grad_norm": 4.0358781814575195, "learning_rate": 1.938365754118383e-06, "loss": 0.0174, "step": 626925 }, { "epoch": 6.16, "grad_norm": 3.0884900093078613, "learning_rate": 1.9382416316641347e-06, "loss": 0.069, "step": 626950 }, { "epoch": 6.16, "grad_norm": 1.9886529445648193, "learning_rate": 1.9381175092098863e-06, "loss": 0.0437, "step": 626975 }, { "epoch": 6.16, "grad_norm": 4.487213611602783, "learning_rate": 1.9379983516538078e-06, "loss": 0.108, "step": 627000 }, { "epoch": 6.17, "grad_norm": 0.6192232966423035, "learning_rate": 1.9378742291995594e-06, "loss": 0.0147, "step": 627025 }, { "epoch": 6.17, "grad_norm": 2.814847946166992, "learning_rate": 1.937750106745311e-06, "loss": 0.0913, "step": 627050 }, { "epoch": 6.17, "grad_norm": 6.625850200653076, "learning_rate": 1.9376259842910627e-06, "loss": 0.0198, "step": 627075 }, { "epoch": 6.17, "grad_norm": 6.946184158325195, "learning_rate": 1.937501861836814e-06, "loss": 0.0576, "step": 627100 }, { "epoch": 6.17, "grad_norm": 4.2297492027282715, "learning_rate": 1.9373777393825655e-06, "loss": 0.0178, "step": 627125 }, { "epoch": 6.17, "grad_norm": 0.28529447317123413, "learning_rate": 1.937253616928317e-06, "loss": 0.0881, "step": 627150 }, { "epoch": 6.17, "grad_norm": 2.162594795227051, "learning_rate": 1.9371294944740684e-06, "loss": 0.0306, "step": 627175 }, { "epoch": 6.17, "grad_norm": 0.7223684787750244, "learning_rate": 1.93700537201982e-06, "loss": 0.0784, "step": 627200 }, { "epoch": 6.17, "grad_norm": 0.40788188576698303, "learning_rate": 1.9368812495655716e-06, "loss": 0.0248, "step": 627225 }, { "epoch": 6.17, "grad_norm": 1.1354377269744873, "learning_rate": 1.9367571271113233e-06, "loss": 0.0614, "step": 627250 }, { "epoch": 6.17, "grad_norm": 17.757427215576172, "learning_rate": 1.936633004657075e-06, "loss": 0.0371, "step": 627275 }, { "epoch": 6.17, "grad_norm": 0.2514013350009918, "learning_rate": 1.936508882202826e-06, "loss": 0.0619, "step": 627300 }, { "epoch": 6.17, "grad_norm": 0.0893201157450676, "learning_rate": 1.9363847597485777e-06, "loss": 0.009, "step": 627325 }, { "epoch": 6.17, "grad_norm": 4.86983060836792, "learning_rate": 1.9362606372943294e-06, "loss": 0.0641, "step": 627350 }, { "epoch": 6.17, "grad_norm": 12.783397674560547, "learning_rate": 1.9361365148400806e-06, "loss": 0.0396, "step": 627375 }, { "epoch": 6.17, "grad_norm": 4.2560272216796875, "learning_rate": 1.9360123923858322e-06, "loss": 0.0521, "step": 627400 }, { "epoch": 6.17, "grad_norm": 7.101821422576904, "learning_rate": 1.935888269931584e-06, "loss": 0.0115, "step": 627425 }, { "epoch": 6.17, "grad_norm": 2.3230814933776855, "learning_rate": 1.9357641474773355e-06, "loss": 0.0978, "step": 627450 }, { "epoch": 6.17, "grad_norm": 12.243350982666016, "learning_rate": 1.935640025023087e-06, "loss": 0.0172, "step": 627475 }, { "epoch": 6.17, "grad_norm": 0.7080426812171936, "learning_rate": 1.9355159025688388e-06, "loss": 0.0824, "step": 627500 }, { "epoch": 6.17, "grad_norm": 2.7978150844573975, "learning_rate": 1.93539178011459e-06, "loss": 0.0202, "step": 627525 }, { "epoch": 6.17, "grad_norm": 5.479191303253174, "learning_rate": 1.9352676576603416e-06, "loss": 0.0617, "step": 627550 }, { "epoch": 6.17, "grad_norm": 0.5993844866752625, "learning_rate": 1.9351435352060932e-06, "loss": 0.0169, "step": 627575 }, { "epoch": 6.17, "grad_norm": 5.13930082321167, "learning_rate": 1.9350194127518445e-06, "loss": 0.0869, "step": 627600 }, { "epoch": 6.17, "grad_norm": 0.8302902579307556, "learning_rate": 1.934895290297596e-06, "loss": 0.0207, "step": 627625 }, { "epoch": 6.17, "grad_norm": 0.09842035174369812, "learning_rate": 1.9347711678433477e-06, "loss": 0.0878, "step": 627650 }, { "epoch": 6.17, "grad_norm": 2.051405906677246, "learning_rate": 1.9346470453890994e-06, "loss": 0.02, "step": 627675 }, { "epoch": 6.17, "grad_norm": 3.5940442085266113, "learning_rate": 1.934522922934851e-06, "loss": 0.0851, "step": 627700 }, { "epoch": 6.17, "grad_norm": 4.062685012817383, "learning_rate": 1.934398800480602e-06, "loss": 0.0191, "step": 627725 }, { "epoch": 6.17, "grad_norm": 2.1288251876831055, "learning_rate": 1.934274678026354e-06, "loss": 0.0697, "step": 627750 }, { "epoch": 6.17, "grad_norm": 8.486610412597656, "learning_rate": 1.9341505555721055e-06, "loss": 0.0202, "step": 627775 }, { "epoch": 6.17, "grad_norm": 5.071867942810059, "learning_rate": 1.9340264331178567e-06, "loss": 0.0863, "step": 627800 }, { "epoch": 6.17, "grad_norm": 3.1654105186462402, "learning_rate": 1.9339023106636083e-06, "loss": 0.0134, "step": 627825 }, { "epoch": 6.17, "grad_norm": 1.3425348997116089, "learning_rate": 1.93377818820936e-06, "loss": 0.0898, "step": 627850 }, { "epoch": 6.17, "grad_norm": 8.492242813110352, "learning_rate": 1.9336540657551116e-06, "loss": 0.0176, "step": 627875 }, { "epoch": 6.17, "grad_norm": 2.6587886810302734, "learning_rate": 1.9335299433008632e-06, "loss": 0.0831, "step": 627900 }, { "epoch": 6.17, "grad_norm": 12.737449645996094, "learning_rate": 1.933405820846615e-06, "loss": 0.0222, "step": 627925 }, { "epoch": 6.17, "grad_norm": 1.3102017641067505, "learning_rate": 1.933281698392366e-06, "loss": 0.0757, "step": 627950 }, { "epoch": 6.17, "grad_norm": 5.697358131408691, "learning_rate": 1.9331575759381177e-06, "loss": 0.0298, "step": 627975 }, { "epoch": 6.17, "grad_norm": 2.3342061042785645, "learning_rate": 1.9330334534838693e-06, "loss": 0.0687, "step": 628000 }, { "epoch": 6.17, "grad_norm": 13.9056396484375, "learning_rate": 1.9329093310296206e-06, "loss": 0.0204, "step": 628025 }, { "epoch": 6.18, "grad_norm": 2.715475082397461, "learning_rate": 1.932785208575372e-06, "loss": 0.054, "step": 628050 }, { "epoch": 6.18, "grad_norm": 11.391393661499023, "learning_rate": 1.932661086121124e-06, "loss": 0.0294, "step": 628075 }, { "epoch": 6.18, "grad_norm": 6.441391944885254, "learning_rate": 1.9325369636668755e-06, "loss": 0.0572, "step": 628100 }, { "epoch": 6.18, "grad_norm": 0.7167928218841553, "learning_rate": 1.932412841212627e-06, "loss": 0.0288, "step": 628125 }, { "epoch": 6.18, "grad_norm": 0.188555046916008, "learning_rate": 1.9322887187583783e-06, "loss": 0.0765, "step": 628150 }, { "epoch": 6.18, "grad_norm": 1.5216330289840698, "learning_rate": 1.93216459630413e-06, "loss": 0.0141, "step": 628175 }, { "epoch": 6.18, "grad_norm": 4.95432710647583, "learning_rate": 1.9320404738498816e-06, "loss": 0.0689, "step": 628200 }, { "epoch": 6.18, "grad_norm": 7.094009876251221, "learning_rate": 1.9319163513956328e-06, "loss": 0.0156, "step": 628225 }, { "epoch": 6.18, "grad_norm": 3.8341927528381348, "learning_rate": 1.9317922289413844e-06, "loss": 0.0857, "step": 628250 }, { "epoch": 6.18, "grad_norm": 10.532588958740234, "learning_rate": 1.931668106487136e-06, "loss": 0.0303, "step": 628275 }, { "epoch": 6.18, "grad_norm": 2.900373697280884, "learning_rate": 1.9315439840328877e-06, "loss": 0.0753, "step": 628300 }, { "epoch": 6.18, "grad_norm": 0.5491174459457397, "learning_rate": 1.9314198615786393e-06, "loss": 0.0157, "step": 628325 }, { "epoch": 6.18, "grad_norm": 6.012086868286133, "learning_rate": 1.931295739124391e-06, "loss": 0.075, "step": 628350 }, { "epoch": 6.18, "grad_norm": 12.762112617492676, "learning_rate": 1.931171616670142e-06, "loss": 0.0323, "step": 628375 }, { "epoch": 6.18, "grad_norm": 3.8534998893737793, "learning_rate": 1.931047494215894e-06, "loss": 0.0704, "step": 628400 }, { "epoch": 6.18, "grad_norm": 7.70789909362793, "learning_rate": 1.9309233717616454e-06, "loss": 0.0199, "step": 628425 }, { "epoch": 6.18, "grad_norm": 1.9717222452163696, "learning_rate": 1.9307992493073967e-06, "loss": 0.0689, "step": 628450 }, { "epoch": 6.18, "grad_norm": 2.389683246612549, "learning_rate": 1.9306751268531483e-06, "loss": 0.0297, "step": 628475 }, { "epoch": 6.18, "grad_norm": 3.2127645015716553, "learning_rate": 1.9305510043989e-06, "loss": 0.0766, "step": 628500 }, { "epoch": 6.18, "grad_norm": 2.0207149982452393, "learning_rate": 1.9304268819446516e-06, "loss": 0.0125, "step": 628525 }, { "epoch": 6.18, "grad_norm": 0.26576319336891174, "learning_rate": 1.930302759490403e-06, "loss": 0.0536, "step": 628550 }, { "epoch": 6.18, "grad_norm": 0.7727307081222534, "learning_rate": 1.9301786370361544e-06, "loss": 0.0195, "step": 628575 }, { "epoch": 6.18, "grad_norm": 0.9189303517341614, "learning_rate": 1.930054514581906e-06, "loss": 0.0762, "step": 628600 }, { "epoch": 6.18, "grad_norm": 9.186686515808105, "learning_rate": 1.9299303921276577e-06, "loss": 0.0115, "step": 628625 }, { "epoch": 6.18, "grad_norm": 1.6553055047988892, "learning_rate": 1.929806269673409e-06, "loss": 0.0768, "step": 628650 }, { "epoch": 6.18, "grad_norm": 15.515790939331055, "learning_rate": 1.9296821472191605e-06, "loss": 0.0343, "step": 628675 }, { "epoch": 6.18, "grad_norm": 2.837897300720215, "learning_rate": 1.929558024764912e-06, "loss": 0.0913, "step": 628700 }, { "epoch": 6.18, "grad_norm": 6.69182825088501, "learning_rate": 1.929433902310664e-06, "loss": 0.0245, "step": 628725 }, { "epoch": 6.18, "grad_norm": 1.7319852113723755, "learning_rate": 1.9293097798564154e-06, "loss": 0.0904, "step": 628750 }, { "epoch": 6.18, "grad_norm": 18.511987686157227, "learning_rate": 1.929185657402167e-06, "loss": 0.0269, "step": 628775 }, { "epoch": 6.18, "grad_norm": 0.39277172088623047, "learning_rate": 1.9290615349479183e-06, "loss": 0.085, "step": 628800 }, { "epoch": 6.18, "grad_norm": 7.6076788902282715, "learning_rate": 1.92893741249367e-06, "loss": 0.0249, "step": 628825 }, { "epoch": 6.18, "grad_norm": 1.8289605379104614, "learning_rate": 1.9288132900394215e-06, "loss": 0.1001, "step": 628850 }, { "epoch": 6.18, "grad_norm": 11.314590454101562, "learning_rate": 1.9286891675851728e-06, "loss": 0.0201, "step": 628875 }, { "epoch": 6.18, "grad_norm": 2.202207088470459, "learning_rate": 1.9285650451309244e-06, "loss": 0.0638, "step": 628900 }, { "epoch": 6.18, "grad_norm": 7.54141902923584, "learning_rate": 1.928440922676676e-06, "loss": 0.0182, "step": 628925 }, { "epoch": 6.18, "grad_norm": 3.7427799701690674, "learning_rate": 1.9283168002224277e-06, "loss": 0.0746, "step": 628950 }, { "epoch": 6.18, "grad_norm": 4.910068035125732, "learning_rate": 1.9281926777681793e-06, "loss": 0.0212, "step": 628975 }, { "epoch": 6.18, "grad_norm": 5.9584641456604, "learning_rate": 1.9280685553139305e-06, "loss": 0.0685, "step": 629000 }, { "epoch": 6.18, "grad_norm": 9.925994873046875, "learning_rate": 1.927944432859682e-06, "loss": 0.0237, "step": 629025 }, { "epoch": 6.18, "grad_norm": 2.535158395767212, "learning_rate": 1.9278203104054338e-06, "loss": 0.0913, "step": 629050 }, { "epoch": 6.19, "grad_norm": 4.518627643585205, "learning_rate": 1.927696187951185e-06, "loss": 0.0299, "step": 629075 }, { "epoch": 6.19, "grad_norm": 8.357662200927734, "learning_rate": 1.9275720654969366e-06, "loss": 0.0917, "step": 629100 }, { "epoch": 6.19, "grad_norm": 8.422811508178711, "learning_rate": 1.9274479430426883e-06, "loss": 0.0197, "step": 629125 }, { "epoch": 6.19, "grad_norm": 2.9058635234832764, "learning_rate": 1.92732382058844e-06, "loss": 0.0704, "step": 629150 }, { "epoch": 6.19, "grad_norm": 0.6046465039253235, "learning_rate": 1.9271996981341915e-06, "loss": 0.0242, "step": 629175 }, { "epoch": 6.19, "grad_norm": 4.291449069976807, "learning_rate": 1.927075575679943e-06, "loss": 0.0645, "step": 629200 }, { "epoch": 6.19, "grad_norm": 2.4730896949768066, "learning_rate": 1.9269514532256944e-06, "loss": 0.035, "step": 629225 }, { "epoch": 6.19, "grad_norm": 4.769918918609619, "learning_rate": 1.926827330771446e-06, "loss": 0.0664, "step": 629250 }, { "epoch": 6.19, "grad_norm": 5.539731502532959, "learning_rate": 1.9267032083171976e-06, "loss": 0.0101, "step": 629275 }, { "epoch": 6.19, "grad_norm": 6.343090534210205, "learning_rate": 1.926579085862949e-06, "loss": 0.0725, "step": 629300 }, { "epoch": 6.19, "grad_norm": 12.013078689575195, "learning_rate": 1.9264549634087005e-06, "loss": 0.0209, "step": 629325 }, { "epoch": 6.19, "grad_norm": 2.7412285804748535, "learning_rate": 1.926330840954452e-06, "loss": 0.0638, "step": 629350 }, { "epoch": 6.19, "grad_norm": 0.7086460590362549, "learning_rate": 1.9262067185002038e-06, "loss": 0.0244, "step": 629375 }, { "epoch": 6.19, "grad_norm": 2.928133487701416, "learning_rate": 1.9260825960459554e-06, "loss": 0.0545, "step": 629400 }, { "epoch": 6.19, "grad_norm": 15.38514518737793, "learning_rate": 1.9259584735917066e-06, "loss": 0.0203, "step": 629425 }, { "epoch": 6.19, "grad_norm": 0.4594986140727997, "learning_rate": 1.9258343511374582e-06, "loss": 0.0833, "step": 629450 }, { "epoch": 6.19, "grad_norm": 4.473315715789795, "learning_rate": 1.92571022868321e-06, "loss": 0.0416, "step": 629475 }, { "epoch": 6.19, "grad_norm": 5.8119940757751465, "learning_rate": 1.9255861062289615e-06, "loss": 0.0807, "step": 629500 }, { "epoch": 6.19, "grad_norm": 0.7006170749664307, "learning_rate": 1.925461983774713e-06, "loss": 0.0199, "step": 629525 }, { "epoch": 6.19, "grad_norm": 0.30506032705307007, "learning_rate": 1.9253378613204648e-06, "loss": 0.0754, "step": 629550 }, { "epoch": 6.19, "grad_norm": 9.632731437683105, "learning_rate": 1.925213738866216e-06, "loss": 0.0312, "step": 629575 }, { "epoch": 6.19, "grad_norm": 2.3883917331695557, "learning_rate": 1.9250896164119676e-06, "loss": 0.066, "step": 629600 }, { "epoch": 6.19, "grad_norm": 6.192811965942383, "learning_rate": 1.9249654939577193e-06, "loss": 0.0239, "step": 629625 }, { "epoch": 6.19, "grad_norm": 1.5421867370605469, "learning_rate": 1.9248413715034705e-06, "loss": 0.0627, "step": 629650 }, { "epoch": 6.19, "grad_norm": 8.103116989135742, "learning_rate": 1.924717249049222e-06, "loss": 0.0337, "step": 629675 }, { "epoch": 6.19, "grad_norm": 12.0127592086792, "learning_rate": 1.9245931265949737e-06, "loss": 0.0749, "step": 629700 }, { "epoch": 6.19, "grad_norm": 11.235918045043945, "learning_rate": 1.9244690041407254e-06, "loss": 0.0307, "step": 629725 }, { "epoch": 6.19, "grad_norm": 2.3700754642486572, "learning_rate": 1.924344881686477e-06, "loss": 0.089, "step": 629750 }, { "epoch": 6.19, "grad_norm": 9.752115249633789, "learning_rate": 1.9242207592322282e-06, "loss": 0.0169, "step": 629775 }, { "epoch": 6.19, "grad_norm": 1.0473144054412842, "learning_rate": 1.92409663677798e-06, "loss": 0.0862, "step": 629800 }, { "epoch": 6.19, "grad_norm": 4.5960774421691895, "learning_rate": 1.9239725143237315e-06, "loss": 0.0191, "step": 629825 }, { "epoch": 6.19, "grad_norm": 1.7395565509796143, "learning_rate": 1.9238483918694827e-06, "loss": 0.0696, "step": 629850 }, { "epoch": 6.19, "grad_norm": 5.357192516326904, "learning_rate": 1.9237242694152343e-06, "loss": 0.0388, "step": 629875 }, { "epoch": 6.19, "grad_norm": 0.20852768421173096, "learning_rate": 1.923600146960986e-06, "loss": 0.0681, "step": 629900 }, { "epoch": 6.19, "grad_norm": 3.014493227005005, "learning_rate": 1.9234760245067376e-06, "loss": 0.0328, "step": 629925 }, { "epoch": 6.19, "grad_norm": 3.6869144439697266, "learning_rate": 1.9233519020524892e-06, "loss": 0.0739, "step": 629950 }, { "epoch": 6.19, "grad_norm": 0.570961594581604, "learning_rate": 1.923227779598241e-06, "loss": 0.0245, "step": 629975 }, { "epoch": 6.19, "grad_norm": 2.6387953758239746, "learning_rate": 1.923103657143992e-06, "loss": 0.0803, "step": 630000 }, { "epoch": 6.19, "grad_norm": 5.912858009338379, "learning_rate": 1.9229795346897437e-06, "loss": 0.0181, "step": 630025 }, { "epoch": 6.19, "grad_norm": 1.8967493772506714, "learning_rate": 1.9228554122354954e-06, "loss": 0.056, "step": 630050 }, { "epoch": 6.2, "grad_norm": 6.24614143371582, "learning_rate": 1.9227312897812466e-06, "loss": 0.0149, "step": 630075 }, { "epoch": 6.2, "grad_norm": 0.1765105128288269, "learning_rate": 1.922607167326998e-06, "loss": 0.0556, "step": 630100 }, { "epoch": 6.2, "grad_norm": 6.064679145812988, "learning_rate": 1.92248304487275e-06, "loss": 0.021, "step": 630125 }, { "epoch": 6.2, "grad_norm": 0.2804667353630066, "learning_rate": 1.9223589224185015e-06, "loss": 0.1033, "step": 630150 }, { "epoch": 6.2, "grad_norm": 2.6380765438079834, "learning_rate": 1.922234799964253e-06, "loss": 0.0218, "step": 630175 }, { "epoch": 6.2, "grad_norm": 1.5150185823440552, "learning_rate": 1.9221106775100043e-06, "loss": 0.0725, "step": 630200 }, { "epoch": 6.2, "grad_norm": 8.071396827697754, "learning_rate": 1.921986555055756e-06, "loss": 0.0246, "step": 630225 }, { "epoch": 6.2, "grad_norm": 6.27496337890625, "learning_rate": 1.9218624326015076e-06, "loss": 0.0791, "step": 630250 }, { "epoch": 6.2, "grad_norm": 11.507980346679688, "learning_rate": 1.921738310147259e-06, "loss": 0.0203, "step": 630275 }, { "epoch": 6.2, "grad_norm": 0.2409157007932663, "learning_rate": 1.9216141876930104e-06, "loss": 0.0775, "step": 630300 }, { "epoch": 6.2, "grad_norm": 5.692861080169678, "learning_rate": 1.921490065238762e-06, "loss": 0.016, "step": 630325 }, { "epoch": 6.2, "grad_norm": 3.1723785400390625, "learning_rate": 1.9213709076826835e-06, "loss": 0.0865, "step": 630350 }, { "epoch": 6.2, "grad_norm": 0.7562278509140015, "learning_rate": 1.921246785228435e-06, "loss": 0.0245, "step": 630375 }, { "epoch": 6.2, "grad_norm": 0.12725959718227386, "learning_rate": 1.921122662774187e-06, "loss": 0.0746, "step": 630400 }, { "epoch": 6.2, "grad_norm": 7.440824508666992, "learning_rate": 1.9209985403199384e-06, "loss": 0.0185, "step": 630425 }, { "epoch": 6.2, "grad_norm": 0.40035924315452576, "learning_rate": 1.9208744178656896e-06, "loss": 0.059, "step": 630450 }, { "epoch": 6.2, "grad_norm": 2.2154765129089355, "learning_rate": 1.9207502954114413e-06, "loss": 0.0149, "step": 630475 }, { "epoch": 6.2, "grad_norm": 2.1367545127868652, "learning_rate": 1.920626172957193e-06, "loss": 0.0541, "step": 630500 }, { "epoch": 6.2, "grad_norm": 6.660830020904541, "learning_rate": 1.920502050502944e-06, "loss": 0.0217, "step": 630525 }, { "epoch": 6.2, "grad_norm": 1.92457115650177, "learning_rate": 1.9203779280486958e-06, "loss": 0.0644, "step": 630550 }, { "epoch": 6.2, "grad_norm": 2.9210963249206543, "learning_rate": 1.9202538055944474e-06, "loss": 0.0292, "step": 630575 }, { "epoch": 6.2, "grad_norm": 0.15629291534423828, "learning_rate": 1.920129683140199e-06, "loss": 0.0645, "step": 630600 }, { "epoch": 6.2, "grad_norm": 10.695366859436035, "learning_rate": 1.9200055606859507e-06, "loss": 0.0297, "step": 630625 }, { "epoch": 6.2, "grad_norm": 0.9237355589866638, "learning_rate": 1.9198814382317023e-06, "loss": 0.072, "step": 630650 }, { "epoch": 6.2, "grad_norm": 6.126572608947754, "learning_rate": 1.9197573157774535e-06, "loss": 0.0275, "step": 630675 }, { "epoch": 6.2, "grad_norm": 1.8208892345428467, "learning_rate": 1.919633193323205e-06, "loss": 0.0709, "step": 630700 }, { "epoch": 6.2, "grad_norm": 0.18200638890266418, "learning_rate": 1.9195090708689568e-06, "loss": 0.0248, "step": 630725 }, { "epoch": 6.2, "grad_norm": 0.49311766028404236, "learning_rate": 1.919384948414708e-06, "loss": 0.0728, "step": 630750 }, { "epoch": 6.2, "grad_norm": 9.427173614501953, "learning_rate": 1.9192608259604596e-06, "loss": 0.0245, "step": 630775 }, { "epoch": 6.2, "grad_norm": 1.7016186714172363, "learning_rate": 1.9191367035062113e-06, "loss": 0.0879, "step": 630800 }, { "epoch": 6.2, "grad_norm": 2.8743896484375, "learning_rate": 1.919012581051963e-06, "loss": 0.0249, "step": 630825 }, { "epoch": 6.2, "grad_norm": 1.719277024269104, "learning_rate": 1.9188884585977145e-06, "loss": 0.0891, "step": 630850 }, { "epoch": 6.2, "grad_norm": 7.446473121643066, "learning_rate": 1.9187643361434657e-06, "loss": 0.0179, "step": 630875 }, { "epoch": 6.2, "grad_norm": 42.56483840942383, "learning_rate": 1.9186402136892174e-06, "loss": 0.0699, "step": 630900 }, { "epoch": 6.2, "grad_norm": 0.29414352774620056, "learning_rate": 1.918516091234969e-06, "loss": 0.018, "step": 630925 }, { "epoch": 6.2, "grad_norm": 0.45630595088005066, "learning_rate": 1.9183919687807202e-06, "loss": 0.077, "step": 630950 }, { "epoch": 6.2, "grad_norm": 0.9671068787574768, "learning_rate": 1.918267846326472e-06, "loss": 0.019, "step": 630975 }, { "epoch": 6.2, "grad_norm": 1.077291488647461, "learning_rate": 1.9181437238722235e-06, "loss": 0.0538, "step": 631000 }, { "epoch": 6.2, "grad_norm": 10.718385696411133, "learning_rate": 1.918019601417975e-06, "loss": 0.0221, "step": 631025 }, { "epoch": 6.2, "grad_norm": 7.3566412925720215, "learning_rate": 1.9178954789637268e-06, "loss": 0.0675, "step": 631050 }, { "epoch": 6.2, "grad_norm": 14.092538833618164, "learning_rate": 1.9177713565094784e-06, "loss": 0.0326, "step": 631075 }, { "epoch": 6.21, "grad_norm": 1.2945913076400757, "learning_rate": 1.9176472340552296e-06, "loss": 0.0585, "step": 631100 }, { "epoch": 6.21, "grad_norm": 4.5694260597229, "learning_rate": 1.9175231116009812e-06, "loss": 0.023, "step": 631125 }, { "epoch": 6.21, "grad_norm": 4.199751377105713, "learning_rate": 1.917398989146733e-06, "loss": 0.0734, "step": 631150 }, { "epoch": 6.21, "grad_norm": 1.9505255222320557, "learning_rate": 1.917274866692484e-06, "loss": 0.0128, "step": 631175 }, { "epoch": 6.21, "grad_norm": 4.407680988311768, "learning_rate": 1.9171507442382357e-06, "loss": 0.0748, "step": 631200 }, { "epoch": 6.21, "grad_norm": 12.415787696838379, "learning_rate": 1.9170266217839874e-06, "loss": 0.0334, "step": 631225 }, { "epoch": 6.21, "grad_norm": 2.7519915103912354, "learning_rate": 1.916902499329739e-06, "loss": 0.0991, "step": 631250 }, { "epoch": 6.21, "grad_norm": 2.225562572479248, "learning_rate": 1.9167783768754906e-06, "loss": 0.0097, "step": 631275 }, { "epoch": 6.21, "grad_norm": 5.006518840789795, "learning_rate": 1.916654254421242e-06, "loss": 0.0947, "step": 631300 }, { "epoch": 6.21, "grad_norm": 4.745887279510498, "learning_rate": 1.9165301319669935e-06, "loss": 0.0155, "step": 631325 }, { "epoch": 6.21, "grad_norm": 8.076966285705566, "learning_rate": 1.916406009512745e-06, "loss": 0.0917, "step": 631350 }, { "epoch": 6.21, "grad_norm": 4.550923824310303, "learning_rate": 1.9162818870584963e-06, "loss": 0.0215, "step": 631375 }, { "epoch": 6.21, "grad_norm": 1.486467719078064, "learning_rate": 1.916157764604248e-06, "loss": 0.0959, "step": 631400 }, { "epoch": 6.21, "grad_norm": 9.258485794067383, "learning_rate": 1.9160336421499996e-06, "loss": 0.0247, "step": 631425 }, { "epoch": 6.21, "grad_norm": 0.7371701002120972, "learning_rate": 1.9159095196957512e-06, "loss": 0.0711, "step": 631450 }, { "epoch": 6.21, "grad_norm": 4.443897724151611, "learning_rate": 1.915785397241503e-06, "loss": 0.0322, "step": 631475 }, { "epoch": 6.21, "grad_norm": 0.15548986196517944, "learning_rate": 1.9156612747872545e-06, "loss": 0.0871, "step": 631500 }, { "epoch": 6.21, "grad_norm": 11.840639114379883, "learning_rate": 1.9155371523330057e-06, "loss": 0.0221, "step": 631525 }, { "epoch": 6.21, "grad_norm": 7.954982757568359, "learning_rate": 1.9154130298787573e-06, "loss": 0.066, "step": 631550 }, { "epoch": 6.21, "grad_norm": 7.035175323486328, "learning_rate": 1.915288907424509e-06, "loss": 0.0457, "step": 631575 }, { "epoch": 6.21, "grad_norm": 4.2237043380737305, "learning_rate": 1.91516478497026e-06, "loss": 0.0835, "step": 631600 }, { "epoch": 6.21, "grad_norm": 11.427416801452637, "learning_rate": 1.915040662516012e-06, "loss": 0.0271, "step": 631625 }, { "epoch": 6.21, "grad_norm": 3.4348654747009277, "learning_rate": 1.9149165400617635e-06, "loss": 0.0913, "step": 631650 }, { "epoch": 6.21, "grad_norm": 1.0196027755737305, "learning_rate": 1.914792417607515e-06, "loss": 0.0208, "step": 631675 }, { "epoch": 6.21, "grad_norm": 3.367342948913574, "learning_rate": 1.9146682951532667e-06, "loss": 0.0817, "step": 631700 }, { "epoch": 6.21, "grad_norm": 7.856247425079346, "learning_rate": 1.914544172699018e-06, "loss": 0.0192, "step": 631725 }, { "epoch": 6.21, "grad_norm": 6.124233722686768, "learning_rate": 1.9144200502447696e-06, "loss": 0.0523, "step": 631750 }, { "epoch": 6.21, "grad_norm": 2.5517923831939697, "learning_rate": 1.9142959277905212e-06, "loss": 0.0331, "step": 631775 }, { "epoch": 6.21, "grad_norm": 4.544439792633057, "learning_rate": 1.9141718053362724e-06, "loss": 0.0798, "step": 631800 }, { "epoch": 6.21, "grad_norm": 0.8322635889053345, "learning_rate": 1.914047682882024e-06, "loss": 0.0143, "step": 631825 }, { "epoch": 6.21, "grad_norm": 7.543497562408447, "learning_rate": 1.9139235604277757e-06, "loss": 0.0773, "step": 631850 }, { "epoch": 6.21, "grad_norm": 5.914161682128906, "learning_rate": 1.9137994379735273e-06, "loss": 0.0188, "step": 631875 }, { "epoch": 6.21, "grad_norm": 0.3165547549724579, "learning_rate": 1.913675315519279e-06, "loss": 0.0841, "step": 631900 }, { "epoch": 6.21, "grad_norm": 5.686095237731934, "learning_rate": 1.9135511930650306e-06, "loss": 0.0194, "step": 631925 }, { "epoch": 6.21, "grad_norm": 0.5485638380050659, "learning_rate": 1.913427070610782e-06, "loss": 0.0721, "step": 631950 }, { "epoch": 6.21, "grad_norm": 7.715546607971191, "learning_rate": 1.9133029481565334e-06, "loss": 0.0266, "step": 631975 }, { "epoch": 6.21, "grad_norm": 2.5373427867889404, "learning_rate": 1.913178825702285e-06, "loss": 0.0687, "step": 632000 }, { "epoch": 6.21, "grad_norm": 3.710794448852539, "learning_rate": 1.9130547032480363e-06, "loss": 0.0317, "step": 632025 }, { "epoch": 6.21, "grad_norm": 4.256162643432617, "learning_rate": 1.912930580793788e-06, "loss": 0.081, "step": 632050 }, { "epoch": 6.21, "grad_norm": 3.486811399459839, "learning_rate": 1.9128064583395396e-06, "loss": 0.0155, "step": 632075 }, { "epoch": 6.21, "grad_norm": 1.791898488998413, "learning_rate": 1.912682335885291e-06, "loss": 0.0751, "step": 632100 }, { "epoch": 6.22, "grad_norm": 9.940950393676758, "learning_rate": 1.912558213431043e-06, "loss": 0.0195, "step": 632125 }, { "epoch": 6.22, "grad_norm": 3.7289648056030273, "learning_rate": 1.912434090976794e-06, "loss": 0.0877, "step": 632150 }, { "epoch": 6.22, "grad_norm": 9.631109237670898, "learning_rate": 1.9123099685225457e-06, "loss": 0.017, "step": 632175 }, { "epoch": 6.22, "grad_norm": 1.5547631978988647, "learning_rate": 1.9121858460682973e-06, "loss": 0.0607, "step": 632200 }, { "epoch": 6.22, "grad_norm": 4.553784370422363, "learning_rate": 1.9120617236140485e-06, "loss": 0.0166, "step": 632225 }, { "epoch": 6.22, "grad_norm": 1.5365978479385376, "learning_rate": 1.9119376011598e-06, "loss": 0.0665, "step": 632250 }, { "epoch": 6.22, "grad_norm": 6.251287460327148, "learning_rate": 1.911813478705552e-06, "loss": 0.0204, "step": 632275 }, { "epoch": 6.22, "grad_norm": 1.195500135421753, "learning_rate": 1.9116893562513034e-06, "loss": 0.0688, "step": 632300 }, { "epoch": 6.22, "grad_norm": 0.09022186696529388, "learning_rate": 1.911565233797055e-06, "loss": 0.0254, "step": 632325 }, { "epoch": 6.22, "grad_norm": 3.8711135387420654, "learning_rate": 1.9114411113428067e-06, "loss": 0.0951, "step": 632350 }, { "epoch": 6.22, "grad_norm": 5.639132976531982, "learning_rate": 1.911316988888558e-06, "loss": 0.0219, "step": 632375 }, { "epoch": 6.22, "grad_norm": 0.5994318127632141, "learning_rate": 1.9111928664343095e-06, "loss": 0.074, "step": 632400 }, { "epoch": 6.22, "grad_norm": 6.469252586364746, "learning_rate": 1.911068743980061e-06, "loss": 0.0186, "step": 632425 }, { "epoch": 6.22, "grad_norm": 3.359086513519287, "learning_rate": 1.910944621525813e-06, "loss": 0.0801, "step": 632450 }, { "epoch": 6.22, "grad_norm": 4.173945903778076, "learning_rate": 1.9108204990715645e-06, "loss": 0.0337, "step": 632475 }, { "epoch": 6.22, "grad_norm": 2.7340502738952637, "learning_rate": 1.9106963766173157e-06, "loss": 0.0629, "step": 632500 }, { "epoch": 6.22, "grad_norm": 5.775470733642578, "learning_rate": 1.9105722541630673e-06, "loss": 0.0199, "step": 632525 }, { "epoch": 6.22, "grad_norm": 4.250011920928955, "learning_rate": 1.910448131708819e-06, "loss": 0.0745, "step": 632550 }, { "epoch": 6.22, "grad_norm": 1.750356674194336, "learning_rate": 1.91032400925457e-06, "loss": 0.0276, "step": 632575 }, { "epoch": 6.22, "grad_norm": 5.532189846038818, "learning_rate": 1.9101998868003218e-06, "loss": 0.0949, "step": 632600 }, { "epoch": 6.22, "grad_norm": 5.4360432624816895, "learning_rate": 1.9100757643460734e-06, "loss": 0.0225, "step": 632625 }, { "epoch": 6.22, "grad_norm": 0.13218526542186737, "learning_rate": 1.909951641891825e-06, "loss": 0.0652, "step": 632650 }, { "epoch": 6.22, "grad_norm": 3.7247843742370605, "learning_rate": 1.9098275194375767e-06, "loss": 0.0237, "step": 632675 }, { "epoch": 6.22, "grad_norm": 0.18443506956100464, "learning_rate": 1.9097033969833283e-06, "loss": 0.0704, "step": 632700 }, { "epoch": 6.22, "grad_norm": 11.410168647766113, "learning_rate": 1.9095792745290795e-06, "loss": 0.0233, "step": 632725 }, { "epoch": 6.22, "grad_norm": 2.722822904586792, "learning_rate": 1.909455152074831e-06, "loss": 0.0811, "step": 632750 }, { "epoch": 6.22, "grad_norm": 8.363297462463379, "learning_rate": 1.909331029620583e-06, "loss": 0.0194, "step": 632775 }, { "epoch": 6.22, "grad_norm": 0.44701507687568665, "learning_rate": 1.909206907166334e-06, "loss": 0.074, "step": 632800 }, { "epoch": 6.22, "grad_norm": 1.6786874532699585, "learning_rate": 1.9090827847120856e-06, "loss": 0.0194, "step": 632825 }, { "epoch": 6.22, "grad_norm": 2.0887656211853027, "learning_rate": 1.9089586622578373e-06, "loss": 0.0889, "step": 632850 }, { "epoch": 6.22, "grad_norm": 1.2134181261062622, "learning_rate": 1.908834539803589e-06, "loss": 0.0188, "step": 632875 }, { "epoch": 6.22, "grad_norm": 2.678805351257324, "learning_rate": 1.9087104173493406e-06, "loss": 0.0701, "step": 632900 }, { "epoch": 6.22, "grad_norm": 9.19083023071289, "learning_rate": 1.9085862948950918e-06, "loss": 0.0248, "step": 632925 }, { "epoch": 6.22, "grad_norm": 14.214448928833008, "learning_rate": 1.9084621724408434e-06, "loss": 0.0615, "step": 632950 }, { "epoch": 6.22, "grad_norm": 2.8544492721557617, "learning_rate": 1.908338049986595e-06, "loss": 0.0213, "step": 632975 }, { "epoch": 6.22, "grad_norm": 1.069310188293457, "learning_rate": 1.9082139275323462e-06, "loss": 0.0754, "step": 633000 }, { "epoch": 6.22, "grad_norm": 9.141254425048828, "learning_rate": 1.908089805078098e-06, "loss": 0.0211, "step": 633025 }, { "epoch": 6.22, "grad_norm": 0.23339876532554626, "learning_rate": 1.9079656826238495e-06, "loss": 0.0937, "step": 633050 }, { "epoch": 6.22, "grad_norm": 5.539707660675049, "learning_rate": 1.907841560169601e-06, "loss": 0.0301, "step": 633075 }, { "epoch": 6.22, "grad_norm": 3.06325626373291, "learning_rate": 1.9077174377153528e-06, "loss": 0.0993, "step": 633100 }, { "epoch": 6.22, "grad_norm": 0.2387504279613495, "learning_rate": 1.9075933152611044e-06, "loss": 0.0209, "step": 633125 }, { "epoch": 6.23, "grad_norm": 0.9596600532531738, "learning_rate": 1.9074691928068556e-06, "loss": 0.0647, "step": 633150 }, { "epoch": 6.23, "grad_norm": 13.013731956481934, "learning_rate": 1.9073450703526073e-06, "loss": 0.0275, "step": 633175 }, { "epoch": 6.23, "grad_norm": 2.7311925888061523, "learning_rate": 1.907220947898359e-06, "loss": 0.069, "step": 633200 }, { "epoch": 6.23, "grad_norm": 5.958933353424072, "learning_rate": 1.9070968254441103e-06, "loss": 0.0244, "step": 633225 }, { "epoch": 6.23, "grad_norm": 7.702287197113037, "learning_rate": 1.906972702989862e-06, "loss": 0.0805, "step": 633250 }, { "epoch": 6.23, "grad_norm": 6.058948516845703, "learning_rate": 1.9068485805356136e-06, "loss": 0.0209, "step": 633275 }, { "epoch": 6.23, "grad_norm": 0.17937727272510529, "learning_rate": 1.9067244580813648e-06, "loss": 0.1044, "step": 633300 }, { "epoch": 6.23, "grad_norm": 5.440345287322998, "learning_rate": 1.9066003356271164e-06, "loss": 0.0235, "step": 633325 }, { "epoch": 6.23, "grad_norm": 3.7404086589813232, "learning_rate": 1.9064762131728679e-06, "loss": 0.065, "step": 633350 }, { "epoch": 6.23, "grad_norm": 4.906672954559326, "learning_rate": 1.9063520907186195e-06, "loss": 0.0143, "step": 633375 }, { "epoch": 6.23, "grad_norm": 3.8350865840911865, "learning_rate": 1.9062279682643711e-06, "loss": 0.0859, "step": 633400 }, { "epoch": 6.23, "grad_norm": 2.9130172729492188, "learning_rate": 1.9061038458101226e-06, "loss": 0.0175, "step": 633425 }, { "epoch": 6.23, "grad_norm": 2.2513046264648438, "learning_rate": 1.9059797233558742e-06, "loss": 0.097, "step": 633450 }, { "epoch": 6.23, "grad_norm": 4.165584564208984, "learning_rate": 1.9058556009016258e-06, "loss": 0.0204, "step": 633475 }, { "epoch": 6.23, "grad_norm": 2.374634265899658, "learning_rate": 1.905731478447377e-06, "loss": 0.0599, "step": 633500 }, { "epoch": 6.23, "grad_norm": 13.984402656555176, "learning_rate": 1.9056073559931287e-06, "loss": 0.0288, "step": 633525 }, { "epoch": 6.23, "grad_norm": 0.5137389898300171, "learning_rate": 1.9054832335388803e-06, "loss": 0.0745, "step": 633550 }, { "epoch": 6.23, "grad_norm": 10.89971923828125, "learning_rate": 1.9053591110846317e-06, "loss": 0.0256, "step": 633575 }, { "epoch": 6.23, "grad_norm": 1.286016583442688, "learning_rate": 1.9052349886303834e-06, "loss": 0.0684, "step": 633600 }, { "epoch": 6.23, "grad_norm": 11.766744613647461, "learning_rate": 1.905110866176135e-06, "loss": 0.0229, "step": 633625 }, { "epoch": 6.23, "grad_norm": 4.326801300048828, "learning_rate": 1.9049867437218864e-06, "loss": 0.0939, "step": 633650 }, { "epoch": 6.23, "grad_norm": 7.637881278991699, "learning_rate": 1.904862621267638e-06, "loss": 0.0128, "step": 633675 }, { "epoch": 6.23, "grad_norm": 1.5339990854263306, "learning_rate": 1.9047384988133897e-06, "loss": 0.0726, "step": 633700 }, { "epoch": 6.23, "grad_norm": 6.878032684326172, "learning_rate": 1.904614376359141e-06, "loss": 0.0199, "step": 633725 }, { "epoch": 6.23, "grad_norm": 3.6756577491760254, "learning_rate": 1.9044902539048925e-06, "loss": 0.0641, "step": 633750 }, { "epoch": 6.23, "grad_norm": 2.4482829570770264, "learning_rate": 1.904366131450644e-06, "loss": 0.0163, "step": 633775 }, { "epoch": 6.23, "grad_norm": 4.8406081199646, "learning_rate": 1.9042420089963956e-06, "loss": 0.0671, "step": 633800 }, { "epoch": 6.23, "grad_norm": 5.3625054359436035, "learning_rate": 1.9041178865421472e-06, "loss": 0.0301, "step": 633825 }, { "epoch": 6.23, "grad_norm": 0.9317978620529175, "learning_rate": 1.9039937640878987e-06, "loss": 0.0743, "step": 633850 }, { "epoch": 6.23, "grad_norm": 0.6579947471618652, "learning_rate": 1.9038696416336503e-06, "loss": 0.024, "step": 633875 }, { "epoch": 6.23, "grad_norm": 1.4310039281845093, "learning_rate": 1.903745519179402e-06, "loss": 0.0879, "step": 633900 }, { "epoch": 6.23, "grad_norm": 5.613621711730957, "learning_rate": 1.9036213967251531e-06, "loss": 0.0125, "step": 633925 }, { "epoch": 6.23, "grad_norm": 3.1575405597686768, "learning_rate": 1.9034972742709048e-06, "loss": 0.0776, "step": 633950 }, { "epoch": 6.23, "grad_norm": 2.6182215213775635, "learning_rate": 1.9033731518166564e-06, "loss": 0.0217, "step": 633975 }, { "epoch": 6.23, "grad_norm": 8.558605194091797, "learning_rate": 1.9032490293624078e-06, "loss": 0.0968, "step": 634000 }, { "epoch": 6.23, "grad_norm": 11.206354141235352, "learning_rate": 1.9031249069081595e-06, "loss": 0.026, "step": 634025 }, { "epoch": 6.23, "grad_norm": 1.705595850944519, "learning_rate": 1.903000784453911e-06, "loss": 0.0675, "step": 634050 }, { "epoch": 6.23, "grad_norm": 2.682321548461914, "learning_rate": 1.9028766619996625e-06, "loss": 0.0248, "step": 634075 }, { "epoch": 6.23, "grad_norm": 4.307684898376465, "learning_rate": 1.9027525395454142e-06, "loss": 0.0673, "step": 634100 }, { "epoch": 6.23, "grad_norm": 8.637333869934082, "learning_rate": 1.9026284170911658e-06, "loss": 0.0301, "step": 634125 }, { "epoch": 6.24, "grad_norm": 6.022593975067139, "learning_rate": 1.902504294636917e-06, "loss": 0.0921, "step": 634150 }, { "epoch": 6.24, "grad_norm": 4.146941184997559, "learning_rate": 1.9023801721826686e-06, "loss": 0.0205, "step": 634175 }, { "epoch": 6.24, "grad_norm": 0.5004733800888062, "learning_rate": 1.90225604972842e-06, "loss": 0.071, "step": 634200 }, { "epoch": 6.24, "grad_norm": 6.209214687347412, "learning_rate": 1.9021319272741717e-06, "loss": 0.0277, "step": 634225 }, { "epoch": 6.24, "grad_norm": 0.12581218779087067, "learning_rate": 1.9020078048199233e-06, "loss": 0.0876, "step": 634250 }, { "epoch": 6.24, "grad_norm": 2.2970361709594727, "learning_rate": 1.9018836823656748e-06, "loss": 0.0261, "step": 634275 }, { "epoch": 6.24, "grad_norm": 0.9276750683784485, "learning_rate": 1.9017595599114264e-06, "loss": 0.073, "step": 634300 }, { "epoch": 6.24, "grad_norm": 6.417858123779297, "learning_rate": 1.901635437457178e-06, "loss": 0.0133, "step": 634325 }, { "epoch": 6.24, "grad_norm": 1.1038131713867188, "learning_rate": 1.9015162799010995e-06, "loss": 0.099, "step": 634350 }, { "epoch": 6.24, "grad_norm": 10.80377197265625, "learning_rate": 1.9013921574468507e-06, "loss": 0.0237, "step": 634375 }, { "epoch": 6.24, "grad_norm": 0.6891948580741882, "learning_rate": 1.9012680349926023e-06, "loss": 0.0459, "step": 634400 }, { "epoch": 6.24, "grad_norm": 5.748562812805176, "learning_rate": 1.901143912538354e-06, "loss": 0.0179, "step": 634425 }, { "epoch": 6.24, "grad_norm": 3.054628372192383, "learning_rate": 1.9010197900841054e-06, "loss": 0.0818, "step": 634450 }, { "epoch": 6.24, "grad_norm": 6.9061150550842285, "learning_rate": 1.900895667629857e-06, "loss": 0.0282, "step": 634475 }, { "epoch": 6.24, "grad_norm": 4.415401935577393, "learning_rate": 1.9007715451756087e-06, "loss": 0.0687, "step": 634500 }, { "epoch": 6.24, "grad_norm": 6.554441928863525, "learning_rate": 1.90064742272136e-06, "loss": 0.0202, "step": 634525 }, { "epoch": 6.24, "grad_norm": 5.819721221923828, "learning_rate": 1.9005233002671117e-06, "loss": 0.0862, "step": 634550 }, { "epoch": 6.24, "grad_norm": 4.7737908363342285, "learning_rate": 1.9003991778128633e-06, "loss": 0.0265, "step": 634575 }, { "epoch": 6.24, "grad_norm": 5.219126224517822, "learning_rate": 1.9002750553586148e-06, "loss": 0.0801, "step": 634600 }, { "epoch": 6.24, "grad_norm": 6.1910319328308105, "learning_rate": 1.9001509329043664e-06, "loss": 0.0334, "step": 634625 }, { "epoch": 6.24, "grad_norm": 9.049346923828125, "learning_rate": 1.9000317753482879e-06, "loss": 0.0558, "step": 634650 }, { "epoch": 6.24, "grad_norm": 2.235546827316284, "learning_rate": 1.8999076528940395e-06, "loss": 0.0328, "step": 634675 }, { "epoch": 6.24, "grad_norm": 0.3366149365901947, "learning_rate": 1.8997835304397907e-06, "loss": 0.0794, "step": 634700 }, { "epoch": 6.24, "grad_norm": 6.297668933868408, "learning_rate": 1.8996594079855423e-06, "loss": 0.0199, "step": 634725 }, { "epoch": 6.24, "grad_norm": 0.7797680497169495, "learning_rate": 1.899535285531294e-06, "loss": 0.0782, "step": 634750 }, { "epoch": 6.24, "grad_norm": 10.201765060424805, "learning_rate": 1.8994111630770454e-06, "loss": 0.0474, "step": 634775 }, { "epoch": 6.24, "grad_norm": 2.2414634227752686, "learning_rate": 1.899287040622797e-06, "loss": 0.0584, "step": 634800 }, { "epoch": 6.24, "grad_norm": 1.7112783193588257, "learning_rate": 1.8991629181685487e-06, "loss": 0.0184, "step": 634825 }, { "epoch": 6.24, "grad_norm": 0.6993629932403564, "learning_rate": 1.8990387957143e-06, "loss": 0.0697, "step": 634850 }, { "epoch": 6.24, "grad_norm": 14.508816719055176, "learning_rate": 1.8989146732600517e-06, "loss": 0.0228, "step": 634875 }, { "epoch": 6.24, "grad_norm": 9.622869491577148, "learning_rate": 1.898790550805803e-06, "loss": 0.1066, "step": 634900 }, { "epoch": 6.24, "grad_norm": 12.886685371398926, "learning_rate": 1.8986664283515546e-06, "loss": 0.0339, "step": 634925 }, { "epoch": 6.24, "grad_norm": 1.4044829607009888, "learning_rate": 1.8985423058973062e-06, "loss": 0.0682, "step": 634950 }, { "epoch": 6.24, "grad_norm": 8.895995140075684, "learning_rate": 1.8984181834430576e-06, "loss": 0.0267, "step": 634975 }, { "epoch": 6.24, "grad_norm": 2.868000030517578, "learning_rate": 1.8982940609888093e-06, "loss": 0.0796, "step": 635000 }, { "epoch": 6.24, "grad_norm": 8.801563262939453, "learning_rate": 1.898169938534561e-06, "loss": 0.0383, "step": 635025 }, { "epoch": 6.24, "grad_norm": 3.5585081577301025, "learning_rate": 1.8980458160803123e-06, "loss": 0.0768, "step": 635050 }, { "epoch": 6.24, "grad_norm": 2.2684009075164795, "learning_rate": 1.897921693626064e-06, "loss": 0.0256, "step": 635075 }, { "epoch": 6.24, "grad_norm": 0.1749880015850067, "learning_rate": 1.8977975711718156e-06, "loss": 0.0773, "step": 635100 }, { "epoch": 6.24, "grad_norm": 7.0026702880859375, "learning_rate": 1.8976734487175668e-06, "loss": 0.0191, "step": 635125 }, { "epoch": 6.24, "grad_norm": 2.9671108722686768, "learning_rate": 1.8975493262633184e-06, "loss": 0.0908, "step": 635150 }, { "epoch": 6.25, "grad_norm": 10.742300987243652, "learning_rate": 1.89742520380907e-06, "loss": 0.0171, "step": 635175 }, { "epoch": 6.25, "grad_norm": 1.3431999683380127, "learning_rate": 1.8973010813548215e-06, "loss": 0.0526, "step": 635200 }, { "epoch": 6.25, "grad_norm": 20.97694206237793, "learning_rate": 1.8971769589005731e-06, "loss": 0.024, "step": 635225 }, { "epoch": 6.25, "grad_norm": 1.7679083347320557, "learning_rate": 1.8970528364463248e-06, "loss": 0.0659, "step": 635250 }, { "epoch": 6.25, "grad_norm": 8.339102745056152, "learning_rate": 1.8969287139920762e-06, "loss": 0.0248, "step": 635275 }, { "epoch": 6.25, "grad_norm": 1.8712701797485352, "learning_rate": 1.8968045915378278e-06, "loss": 0.0671, "step": 635300 }, { "epoch": 6.25, "grad_norm": 5.225051403045654, "learning_rate": 1.896680469083579e-06, "loss": 0.04, "step": 635325 }, { "epoch": 6.25, "grad_norm": 10.17164421081543, "learning_rate": 1.8965563466293307e-06, "loss": 0.0854, "step": 635350 }, { "epoch": 6.25, "grad_norm": 3.1161158084869385, "learning_rate": 1.8964322241750823e-06, "loss": 0.0269, "step": 635375 }, { "epoch": 6.25, "grad_norm": 1.7584956884384155, "learning_rate": 1.8963081017208337e-06, "loss": 0.07, "step": 635400 }, { "epoch": 6.25, "grad_norm": 14.08472728729248, "learning_rate": 1.8961839792665854e-06, "loss": 0.0316, "step": 635425 }, { "epoch": 6.25, "grad_norm": 1.6906341314315796, "learning_rate": 1.896059856812337e-06, "loss": 0.067, "step": 635450 }, { "epoch": 6.25, "grad_norm": 11.458808898925781, "learning_rate": 1.8959357343580884e-06, "loss": 0.0147, "step": 635475 }, { "epoch": 6.25, "grad_norm": 1.1062371730804443, "learning_rate": 1.89581161190384e-06, "loss": 0.0672, "step": 635500 }, { "epoch": 6.25, "grad_norm": 4.609344005584717, "learning_rate": 1.8956874894495917e-06, "loss": 0.0188, "step": 635525 }, { "epoch": 6.25, "grad_norm": 3.201962947845459, "learning_rate": 1.895563366995343e-06, "loss": 0.0737, "step": 635550 }, { "epoch": 6.25, "grad_norm": 4.588095664978027, "learning_rate": 1.8954392445410945e-06, "loss": 0.034, "step": 635575 }, { "epoch": 6.25, "grad_norm": 3.1089911460876465, "learning_rate": 1.8953151220868462e-06, "loss": 0.0778, "step": 635600 }, { "epoch": 6.25, "grad_norm": 5.153646945953369, "learning_rate": 1.8951909996325976e-06, "loss": 0.0143, "step": 635625 }, { "epoch": 6.25, "grad_norm": 0.9488770961761475, "learning_rate": 1.8950668771783492e-06, "loss": 0.0537, "step": 635650 }, { "epoch": 6.25, "grad_norm": 6.854608058929443, "learning_rate": 1.8949427547241009e-06, "loss": 0.0192, "step": 635675 }, { "epoch": 6.25, "grad_norm": 2.7989203929901123, "learning_rate": 1.8948186322698523e-06, "loss": 0.0668, "step": 635700 }, { "epoch": 6.25, "grad_norm": 8.046874046325684, "learning_rate": 1.894694509815604e-06, "loss": 0.0198, "step": 635725 }, { "epoch": 6.25, "grad_norm": 1.1903252601623535, "learning_rate": 1.8945703873613556e-06, "loss": 0.0865, "step": 635750 }, { "epoch": 6.25, "grad_norm": 9.6959228515625, "learning_rate": 1.8944462649071068e-06, "loss": 0.0164, "step": 635775 }, { "epoch": 6.25, "grad_norm": 1.5699481964111328, "learning_rate": 1.8943221424528584e-06, "loss": 0.0778, "step": 635800 }, { "epoch": 6.25, "grad_norm": 0.790337085723877, "learning_rate": 1.8941980199986098e-06, "loss": 0.0132, "step": 635825 }, { "epoch": 6.25, "grad_norm": 0.2032347470521927, "learning_rate": 1.8940738975443615e-06, "loss": 0.0756, "step": 635850 }, { "epoch": 6.25, "grad_norm": 18.54759979248047, "learning_rate": 1.893949775090113e-06, "loss": 0.0194, "step": 635875 }, { "epoch": 6.25, "grad_norm": 1.501410961151123, "learning_rate": 1.8938256526358645e-06, "loss": 0.0735, "step": 635900 }, { "epoch": 6.25, "grad_norm": 9.51794719696045, "learning_rate": 1.8937015301816162e-06, "loss": 0.0379, "step": 635925 }, { "epoch": 6.25, "grad_norm": 0.14753541350364685, "learning_rate": 1.8935774077273678e-06, "loss": 0.0982, "step": 635950 }, { "epoch": 6.25, "grad_norm": 9.48469066619873, "learning_rate": 1.8934532852731192e-06, "loss": 0.0321, "step": 635975 }, { "epoch": 6.25, "grad_norm": 0.22389519214630127, "learning_rate": 1.8933291628188706e-06, "loss": 0.0626, "step": 636000 }, { "epoch": 6.25, "grad_norm": 9.89368724822998, "learning_rate": 1.8932050403646223e-06, "loss": 0.0178, "step": 636025 }, { "epoch": 6.25, "grad_norm": 11.136506080627441, "learning_rate": 1.8930809179103737e-06, "loss": 0.0647, "step": 636050 }, { "epoch": 6.25, "grad_norm": 9.310675621032715, "learning_rate": 1.8929567954561253e-06, "loss": 0.0253, "step": 636075 }, { "epoch": 6.25, "grad_norm": 3.13419246673584, "learning_rate": 1.892832673001877e-06, "loss": 0.0583, "step": 636100 }, { "epoch": 6.25, "grad_norm": 4.006194591522217, "learning_rate": 1.8927085505476284e-06, "loss": 0.0247, "step": 636125 }, { "epoch": 6.25, "grad_norm": 1.494362473487854, "learning_rate": 1.89258442809338e-06, "loss": 0.0737, "step": 636150 }, { "epoch": 6.25, "grad_norm": 5.180790424346924, "learning_rate": 1.8924603056391317e-06, "loss": 0.0153, "step": 636175 }, { "epoch": 6.26, "grad_norm": 5.6143574714660645, "learning_rate": 1.892336183184883e-06, "loss": 0.0721, "step": 636200 }, { "epoch": 6.26, "grad_norm": 3.7157959938049316, "learning_rate": 1.8922120607306347e-06, "loss": 0.0278, "step": 636225 }, { "epoch": 6.26, "grad_norm": 0.02456396259367466, "learning_rate": 1.892087938276386e-06, "loss": 0.0719, "step": 636250 }, { "epoch": 6.26, "grad_norm": 9.265242576599121, "learning_rate": 1.8919638158221376e-06, "loss": 0.0247, "step": 636275 }, { "epoch": 6.26, "grad_norm": 0.24010518193244934, "learning_rate": 1.8918396933678892e-06, "loss": 0.06, "step": 636300 }, { "epoch": 6.26, "grad_norm": 1.982927918434143, "learning_rate": 1.8917155709136406e-06, "loss": 0.0166, "step": 636325 }, { "epoch": 6.26, "grad_norm": 1.9738850593566895, "learning_rate": 1.8915914484593923e-06, "loss": 0.0653, "step": 636350 }, { "epoch": 6.26, "grad_norm": 5.4266180992126465, "learning_rate": 1.891467326005144e-06, "loss": 0.0297, "step": 636375 }, { "epoch": 6.26, "grad_norm": 1.0821645259857178, "learning_rate": 1.8913432035508953e-06, "loss": 0.0813, "step": 636400 }, { "epoch": 6.26, "grad_norm": 3.8089404106140137, "learning_rate": 1.891219081096647e-06, "loss": 0.0224, "step": 636425 }, { "epoch": 6.26, "grad_norm": 0.7511736154556274, "learning_rate": 1.8910949586423986e-06, "loss": 0.0774, "step": 636450 }, { "epoch": 6.26, "grad_norm": 1.4852585792541504, "learning_rate": 1.8909708361881498e-06, "loss": 0.0115, "step": 636475 }, { "epoch": 6.26, "grad_norm": 1.4983688592910767, "learning_rate": 1.8908467137339014e-06, "loss": 0.0677, "step": 636500 }, { "epoch": 6.26, "grad_norm": 3.237018585205078, "learning_rate": 1.890722591279653e-06, "loss": 0.0198, "step": 636525 }, { "epoch": 6.26, "grad_norm": 5.914685249328613, "learning_rate": 1.8905984688254045e-06, "loss": 0.1141, "step": 636550 }, { "epoch": 6.26, "grad_norm": 10.515296936035156, "learning_rate": 1.8904743463711561e-06, "loss": 0.0335, "step": 636575 }, { "epoch": 6.26, "grad_norm": 1.8119499683380127, "learning_rate": 1.8903502239169078e-06, "loss": 0.0682, "step": 636600 }, { "epoch": 6.26, "grad_norm": 3.2514328956604004, "learning_rate": 1.8902261014626592e-06, "loss": 0.0249, "step": 636625 }, { "epoch": 6.26, "grad_norm": 6.733206748962402, "learning_rate": 1.8901069439065806e-06, "loss": 0.0867, "step": 636650 }, { "epoch": 6.26, "grad_norm": 6.204676151275635, "learning_rate": 1.8899828214523323e-06, "loss": 0.0157, "step": 636675 }, { "epoch": 6.26, "grad_norm": 4.865864276885986, "learning_rate": 1.889858698998084e-06, "loss": 0.0769, "step": 636700 }, { "epoch": 6.26, "grad_norm": 8.885823249816895, "learning_rate": 1.8897345765438351e-06, "loss": 0.0326, "step": 636725 }, { "epoch": 6.26, "grad_norm": 0.8083004355430603, "learning_rate": 1.8896104540895868e-06, "loss": 0.0713, "step": 636750 }, { "epoch": 6.26, "grad_norm": 5.647327899932861, "learning_rate": 1.8894863316353382e-06, "loss": 0.0118, "step": 636775 }, { "epoch": 6.26, "grad_norm": 1.9697319269180298, "learning_rate": 1.8893622091810898e-06, "loss": 0.0778, "step": 636800 }, { "epoch": 6.26, "grad_norm": 7.690530776977539, "learning_rate": 1.8892380867268414e-06, "loss": 0.0271, "step": 636825 }, { "epoch": 6.26, "grad_norm": 2.122779607772827, "learning_rate": 1.8891139642725929e-06, "loss": 0.071, "step": 636850 }, { "epoch": 6.26, "grad_norm": 1.2203552722930908, "learning_rate": 1.8889898418183445e-06, "loss": 0.0246, "step": 636875 }, { "epoch": 6.26, "grad_norm": 0.4962958097457886, "learning_rate": 1.8888657193640961e-06, "loss": 0.059, "step": 636900 }, { "epoch": 6.26, "grad_norm": 1.332220196723938, "learning_rate": 1.8887415969098474e-06, "loss": 0.017, "step": 636925 }, { "epoch": 6.26, "grad_norm": 0.5374658107757568, "learning_rate": 1.888617474455599e-06, "loss": 0.0658, "step": 636950 }, { "epoch": 6.26, "grad_norm": 7.434711456298828, "learning_rate": 1.8884933520013506e-06, "loss": 0.0269, "step": 636975 }, { "epoch": 6.26, "grad_norm": 0.894938051700592, "learning_rate": 1.888369229547102e-06, "loss": 0.0604, "step": 637000 }, { "epoch": 6.26, "grad_norm": 7.426058292388916, "learning_rate": 1.8882451070928537e-06, "loss": 0.0194, "step": 637025 }, { "epoch": 6.26, "grad_norm": 7.1265153884887695, "learning_rate": 1.8881209846386053e-06, "loss": 0.0617, "step": 637050 }, { "epoch": 6.26, "grad_norm": 1.8124053478240967, "learning_rate": 1.8879968621843567e-06, "loss": 0.0168, "step": 637075 }, { "epoch": 6.26, "grad_norm": 0.772726833820343, "learning_rate": 1.8878727397301084e-06, "loss": 0.0604, "step": 637100 }, { "epoch": 6.26, "grad_norm": 0.6987990140914917, "learning_rate": 1.88774861727586e-06, "loss": 0.0194, "step": 637125 }, { "epoch": 6.26, "grad_norm": 2.543846607208252, "learning_rate": 1.8876244948216112e-06, "loss": 0.0742, "step": 637150 }, { "epoch": 6.26, "grad_norm": 12.141887664794922, "learning_rate": 1.8875003723673629e-06, "loss": 0.0232, "step": 637175 }, { "epoch": 6.27, "grad_norm": 1.3545081615447998, "learning_rate": 1.8873762499131143e-06, "loss": 0.0895, "step": 637200 }, { "epoch": 6.27, "grad_norm": 0.5472659468650818, "learning_rate": 1.887252127458866e-06, "loss": 0.0259, "step": 637225 }, { "epoch": 6.27, "grad_norm": 4.1242523193359375, "learning_rate": 1.8871280050046175e-06, "loss": 0.0601, "step": 637250 }, { "epoch": 6.27, "grad_norm": 15.727499008178711, "learning_rate": 1.887003882550369e-06, "loss": 0.0175, "step": 637275 }, { "epoch": 6.27, "grad_norm": 4.784195899963379, "learning_rate": 1.8868797600961206e-06, "loss": 0.0759, "step": 637300 }, { "epoch": 6.27, "grad_norm": 3.705216407775879, "learning_rate": 1.8867556376418722e-06, "loss": 0.0308, "step": 637325 }, { "epoch": 6.27, "grad_norm": 1.141836404800415, "learning_rate": 1.8866315151876235e-06, "loss": 0.0732, "step": 637350 }, { "epoch": 6.27, "grad_norm": 6.384899616241455, "learning_rate": 1.886507392733375e-06, "loss": 0.0204, "step": 637375 }, { "epoch": 6.27, "grad_norm": 3.333775043487549, "learning_rate": 1.8863832702791267e-06, "loss": 0.0861, "step": 637400 }, { "epoch": 6.27, "grad_norm": 6.449635982513428, "learning_rate": 1.8862591478248781e-06, "loss": 0.019, "step": 637425 }, { "epoch": 6.27, "grad_norm": 2.613936424255371, "learning_rate": 1.8861350253706298e-06, "loss": 0.0741, "step": 637450 }, { "epoch": 6.27, "grad_norm": 6.908336639404297, "learning_rate": 1.8860109029163814e-06, "loss": 0.0257, "step": 637475 }, { "epoch": 6.27, "grad_norm": 4.11442756652832, "learning_rate": 1.8858867804621328e-06, "loss": 0.0716, "step": 637500 }, { "epoch": 6.27, "grad_norm": 4.460322380065918, "learning_rate": 1.8857626580078845e-06, "loss": 0.0161, "step": 637525 }, { "epoch": 6.27, "grad_norm": 2.212914228439331, "learning_rate": 1.8856385355536361e-06, "loss": 0.0737, "step": 637550 }, { "epoch": 6.27, "grad_norm": 9.326000213623047, "learning_rate": 1.8855144130993875e-06, "loss": 0.0185, "step": 637575 }, { "epoch": 6.27, "grad_norm": 0.320984810590744, "learning_rate": 1.8853902906451392e-06, "loss": 0.0784, "step": 637600 }, { "epoch": 6.27, "grad_norm": 4.06187105178833, "learning_rate": 1.8852661681908904e-06, "loss": 0.0303, "step": 637625 }, { "epoch": 6.27, "grad_norm": 11.832437515258789, "learning_rate": 1.885142045736642e-06, "loss": 0.0635, "step": 637650 }, { "epoch": 6.27, "grad_norm": 2.9452173709869385, "learning_rate": 1.8850179232823936e-06, "loss": 0.0241, "step": 637675 }, { "epoch": 6.27, "grad_norm": 0.5658466815948486, "learning_rate": 1.884893800828145e-06, "loss": 0.0657, "step": 637700 }, { "epoch": 6.27, "grad_norm": 12.633312225341797, "learning_rate": 1.8847696783738967e-06, "loss": 0.0246, "step": 637725 }, { "epoch": 6.27, "grad_norm": 1.3591126203536987, "learning_rate": 1.8846455559196483e-06, "loss": 0.0933, "step": 637750 }, { "epoch": 6.27, "grad_norm": 2.8105967044830322, "learning_rate": 1.8845214334653998e-06, "loss": 0.022, "step": 637775 }, { "epoch": 6.27, "grad_norm": 5.071508884429932, "learning_rate": 1.8843973110111514e-06, "loss": 0.0851, "step": 637800 }, { "epoch": 6.27, "grad_norm": 5.805490016937256, "learning_rate": 1.884273188556903e-06, "loss": 0.0162, "step": 637825 }, { "epoch": 6.27, "grad_norm": 7.269407272338867, "learning_rate": 1.8841490661026542e-06, "loss": 0.0608, "step": 637850 }, { "epoch": 6.27, "grad_norm": 8.499427795410156, "learning_rate": 1.8840249436484059e-06, "loss": 0.0215, "step": 637875 }, { "epoch": 6.27, "grad_norm": 42.49549102783203, "learning_rate": 1.8839008211941575e-06, "loss": 0.09, "step": 637900 }, { "epoch": 6.27, "grad_norm": 12.37423324584961, "learning_rate": 1.883776698739909e-06, "loss": 0.0269, "step": 637925 }, { "epoch": 6.27, "grad_norm": 2.7868764400482178, "learning_rate": 1.8836525762856606e-06, "loss": 0.0801, "step": 637950 }, { "epoch": 6.27, "grad_norm": 6.473153114318848, "learning_rate": 1.8835284538314122e-06, "loss": 0.0217, "step": 637975 }, { "epoch": 6.27, "grad_norm": 0.13560619950294495, "learning_rate": 1.8834043313771636e-06, "loss": 0.0894, "step": 638000 }, { "epoch": 6.27, "grad_norm": 1.5199660062789917, "learning_rate": 1.8832802089229153e-06, "loss": 0.0143, "step": 638025 }, { "epoch": 6.27, "grad_norm": 2.1057350635528564, "learning_rate": 1.8831560864686665e-06, "loss": 0.0757, "step": 638050 }, { "epoch": 6.27, "grad_norm": 19.067153930664062, "learning_rate": 1.8830319640144181e-06, "loss": 0.036, "step": 638075 }, { "epoch": 6.27, "grad_norm": 1.6802620887756348, "learning_rate": 1.8829078415601697e-06, "loss": 0.0654, "step": 638100 }, { "epoch": 6.27, "grad_norm": 11.816595077514648, "learning_rate": 1.8827837191059212e-06, "loss": 0.0174, "step": 638125 }, { "epoch": 6.27, "grad_norm": 2.766535520553589, "learning_rate": 1.8826595966516728e-06, "loss": 0.0865, "step": 638150 }, { "epoch": 6.27, "grad_norm": 0.5540053248405457, "learning_rate": 1.8825354741974244e-06, "loss": 0.0162, "step": 638175 }, { "epoch": 6.27, "grad_norm": 4.704269886016846, "learning_rate": 1.8824113517431759e-06, "loss": 0.0725, "step": 638200 }, { "epoch": 6.28, "grad_norm": 9.470972061157227, "learning_rate": 1.8822872292889275e-06, "loss": 0.0206, "step": 638225 }, { "epoch": 6.28, "grad_norm": 3.6439764499664307, "learning_rate": 1.8821631068346791e-06, "loss": 0.0736, "step": 638250 }, { "epoch": 6.28, "grad_norm": 4.8348612785339355, "learning_rate": 1.8820389843804303e-06, "loss": 0.0266, "step": 638275 }, { "epoch": 6.28, "grad_norm": 0.16847656667232513, "learning_rate": 1.881914861926182e-06, "loss": 0.0671, "step": 638300 }, { "epoch": 6.28, "grad_norm": 22.506196975708008, "learning_rate": 1.8817907394719336e-06, "loss": 0.0284, "step": 638325 }, { "epoch": 6.28, "grad_norm": 2.0394532680511475, "learning_rate": 1.881666617017685e-06, "loss": 0.084, "step": 638350 }, { "epoch": 6.28, "grad_norm": 10.057750701904297, "learning_rate": 1.8815424945634367e-06, "loss": 0.0257, "step": 638375 }, { "epoch": 6.28, "grad_norm": 0.21987031400203705, "learning_rate": 1.8814183721091883e-06, "loss": 0.0813, "step": 638400 }, { "epoch": 6.28, "grad_norm": 14.100370407104492, "learning_rate": 1.8812942496549397e-06, "loss": 0.0286, "step": 638425 }, { "epoch": 6.28, "grad_norm": 1.6476832628250122, "learning_rate": 1.8811701272006914e-06, "loss": 0.0722, "step": 638450 }, { "epoch": 6.28, "grad_norm": 8.89886474609375, "learning_rate": 1.8810460047464426e-06, "loss": 0.0188, "step": 638475 }, { "epoch": 6.28, "grad_norm": 6.615756034851074, "learning_rate": 1.8809218822921942e-06, "loss": 0.0814, "step": 638500 }, { "epoch": 6.28, "grad_norm": 13.929288864135742, "learning_rate": 1.8807977598379458e-06, "loss": 0.0371, "step": 638525 }, { "epoch": 6.28, "grad_norm": 4.524999141693115, "learning_rate": 1.8806736373836973e-06, "loss": 0.0836, "step": 638550 }, { "epoch": 6.28, "grad_norm": 2.966118574142456, "learning_rate": 1.880549514929449e-06, "loss": 0.0238, "step": 638575 }, { "epoch": 6.28, "grad_norm": 4.633153438568115, "learning_rate": 1.8804253924752005e-06, "loss": 0.0553, "step": 638600 }, { "epoch": 6.28, "grad_norm": 5.768560409545898, "learning_rate": 1.880301270020952e-06, "loss": 0.0278, "step": 638625 }, { "epoch": 6.28, "grad_norm": 3.20228910446167, "learning_rate": 1.8801771475667036e-06, "loss": 0.0732, "step": 638650 }, { "epoch": 6.28, "grad_norm": 4.29399299621582, "learning_rate": 1.8800530251124552e-06, "loss": 0.0319, "step": 638675 }, { "epoch": 6.28, "grad_norm": 0.1288851797580719, "learning_rate": 1.8799289026582064e-06, "loss": 0.0618, "step": 638700 }, { "epoch": 6.28, "grad_norm": 4.000925540924072, "learning_rate": 1.879804780203958e-06, "loss": 0.0233, "step": 638725 }, { "epoch": 6.28, "grad_norm": 2.0326242446899414, "learning_rate": 1.8796806577497097e-06, "loss": 0.074, "step": 638750 }, { "epoch": 6.28, "grad_norm": 4.008978843688965, "learning_rate": 1.8795565352954611e-06, "loss": 0.0282, "step": 638775 }, { "epoch": 6.28, "grad_norm": 0.9959186911582947, "learning_rate": 1.8794324128412128e-06, "loss": 0.0859, "step": 638800 }, { "epoch": 6.28, "grad_norm": 13.12017822265625, "learning_rate": 1.8793082903869644e-06, "loss": 0.0172, "step": 638825 }, { "epoch": 6.28, "grad_norm": 2.918267011642456, "learning_rate": 1.8791841679327158e-06, "loss": 0.0674, "step": 638850 }, { "epoch": 6.28, "grad_norm": 11.361572265625, "learning_rate": 1.8790600454784675e-06, "loss": 0.0247, "step": 638875 }, { "epoch": 6.28, "grad_norm": 2.9996464252471924, "learning_rate": 1.8789359230242189e-06, "loss": 0.0724, "step": 638900 }, { "epoch": 6.28, "grad_norm": 18.08580780029297, "learning_rate": 1.8788118005699705e-06, "loss": 0.0241, "step": 638925 }, { "epoch": 6.28, "grad_norm": 6.197023868560791, "learning_rate": 1.8786876781157222e-06, "loss": 0.0733, "step": 638950 }, { "epoch": 6.28, "grad_norm": 9.757076263427734, "learning_rate": 1.8785635556614734e-06, "loss": 0.0103, "step": 638975 }, { "epoch": 6.28, "grad_norm": 2.3316080570220947, "learning_rate": 1.878439433207225e-06, "loss": 0.0672, "step": 639000 }, { "epoch": 6.28, "grad_norm": 6.19206428527832, "learning_rate": 1.8783153107529766e-06, "loss": 0.0169, "step": 639025 }, { "epoch": 6.28, "grad_norm": 3.07037091255188, "learning_rate": 1.878191188298728e-06, "loss": 0.0607, "step": 639050 }, { "epoch": 6.28, "grad_norm": 4.606189250946045, "learning_rate": 1.8780670658444797e-06, "loss": 0.0374, "step": 639075 }, { "epoch": 6.28, "grad_norm": 6.600356101989746, "learning_rate": 1.8779429433902313e-06, "loss": 0.0599, "step": 639100 }, { "epoch": 6.28, "grad_norm": 6.414056777954102, "learning_rate": 1.8778188209359828e-06, "loss": 0.0196, "step": 639125 }, { "epoch": 6.28, "grad_norm": 1.3058515787124634, "learning_rate": 1.8776946984817344e-06, "loss": 0.0768, "step": 639150 }, { "epoch": 6.28, "grad_norm": 1.7374366521835327, "learning_rate": 1.877570576027486e-06, "loss": 0.0213, "step": 639175 }, { "epoch": 6.28, "grad_norm": 2.806626081466675, "learning_rate": 1.8774464535732372e-06, "loss": 0.0584, "step": 639200 }, { "epoch": 6.28, "grad_norm": 11.028944969177246, "learning_rate": 1.8773223311189889e-06, "loss": 0.0289, "step": 639225 }, { "epoch": 6.29, "grad_norm": 0.9399747848510742, "learning_rate": 1.8771982086647405e-06, "loss": 0.0721, "step": 639250 }, { "epoch": 6.29, "grad_norm": 17.460906982421875, "learning_rate": 1.877074086210492e-06, "loss": 0.0292, "step": 639275 }, { "epoch": 6.29, "grad_norm": 1.1211670637130737, "learning_rate": 1.8769499637562436e-06, "loss": 0.0604, "step": 639300 }, { "epoch": 6.29, "grad_norm": 7.635992527008057, "learning_rate": 1.8768258413019952e-06, "loss": 0.0185, "step": 639325 }, { "epoch": 6.29, "grad_norm": 3.161825656890869, "learning_rate": 1.8767017188477466e-06, "loss": 0.0612, "step": 639350 }, { "epoch": 6.29, "grad_norm": 1.824705958366394, "learning_rate": 1.8765775963934983e-06, "loss": 0.0119, "step": 639375 }, { "epoch": 6.29, "grad_norm": 0.1458786576986313, "learning_rate": 1.8764534739392495e-06, "loss": 0.0938, "step": 639400 }, { "epoch": 6.29, "grad_norm": 2.096001625061035, "learning_rate": 1.8763293514850011e-06, "loss": 0.0108, "step": 639425 }, { "epoch": 6.29, "grad_norm": 3.088406562805176, "learning_rate": 1.8762052290307527e-06, "loss": 0.0847, "step": 639450 }, { "epoch": 6.29, "grad_norm": 0.3689174950122833, "learning_rate": 1.8760811065765042e-06, "loss": 0.0231, "step": 639475 }, { "epoch": 6.29, "grad_norm": 4.200552940368652, "learning_rate": 1.8759569841222558e-06, "loss": 0.0526, "step": 639500 }, { "epoch": 6.29, "grad_norm": 41.28206253051758, "learning_rate": 1.8758328616680074e-06, "loss": 0.0337, "step": 639525 }, { "epoch": 6.29, "grad_norm": 4.796481132507324, "learning_rate": 1.8757087392137589e-06, "loss": 0.0789, "step": 639550 }, { "epoch": 6.29, "grad_norm": 18.597211837768555, "learning_rate": 1.8755846167595105e-06, "loss": 0.0135, "step": 639575 }, { "epoch": 6.29, "grad_norm": 0.7305350303649902, "learning_rate": 1.875465459203432e-06, "loss": 0.0657, "step": 639600 }, { "epoch": 6.29, "grad_norm": 19.906980514526367, "learning_rate": 1.8753413367491836e-06, "loss": 0.031, "step": 639625 }, { "epoch": 6.29, "grad_norm": 2.3944950103759766, "learning_rate": 1.8752172142949348e-06, "loss": 0.0829, "step": 639650 }, { "epoch": 6.29, "grad_norm": 14.542192459106445, "learning_rate": 1.8750930918406864e-06, "loss": 0.021, "step": 639675 }, { "epoch": 6.29, "grad_norm": 0.08337482064962387, "learning_rate": 1.874968969386438e-06, "loss": 0.0851, "step": 639700 }, { "epoch": 6.29, "grad_norm": 3.8998072147369385, "learning_rate": 1.8748448469321895e-06, "loss": 0.0277, "step": 639725 }, { "epoch": 6.29, "grad_norm": 0.030061226338148117, "learning_rate": 1.8747207244779411e-06, "loss": 0.08, "step": 639750 }, { "epoch": 6.29, "grad_norm": 5.108060359954834, "learning_rate": 1.8745966020236928e-06, "loss": 0.0275, "step": 639775 }, { "epoch": 6.29, "grad_norm": 3.9115331172943115, "learning_rate": 1.8744724795694442e-06, "loss": 0.0663, "step": 639800 }, { "epoch": 6.29, "grad_norm": 0.3124834895133972, "learning_rate": 1.8743483571151958e-06, "loss": 0.0213, "step": 639825 }, { "epoch": 6.29, "grad_norm": 2.2444915771484375, "learning_rate": 1.8742242346609474e-06, "loss": 0.0879, "step": 639850 }, { "epoch": 6.29, "grad_norm": 9.586512565612793, "learning_rate": 1.8741001122066987e-06, "loss": 0.0229, "step": 639875 }, { "epoch": 6.29, "grad_norm": 0.11571008712053299, "learning_rate": 1.8739759897524503e-06, "loss": 0.0595, "step": 639900 }, { "epoch": 6.29, "grad_norm": 13.831486701965332, "learning_rate": 1.8738518672982017e-06, "loss": 0.0189, "step": 639925 }, { "epoch": 6.29, "grad_norm": 0.5701013803482056, "learning_rate": 1.8737277448439534e-06, "loss": 0.0668, "step": 639950 }, { "epoch": 6.29, "grad_norm": 1.5513275861740112, "learning_rate": 1.873603622389705e-06, "loss": 0.02, "step": 639975 }, { "epoch": 6.29, "grad_norm": 1.9637823104858398, "learning_rate": 1.8734794999354564e-06, "loss": 0.0664, "step": 640000 }, { "epoch": 6.29, "eval_loss": 0.8405264019966125, "eval_runtime": 6106.6279, "eval_samples_per_second": 1.55, "eval_steps_per_second": 0.194, "eval_wer": 0.11234146655507629, "step": 640000 }, { "epoch": 6.29, "grad_norm": 10.9160737991333, "learning_rate": 1.873355377481208e-06, "loss": 0.0253, "step": 640025 }, { "epoch": 6.29, "grad_norm": 2.300380229949951, "learning_rate": 1.8732312550269597e-06, "loss": 0.0947, "step": 640050 }, { "epoch": 6.29, "grad_norm": 9.649073600769043, "learning_rate": 1.8731071325727109e-06, "loss": 0.0257, "step": 640075 }, { "epoch": 6.29, "grad_norm": 2.194783926010132, "learning_rate": 1.8729830101184625e-06, "loss": 0.0764, "step": 640100 }, { "epoch": 6.29, "grad_norm": 6.694711208343506, "learning_rate": 1.8728588876642142e-06, "loss": 0.0233, "step": 640125 }, { "epoch": 6.29, "grad_norm": 0.09673479199409485, "learning_rate": 1.8727347652099656e-06, "loss": 0.0715, "step": 640150 }, { "epoch": 6.29, "grad_norm": 1.8390759229660034, "learning_rate": 1.8726106427557172e-06, "loss": 0.0189, "step": 640175 }, { "epoch": 6.29, "grad_norm": 0.26788946986198425, "learning_rate": 1.8724865203014689e-06, "loss": 0.084, "step": 640200 }, { "epoch": 6.29, "grad_norm": 6.845200538635254, "learning_rate": 1.8723623978472203e-06, "loss": 0.0258, "step": 640225 }, { "epoch": 6.3, "grad_norm": 4.569677829742432, "learning_rate": 1.872238275392972e-06, "loss": 0.0555, "step": 640250 }, { "epoch": 6.3, "grad_norm": 8.607641220092773, "learning_rate": 1.8721141529387235e-06, "loss": 0.0188, "step": 640275 }, { "epoch": 6.3, "grad_norm": 0.33515259623527527, "learning_rate": 1.8719900304844748e-06, "loss": 0.0692, "step": 640300 }, { "epoch": 6.3, "grad_norm": 9.513191223144531, "learning_rate": 1.8718659080302264e-06, "loss": 0.0127, "step": 640325 }, { "epoch": 6.3, "grad_norm": 1.608094573020935, "learning_rate": 1.8717417855759778e-06, "loss": 0.0963, "step": 640350 }, { "epoch": 6.3, "grad_norm": 8.013371467590332, "learning_rate": 1.8716176631217295e-06, "loss": 0.0227, "step": 640375 }, { "epoch": 6.3, "grad_norm": 4.29339599609375, "learning_rate": 1.871493540667481e-06, "loss": 0.0905, "step": 640400 }, { "epoch": 6.3, "grad_norm": 7.482349872589111, "learning_rate": 1.8713694182132325e-06, "loss": 0.0351, "step": 640425 }, { "epoch": 6.3, "grad_norm": 2.545921564102173, "learning_rate": 1.8712452957589841e-06, "loss": 0.0966, "step": 640450 }, { "epoch": 6.3, "grad_norm": 1.609297752380371, "learning_rate": 1.8711211733047358e-06, "loss": 0.0176, "step": 640475 }, { "epoch": 6.3, "grad_norm": 3.119892120361328, "learning_rate": 1.8709970508504872e-06, "loss": 0.0798, "step": 640500 }, { "epoch": 6.3, "grad_norm": 5.398599147796631, "learning_rate": 1.8708729283962388e-06, "loss": 0.0276, "step": 640525 }, { "epoch": 6.3, "grad_norm": 2.0978634357452393, "learning_rate": 1.8707488059419905e-06, "loss": 0.0773, "step": 640550 }, { "epoch": 6.3, "grad_norm": 0.9635225534439087, "learning_rate": 1.8706246834877417e-06, "loss": 0.0288, "step": 640575 }, { "epoch": 6.3, "grad_norm": 0.8969765305519104, "learning_rate": 1.8705005610334933e-06, "loss": 0.074, "step": 640600 }, { "epoch": 6.3, "grad_norm": 5.930145740509033, "learning_rate": 1.870376438579245e-06, "loss": 0.0133, "step": 640625 }, { "epoch": 6.3, "grad_norm": 9.837972640991211, "learning_rate": 1.8702523161249964e-06, "loss": 0.0757, "step": 640650 }, { "epoch": 6.3, "grad_norm": 4.943224906921387, "learning_rate": 1.870128193670748e-06, "loss": 0.0224, "step": 640675 }, { "epoch": 6.3, "grad_norm": 6.3691558837890625, "learning_rate": 1.8700040712164996e-06, "loss": 0.0799, "step": 640700 }, { "epoch": 6.3, "grad_norm": 0.7148980498313904, "learning_rate": 1.869879948762251e-06, "loss": 0.0096, "step": 640725 }, { "epoch": 6.3, "grad_norm": 0.16905035078525543, "learning_rate": 1.8697558263080027e-06, "loss": 0.0912, "step": 640750 }, { "epoch": 6.3, "grad_norm": 1.3329801559448242, "learning_rate": 1.869631703853754e-06, "loss": 0.0196, "step": 640775 }, { "epoch": 6.3, "grad_norm": 1.9165784120559692, "learning_rate": 1.8695075813995056e-06, "loss": 0.0753, "step": 640800 }, { "epoch": 6.3, "grad_norm": 3.222196578979492, "learning_rate": 1.8693834589452572e-06, "loss": 0.0196, "step": 640825 }, { "epoch": 6.3, "grad_norm": 9.512126922607422, "learning_rate": 1.8692593364910086e-06, "loss": 0.0611, "step": 640850 }, { "epoch": 6.3, "grad_norm": 8.45659351348877, "learning_rate": 1.8691352140367602e-06, "loss": 0.0152, "step": 640875 }, { "epoch": 6.3, "grad_norm": 1.4260327816009521, "learning_rate": 1.8690110915825119e-06, "loss": 0.0861, "step": 640900 }, { "epoch": 6.3, "grad_norm": 4.0621137619018555, "learning_rate": 1.8688869691282633e-06, "loss": 0.0177, "step": 640925 }, { "epoch": 6.3, "grad_norm": 8.806093215942383, "learning_rate": 1.868762846674015e-06, "loss": 0.089, "step": 640950 }, { "epoch": 6.3, "grad_norm": 4.593910217285156, "learning_rate": 1.8686387242197666e-06, "loss": 0.0181, "step": 640975 }, { "epoch": 6.3, "grad_norm": 1.5950504541397095, "learning_rate": 1.8685146017655178e-06, "loss": 0.08, "step": 641000 }, { "epoch": 6.3, "grad_norm": 6.203390598297119, "learning_rate": 1.8683904793112694e-06, "loss": 0.0192, "step": 641025 }, { "epoch": 6.3, "grad_norm": 1.5507639646530151, "learning_rate": 1.868266356857021e-06, "loss": 0.0928, "step": 641050 }, { "epoch": 6.3, "grad_norm": 0.8821220993995667, "learning_rate": 1.8681422344027725e-06, "loss": 0.0162, "step": 641075 }, { "epoch": 6.3, "grad_norm": 0.367529958486557, "learning_rate": 1.8680181119485241e-06, "loss": 0.0807, "step": 641100 }, { "epoch": 6.3, "grad_norm": 2.548924684524536, "learning_rate": 1.8678939894942757e-06, "loss": 0.0293, "step": 641125 }, { "epoch": 6.3, "grad_norm": 2.0974953174591064, "learning_rate": 1.8677698670400272e-06, "loss": 0.0869, "step": 641150 }, { "epoch": 6.3, "grad_norm": 2.5651345252990723, "learning_rate": 1.8676457445857788e-06, "loss": 0.0175, "step": 641175 }, { "epoch": 6.3, "grad_norm": 3.951756000518799, "learning_rate": 1.86752162213153e-06, "loss": 0.0872, "step": 641200 }, { "epoch": 6.3, "grad_norm": 12.767733573913574, "learning_rate": 1.8673974996772817e-06, "loss": 0.0344, "step": 641225 }, { "epoch": 6.3, "grad_norm": 3.7531511783599854, "learning_rate": 1.8672733772230333e-06, "loss": 0.0787, "step": 641250 }, { "epoch": 6.31, "grad_norm": 7.500667095184326, "learning_rate": 1.8671492547687847e-06, "loss": 0.0244, "step": 641275 }, { "epoch": 6.31, "grad_norm": 1.2889235019683838, "learning_rate": 1.8670251323145363e-06, "loss": 0.1024, "step": 641300 }, { "epoch": 6.31, "grad_norm": 9.116006851196289, "learning_rate": 1.866901009860288e-06, "loss": 0.0253, "step": 641325 }, { "epoch": 6.31, "grad_norm": 7.42191743850708, "learning_rate": 1.8667768874060394e-06, "loss": 0.0881, "step": 641350 }, { "epoch": 6.31, "grad_norm": 11.740232467651367, "learning_rate": 1.866652764951791e-06, "loss": 0.0252, "step": 641375 }, { "epoch": 6.31, "grad_norm": 1.533945918083191, "learning_rate": 1.8665286424975427e-06, "loss": 0.0799, "step": 641400 }, { "epoch": 6.31, "grad_norm": 4.034668445587158, "learning_rate": 1.8664045200432939e-06, "loss": 0.0247, "step": 641425 }, { "epoch": 6.31, "grad_norm": 1.8157824277877808, "learning_rate": 1.8662803975890455e-06, "loss": 0.0991, "step": 641450 }, { "epoch": 6.31, "grad_norm": 7.922022342681885, "learning_rate": 1.8661562751347972e-06, "loss": 0.0156, "step": 641475 }, { "epoch": 6.31, "grad_norm": 3.417571783065796, "learning_rate": 1.8660321526805486e-06, "loss": 0.0812, "step": 641500 }, { "epoch": 6.31, "grad_norm": 6.922976016998291, "learning_rate": 1.8659080302263002e-06, "loss": 0.0293, "step": 641525 }, { "epoch": 6.31, "grad_norm": 6.805006980895996, "learning_rate": 1.8657839077720519e-06, "loss": 0.0823, "step": 641550 }, { "epoch": 6.31, "grad_norm": 7.501502513885498, "learning_rate": 1.8656597853178033e-06, "loss": 0.0163, "step": 641575 }, { "epoch": 6.31, "grad_norm": 3.3209543228149414, "learning_rate": 1.865535662863555e-06, "loss": 0.0658, "step": 641600 }, { "epoch": 6.31, "grad_norm": 17.592906951904297, "learning_rate": 1.8654115404093061e-06, "loss": 0.0214, "step": 641625 }, { "epoch": 6.31, "grad_norm": 6.315785884857178, "learning_rate": 1.8652874179550578e-06, "loss": 0.0819, "step": 641650 }, { "epoch": 6.31, "grad_norm": 12.848326683044434, "learning_rate": 1.8651632955008094e-06, "loss": 0.0247, "step": 641675 }, { "epoch": 6.31, "grad_norm": 1.3097968101501465, "learning_rate": 1.8650391730465608e-06, "loss": 0.0972, "step": 641700 }, { "epoch": 6.31, "grad_norm": 2.272693634033203, "learning_rate": 1.8649150505923124e-06, "loss": 0.0198, "step": 641725 }, { "epoch": 6.31, "grad_norm": 4.513689994812012, "learning_rate": 1.864790928138064e-06, "loss": 0.0947, "step": 641750 }, { "epoch": 6.31, "grad_norm": 11.15023136138916, "learning_rate": 1.8646668056838155e-06, "loss": 0.0247, "step": 641775 }, { "epoch": 6.31, "grad_norm": 0.26488828659057617, "learning_rate": 1.8645426832295671e-06, "loss": 0.0757, "step": 641800 }, { "epoch": 6.31, "grad_norm": 3.837021827697754, "learning_rate": 1.8644185607753188e-06, "loss": 0.0191, "step": 641825 }, { "epoch": 6.31, "grad_norm": 1.7477805614471436, "learning_rate": 1.8642944383210702e-06, "loss": 0.0554, "step": 641850 }, { "epoch": 6.31, "grad_norm": 14.274421691894531, "learning_rate": 1.8641703158668218e-06, "loss": 0.0302, "step": 641875 }, { "epoch": 6.31, "grad_norm": 1.0508400201797485, "learning_rate": 1.8640461934125735e-06, "loss": 0.0504, "step": 641900 }, { "epoch": 6.31, "grad_norm": 16.1198787689209, "learning_rate": 1.8639220709583247e-06, "loss": 0.02, "step": 641925 }, { "epoch": 6.31, "grad_norm": 6.014122486114502, "learning_rate": 1.8637979485040763e-06, "loss": 0.1003, "step": 641950 }, { "epoch": 6.31, "grad_norm": 10.87615966796875, "learning_rate": 1.863673826049828e-06, "loss": 0.0205, "step": 641975 }, { "epoch": 6.31, "grad_norm": 2.963895320892334, "learning_rate": 1.8635497035955794e-06, "loss": 0.0677, "step": 642000 }, { "epoch": 6.31, "grad_norm": 5.7193217277526855, "learning_rate": 1.863425581141331e-06, "loss": 0.0264, "step": 642025 }, { "epoch": 6.31, "grad_norm": 0.41892576217651367, "learning_rate": 1.8633064235852525e-06, "loss": 0.1004, "step": 642050 }, { "epoch": 6.31, "grad_norm": 8.270724296569824, "learning_rate": 1.863182301131004e-06, "loss": 0.0269, "step": 642075 }, { "epoch": 6.31, "grad_norm": 0.9609820246696472, "learning_rate": 1.8630581786767555e-06, "loss": 0.0637, "step": 642100 }, { "epoch": 6.31, "grad_norm": 14.569048881530762, "learning_rate": 1.8629340562225072e-06, "loss": 0.0317, "step": 642125 }, { "epoch": 6.31, "grad_norm": 0.9440339803695679, "learning_rate": 1.8628099337682588e-06, "loss": 0.0585, "step": 642150 }, { "epoch": 6.31, "grad_norm": 4.878399848937988, "learning_rate": 1.86268581131401e-06, "loss": 0.0208, "step": 642175 }, { "epoch": 6.31, "grad_norm": 2.1394529342651367, "learning_rate": 1.8625616888597616e-06, "loss": 0.0518, "step": 642200 }, { "epoch": 6.31, "grad_norm": 8.037496566772461, "learning_rate": 1.862437566405513e-06, "loss": 0.0249, "step": 642225 }, { "epoch": 6.31, "grad_norm": 2.5408103466033936, "learning_rate": 1.8623134439512647e-06, "loss": 0.0775, "step": 642250 }, { "epoch": 6.31, "grad_norm": 5.923258304595947, "learning_rate": 1.8621893214970163e-06, "loss": 0.0261, "step": 642275 }, { "epoch": 6.32, "grad_norm": 1.7347290515899658, "learning_rate": 1.8620651990427678e-06, "loss": 0.0916, "step": 642300 }, { "epoch": 6.32, "grad_norm": 4.904514789581299, "learning_rate": 1.8619410765885194e-06, "loss": 0.026, "step": 642325 }, { "epoch": 6.32, "grad_norm": 5.637156963348389, "learning_rate": 1.861816954134271e-06, "loss": 0.087, "step": 642350 }, { "epoch": 6.32, "grad_norm": 5.302247047424316, "learning_rate": 1.8616928316800222e-06, "loss": 0.0433, "step": 642375 }, { "epoch": 6.32, "grad_norm": 2.128969430923462, "learning_rate": 1.8615687092257739e-06, "loss": 0.0798, "step": 642400 }, { "epoch": 6.32, "grad_norm": 10.318182945251465, "learning_rate": 1.8614445867715255e-06, "loss": 0.0208, "step": 642425 }, { "epoch": 6.32, "grad_norm": 7.473826885223389, "learning_rate": 1.861320464317277e-06, "loss": 0.0669, "step": 642450 }, { "epoch": 6.32, "grad_norm": 6.789175033569336, "learning_rate": 1.8611963418630286e-06, "loss": 0.0168, "step": 642475 }, { "epoch": 6.32, "grad_norm": 2.3600354194641113, "learning_rate": 1.8610722194087802e-06, "loss": 0.0773, "step": 642500 }, { "epoch": 6.32, "grad_norm": 7.170378684997559, "learning_rate": 1.8609480969545316e-06, "loss": 0.0311, "step": 642525 }, { "epoch": 6.32, "grad_norm": 5.028348445892334, "learning_rate": 1.8608239745002833e-06, "loss": 0.0715, "step": 642550 }, { "epoch": 6.32, "grad_norm": 13.9370756149292, "learning_rate": 1.8606998520460349e-06, "loss": 0.0253, "step": 642575 }, { "epoch": 6.32, "grad_norm": 2.759216070175171, "learning_rate": 1.860575729591786e-06, "loss": 0.0658, "step": 642600 }, { "epoch": 6.32, "grad_norm": 8.366086959838867, "learning_rate": 1.8604516071375377e-06, "loss": 0.0214, "step": 642625 }, { "epoch": 6.32, "grad_norm": 4.418929576873779, "learning_rate": 1.8603274846832892e-06, "loss": 0.0801, "step": 642650 }, { "epoch": 6.32, "grad_norm": 1.864732265472412, "learning_rate": 1.8602033622290408e-06, "loss": 0.021, "step": 642675 }, { "epoch": 6.32, "grad_norm": 4.875914096832275, "learning_rate": 1.8600792397747924e-06, "loss": 0.0702, "step": 642700 }, { "epoch": 6.32, "grad_norm": 6.892351150512695, "learning_rate": 1.8599551173205439e-06, "loss": 0.022, "step": 642725 }, { "epoch": 6.32, "grad_norm": 3.447902202606201, "learning_rate": 1.8598309948662955e-06, "loss": 0.0869, "step": 642750 }, { "epoch": 6.32, "grad_norm": 2.1766209602355957, "learning_rate": 1.8597068724120471e-06, "loss": 0.0116, "step": 642775 }, { "epoch": 6.32, "grad_norm": 2.697312355041504, "learning_rate": 1.8595827499577983e-06, "loss": 0.07, "step": 642800 }, { "epoch": 6.32, "grad_norm": 5.5121684074401855, "learning_rate": 1.85945862750355e-06, "loss": 0.033, "step": 642825 }, { "epoch": 6.32, "grad_norm": 0.15268242359161377, "learning_rate": 1.8593345050493016e-06, "loss": 0.062, "step": 642850 }, { "epoch": 6.32, "grad_norm": 11.709579467773438, "learning_rate": 1.859210382595053e-06, "loss": 0.0185, "step": 642875 }, { "epoch": 6.32, "grad_norm": 3.651230812072754, "learning_rate": 1.8590862601408047e-06, "loss": 0.0659, "step": 642900 }, { "epoch": 6.32, "grad_norm": 2.591269016265869, "learning_rate": 1.8589621376865563e-06, "loss": 0.02, "step": 642925 }, { "epoch": 6.32, "grad_norm": 2.5765206813812256, "learning_rate": 1.8588380152323077e-06, "loss": 0.0868, "step": 642950 }, { "epoch": 6.32, "grad_norm": 8.638176918029785, "learning_rate": 1.8587138927780594e-06, "loss": 0.0296, "step": 642975 }, { "epoch": 6.32, "grad_norm": 1.0436890125274658, "learning_rate": 1.858589770323811e-06, "loss": 0.0809, "step": 643000 }, { "epoch": 6.32, "grad_norm": 11.09461498260498, "learning_rate": 1.8584656478695622e-06, "loss": 0.0115, "step": 643025 }, { "epoch": 6.32, "grad_norm": 0.038770005106925964, "learning_rate": 1.8583415254153138e-06, "loss": 0.066, "step": 643050 }, { "epoch": 6.32, "grad_norm": 2.0917935371398926, "learning_rate": 1.8582174029610653e-06, "loss": 0.0267, "step": 643075 }, { "epoch": 6.32, "grad_norm": 2.9250295162200928, "learning_rate": 1.8580932805068169e-06, "loss": 0.0932, "step": 643100 }, { "epoch": 6.32, "grad_norm": 5.38895320892334, "learning_rate": 1.8579691580525685e-06, "loss": 0.0132, "step": 643125 }, { "epoch": 6.32, "grad_norm": 2.091967821121216, "learning_rate": 1.85784503559832e-06, "loss": 0.0733, "step": 643150 }, { "epoch": 6.32, "grad_norm": 4.949068069458008, "learning_rate": 1.8577209131440716e-06, "loss": 0.0168, "step": 643175 }, { "epoch": 6.32, "grad_norm": 0.059819888323545456, "learning_rate": 1.8575967906898232e-06, "loss": 0.0826, "step": 643200 }, { "epoch": 6.32, "grad_norm": 4.4097795486450195, "learning_rate": 1.8574726682355744e-06, "loss": 0.0106, "step": 643225 }, { "epoch": 6.32, "grad_norm": 1.3018684387207031, "learning_rate": 1.857348545781326e-06, "loss": 0.0729, "step": 643250 }, { "epoch": 6.32, "grad_norm": 5.265246391296387, "learning_rate": 1.8572244233270777e-06, "loss": 0.0237, "step": 643275 }, { "epoch": 6.33, "grad_norm": 0.7549963593482971, "learning_rate": 1.8571003008728291e-06, "loss": 0.0888, "step": 643300 }, { "epoch": 6.33, "grad_norm": 8.956608772277832, "learning_rate": 1.8569761784185808e-06, "loss": 0.0393, "step": 643325 }, { "epoch": 6.33, "grad_norm": 1.6579086780548096, "learning_rate": 1.8568520559643324e-06, "loss": 0.0666, "step": 643350 }, { "epoch": 6.33, "grad_norm": 0.9685744047164917, "learning_rate": 1.8567279335100838e-06, "loss": 0.0206, "step": 643375 }, { "epoch": 6.33, "grad_norm": 0.5398887991905212, "learning_rate": 1.8566038110558355e-06, "loss": 0.0533, "step": 643400 }, { "epoch": 6.33, "grad_norm": 19.335119247436523, "learning_rate": 1.856479688601587e-06, "loss": 0.0205, "step": 643425 }, { "epoch": 6.33, "grad_norm": 0.49622872471809387, "learning_rate": 1.8563555661473385e-06, "loss": 0.0763, "step": 643450 }, { "epoch": 6.33, "grad_norm": 5.887663841247559, "learning_rate": 1.8562314436930901e-06, "loss": 0.0243, "step": 643475 }, { "epoch": 6.33, "grad_norm": 7.341365337371826, "learning_rate": 1.8561073212388414e-06, "loss": 0.0566, "step": 643500 }, { "epoch": 6.33, "grad_norm": 5.415216445922852, "learning_rate": 1.855983198784593e-06, "loss": 0.0315, "step": 643525 }, { "epoch": 6.33, "grad_norm": 2.532423973083496, "learning_rate": 1.8558590763303446e-06, "loss": 0.0758, "step": 643550 }, { "epoch": 6.33, "grad_norm": 20.165973663330078, "learning_rate": 1.855734953876096e-06, "loss": 0.0221, "step": 643575 }, { "epoch": 6.33, "grad_norm": 2.6692416667938232, "learning_rate": 1.8556108314218477e-06, "loss": 0.0748, "step": 643600 }, { "epoch": 6.33, "grad_norm": 5.855251312255859, "learning_rate": 1.8554867089675993e-06, "loss": 0.0239, "step": 643625 }, { "epoch": 6.33, "grad_norm": 1.276189923286438, "learning_rate": 1.8553625865133507e-06, "loss": 0.0834, "step": 643650 }, { "epoch": 6.33, "grad_norm": 13.157962799072266, "learning_rate": 1.8552384640591024e-06, "loss": 0.0254, "step": 643675 }, { "epoch": 6.33, "grad_norm": 3.47529673576355, "learning_rate": 1.855114341604854e-06, "loss": 0.0726, "step": 643700 }, { "epoch": 6.33, "grad_norm": 11.64477252960205, "learning_rate": 1.8549902191506052e-06, "loss": 0.0216, "step": 643725 }, { "epoch": 6.33, "grad_norm": 0.5266122817993164, "learning_rate": 1.8548660966963569e-06, "loss": 0.0689, "step": 643750 }, { "epoch": 6.33, "grad_norm": 0.45197904109954834, "learning_rate": 1.8547419742421085e-06, "loss": 0.0281, "step": 643775 }, { "epoch": 6.33, "grad_norm": 7.835929870605469, "learning_rate": 1.85461785178786e-06, "loss": 0.0505, "step": 643800 }, { "epoch": 6.33, "grad_norm": 6.100595951080322, "learning_rate": 1.8544937293336116e-06, "loss": 0.034, "step": 643825 }, { "epoch": 6.33, "grad_norm": 3.65757417678833, "learning_rate": 1.8543696068793632e-06, "loss": 0.101, "step": 643850 }, { "epoch": 6.33, "grad_norm": 3.4156813621520996, "learning_rate": 1.8542454844251146e-06, "loss": 0.0156, "step": 643875 }, { "epoch": 6.33, "grad_norm": 1.2728010416030884, "learning_rate": 1.8541213619708662e-06, "loss": 0.0656, "step": 643900 }, { "epoch": 6.33, "grad_norm": 6.6715898513793945, "learning_rate": 1.8539972395166175e-06, "loss": 0.022, "step": 643925 }, { "epoch": 6.33, "grad_norm": 6.815311908721924, "learning_rate": 1.853873117062369e-06, "loss": 0.0736, "step": 643950 }, { "epoch": 6.33, "grad_norm": 8.192999839782715, "learning_rate": 1.8537489946081207e-06, "loss": 0.0217, "step": 643975 }, { "epoch": 6.33, "grad_norm": 6.626160621643066, "learning_rate": 1.8536248721538722e-06, "loss": 0.0815, "step": 644000 }, { "epoch": 6.33, "grad_norm": 8.086812973022461, "learning_rate": 1.8535007496996238e-06, "loss": 0.0289, "step": 644025 }, { "epoch": 6.33, "grad_norm": 2.8886611461639404, "learning_rate": 1.8533766272453754e-06, "loss": 0.1026, "step": 644050 }, { "epoch": 6.33, "grad_norm": 3.842815399169922, "learning_rate": 1.8532525047911268e-06, "loss": 0.0208, "step": 644075 }, { "epoch": 6.33, "grad_norm": 1.5157214403152466, "learning_rate": 1.8531283823368785e-06, "loss": 0.0699, "step": 644100 }, { "epoch": 6.33, "grad_norm": 5.953470230102539, "learning_rate": 1.8530042598826301e-06, "loss": 0.0172, "step": 644125 }, { "epoch": 6.33, "grad_norm": 8.125550270080566, "learning_rate": 1.8528801374283813e-06, "loss": 0.0935, "step": 644150 }, { "epoch": 6.33, "grad_norm": 8.554252624511719, "learning_rate": 1.852756014974133e-06, "loss": 0.0139, "step": 644175 }, { "epoch": 6.33, "grad_norm": 7.854331016540527, "learning_rate": 1.8526318925198846e-06, "loss": 0.0617, "step": 644200 }, { "epoch": 6.33, "grad_norm": 1.5787392854690552, "learning_rate": 1.852507770065636e-06, "loss": 0.0175, "step": 644225 }, { "epoch": 6.33, "grad_norm": 4.5560994148254395, "learning_rate": 1.8523836476113877e-06, "loss": 0.0683, "step": 644250 }, { "epoch": 6.33, "grad_norm": 15.499494552612305, "learning_rate": 1.8522595251571393e-06, "loss": 0.0312, "step": 644275 }, { "epoch": 6.33, "grad_norm": 3.5556581020355225, "learning_rate": 1.8521354027028907e-06, "loss": 0.0607, "step": 644300 }, { "epoch": 6.34, "grad_norm": 15.791618347167969, "learning_rate": 1.8520112802486423e-06, "loss": 0.0227, "step": 644325 }, { "epoch": 6.34, "grad_norm": 1.7452998161315918, "learning_rate": 1.8518871577943936e-06, "loss": 0.0784, "step": 644350 }, { "epoch": 6.34, "grad_norm": 5.936259746551514, "learning_rate": 1.8517630353401452e-06, "loss": 0.0435, "step": 644375 }, { "epoch": 6.34, "grad_norm": 1.4021514654159546, "learning_rate": 1.8516389128858968e-06, "loss": 0.0969, "step": 644400 }, { "epoch": 6.34, "grad_norm": 0.5929885506629944, "learning_rate": 1.8515147904316483e-06, "loss": 0.0228, "step": 644425 }, { "epoch": 6.34, "grad_norm": 3.400160074234009, "learning_rate": 1.8513906679773999e-06, "loss": 0.0749, "step": 644450 }, { "epoch": 6.34, "grad_norm": 17.67798614501953, "learning_rate": 1.8512665455231515e-06, "loss": 0.026, "step": 644475 }, { "epoch": 6.34, "grad_norm": 0.15439943969249725, "learning_rate": 1.851142423068903e-06, "loss": 0.0695, "step": 644500 }, { "epoch": 6.34, "grad_norm": 4.205198764801025, "learning_rate": 1.8510183006146546e-06, "loss": 0.032, "step": 644525 }, { "epoch": 6.34, "grad_norm": 6.339095115661621, "learning_rate": 1.8508941781604062e-06, "loss": 0.0573, "step": 644550 }, { "epoch": 6.34, "grad_norm": 16.155372619628906, "learning_rate": 1.8507700557061574e-06, "loss": 0.0265, "step": 644575 }, { "epoch": 6.34, "grad_norm": 5.341032028198242, "learning_rate": 1.850645933251909e-06, "loss": 0.0697, "step": 644600 }, { "epoch": 6.34, "grad_norm": 6.926705837249756, "learning_rate": 1.8505218107976607e-06, "loss": 0.0372, "step": 644625 }, { "epoch": 6.34, "grad_norm": 6.130375385284424, "learning_rate": 1.8503976883434121e-06, "loss": 0.0738, "step": 644650 }, { "epoch": 6.34, "grad_norm": 0.11199415475130081, "learning_rate": 1.8502735658891638e-06, "loss": 0.0152, "step": 644675 }, { "epoch": 6.34, "grad_norm": 1.122135043144226, "learning_rate": 1.8501494434349154e-06, "loss": 0.0852, "step": 644700 }, { "epoch": 6.34, "grad_norm": 2.7061285972595215, "learning_rate": 1.8500253209806668e-06, "loss": 0.0187, "step": 644725 }, { "epoch": 6.34, "grad_norm": 13.011180877685547, "learning_rate": 1.8499011985264184e-06, "loss": 0.077, "step": 644750 }, { "epoch": 6.34, "grad_norm": 3.056133985519409, "learning_rate": 1.8497770760721699e-06, "loss": 0.0288, "step": 644775 }, { "epoch": 6.34, "grad_norm": 1.374299168586731, "learning_rate": 1.8496529536179215e-06, "loss": 0.0618, "step": 644800 }, { "epoch": 6.34, "grad_norm": 4.352984428405762, "learning_rate": 1.8495288311636731e-06, "loss": 0.0181, "step": 644825 }, { "epoch": 6.34, "grad_norm": 0.6611695885658264, "learning_rate": 1.8494047087094244e-06, "loss": 0.0892, "step": 644850 }, { "epoch": 6.34, "grad_norm": 10.412995338439941, "learning_rate": 1.849280586255176e-06, "loss": 0.0313, "step": 644875 }, { "epoch": 6.34, "grad_norm": 25.359373092651367, "learning_rate": 1.8491564638009276e-06, "loss": 0.0852, "step": 644900 }, { "epoch": 6.34, "grad_norm": 14.289660453796387, "learning_rate": 1.849032341346679e-06, "loss": 0.0166, "step": 644925 }, { "epoch": 6.34, "grad_norm": 3.7461366653442383, "learning_rate": 1.8489082188924307e-06, "loss": 0.0688, "step": 644950 }, { "epoch": 6.34, "grad_norm": 11.016953468322754, "learning_rate": 1.8487840964381823e-06, "loss": 0.0201, "step": 644975 }, { "epoch": 6.34, "grad_norm": 0.7472472190856934, "learning_rate": 1.8486599739839337e-06, "loss": 0.0511, "step": 645000 }, { "epoch": 6.34, "grad_norm": 19.981685638427734, "learning_rate": 1.8485358515296854e-06, "loss": 0.0218, "step": 645025 }, { "epoch": 6.34, "grad_norm": 2.7992796897888184, "learning_rate": 1.848411729075437e-06, "loss": 0.0416, "step": 645050 }, { "epoch": 6.34, "grad_norm": 8.50049114227295, "learning_rate": 1.8482876066211882e-06, "loss": 0.0303, "step": 645075 }, { "epoch": 6.34, "grad_norm": 4.55560302734375, "learning_rate": 1.8481634841669399e-06, "loss": 0.073, "step": 645100 }, { "epoch": 6.34, "grad_norm": 10.571986198425293, "learning_rate": 1.8480393617126915e-06, "loss": 0.0149, "step": 645125 }, { "epoch": 6.34, "grad_norm": 1.7293468713760376, "learning_rate": 1.847915239258443e-06, "loss": 0.0802, "step": 645150 }, { "epoch": 6.34, "grad_norm": 3.298009157180786, "learning_rate": 1.8477911168041945e-06, "loss": 0.0144, "step": 645175 }, { "epoch": 6.34, "grad_norm": 4.3665452003479, "learning_rate": 1.847666994349946e-06, "loss": 0.0876, "step": 645200 }, { "epoch": 6.34, "grad_norm": 5.114945411682129, "learning_rate": 1.8475428718956976e-06, "loss": 0.0259, "step": 645225 }, { "epoch": 6.34, "grad_norm": 4.594372749328613, "learning_rate": 1.8474187494414492e-06, "loss": 0.0885, "step": 645250 }, { "epoch": 6.34, "grad_norm": 1.0053086280822754, "learning_rate": 1.8472946269872005e-06, "loss": 0.0288, "step": 645275 }, { "epoch": 6.34, "grad_norm": 3.224687099456787, "learning_rate": 1.8471754694311223e-06, "loss": 0.0828, "step": 645300 }, { "epoch": 6.34, "grad_norm": 6.149294376373291, "learning_rate": 1.8470513469768735e-06, "loss": 0.0222, "step": 645325 }, { "epoch": 6.35, "grad_norm": 2.2363998889923096, "learning_rate": 1.8469272245226252e-06, "loss": 0.0803, "step": 645350 }, { "epoch": 6.35, "grad_norm": 0.9412841796875, "learning_rate": 1.8468031020683766e-06, "loss": 0.0379, "step": 645375 }, { "epoch": 6.35, "grad_norm": 0.5769743323326111, "learning_rate": 1.8466789796141282e-06, "loss": 0.0876, "step": 645400 }, { "epoch": 6.35, "grad_norm": 31.308361053466797, "learning_rate": 1.8465548571598799e-06, "loss": 0.0197, "step": 645425 }, { "epoch": 6.35, "grad_norm": 1.4954742193222046, "learning_rate": 1.8464307347056313e-06, "loss": 0.0617, "step": 645450 }, { "epoch": 6.35, "grad_norm": 0.3674706816673279, "learning_rate": 1.846306612251383e-06, "loss": 0.0389, "step": 645475 }, { "epoch": 6.35, "grad_norm": 2.713268995285034, "learning_rate": 1.8461824897971346e-06, "loss": 0.0815, "step": 645500 }, { "epoch": 6.35, "grad_norm": 5.787431716918945, "learning_rate": 1.8460583673428858e-06, "loss": 0.0287, "step": 645525 }, { "epoch": 6.35, "grad_norm": 1.9243234395980835, "learning_rate": 1.8459342448886374e-06, "loss": 0.0939, "step": 645550 }, { "epoch": 6.35, "grad_norm": 4.239419937133789, "learning_rate": 1.845810122434389e-06, "loss": 0.0132, "step": 645575 }, { "epoch": 6.35, "grad_norm": 4.290441513061523, "learning_rate": 1.8456859999801405e-06, "loss": 0.0944, "step": 645600 }, { "epoch": 6.35, "grad_norm": 10.965096473693848, "learning_rate": 1.845561877525892e-06, "loss": 0.0289, "step": 645625 }, { "epoch": 6.35, "grad_norm": 1.5564674139022827, "learning_rate": 1.8454377550716437e-06, "loss": 0.1126, "step": 645650 }, { "epoch": 6.35, "grad_norm": 12.36355972290039, "learning_rate": 1.8453136326173952e-06, "loss": 0.0247, "step": 645675 }, { "epoch": 6.35, "grad_norm": 1.774580955505371, "learning_rate": 1.8451895101631468e-06, "loss": 0.0514, "step": 645700 }, { "epoch": 6.35, "grad_norm": 5.4746880531311035, "learning_rate": 1.8450653877088984e-06, "loss": 0.0477, "step": 645725 }, { "epoch": 6.35, "grad_norm": 5.316536903381348, "learning_rate": 1.8449412652546496e-06, "loss": 0.0634, "step": 645750 }, { "epoch": 6.35, "grad_norm": 6.397039413452148, "learning_rate": 1.8448171428004013e-06, "loss": 0.04, "step": 645775 }, { "epoch": 6.35, "grad_norm": 3.1059343814849854, "learning_rate": 1.8446930203461527e-06, "loss": 0.0927, "step": 645800 }, { "epoch": 6.35, "grad_norm": 3.1052157878875732, "learning_rate": 1.8445688978919043e-06, "loss": 0.036, "step": 645825 }, { "epoch": 6.35, "grad_norm": 4.141475200653076, "learning_rate": 1.844444775437656e-06, "loss": 0.0943, "step": 645850 }, { "epoch": 6.35, "grad_norm": 5.189511775970459, "learning_rate": 1.8443206529834074e-06, "loss": 0.0239, "step": 645875 }, { "epoch": 6.35, "grad_norm": 6.41680383682251, "learning_rate": 1.844196530529159e-06, "loss": 0.0888, "step": 645900 }, { "epoch": 6.35, "grad_norm": 7.345283508300781, "learning_rate": 1.8440724080749107e-06, "loss": 0.028, "step": 645925 }, { "epoch": 6.35, "grad_norm": 1.3069343566894531, "learning_rate": 1.8439482856206619e-06, "loss": 0.0874, "step": 645950 }, { "epoch": 6.35, "grad_norm": 0.5656858086585999, "learning_rate": 1.8438241631664135e-06, "loss": 0.0191, "step": 645975 }, { "epoch": 6.35, "grad_norm": 4.750767230987549, "learning_rate": 1.8437000407121651e-06, "loss": 0.0757, "step": 646000 }, { "epoch": 6.35, "grad_norm": 1.1516729593276978, "learning_rate": 1.8435759182579166e-06, "loss": 0.0128, "step": 646025 }, { "epoch": 6.35, "grad_norm": 0.065882109105587, "learning_rate": 1.8434517958036682e-06, "loss": 0.086, "step": 646050 }, { "epoch": 6.35, "grad_norm": 2.4715487957000732, "learning_rate": 1.8433276733494198e-06, "loss": 0.0181, "step": 646075 }, { "epoch": 6.35, "grad_norm": 7.321497440338135, "learning_rate": 1.8432035508951713e-06, "loss": 0.0799, "step": 646100 }, { "epoch": 6.35, "grad_norm": 8.355144500732422, "learning_rate": 1.843079428440923e-06, "loss": 0.0203, "step": 646125 }, { "epoch": 6.35, "grad_norm": 2.4723904132843018, "learning_rate": 1.8429553059866745e-06, "loss": 0.0903, "step": 646150 }, { "epoch": 6.35, "grad_norm": 8.983077049255371, "learning_rate": 1.842831183532426e-06, "loss": 0.0328, "step": 646175 }, { "epoch": 6.35, "grad_norm": 2.3691461086273193, "learning_rate": 1.8427070610781776e-06, "loss": 0.0794, "step": 646200 }, { "epoch": 6.35, "grad_norm": 10.005577087402344, "learning_rate": 1.8425829386239288e-06, "loss": 0.0268, "step": 646225 }, { "epoch": 6.35, "grad_norm": 0.31379827857017517, "learning_rate": 1.8424588161696804e-06, "loss": 0.0735, "step": 646250 }, { "epoch": 6.35, "grad_norm": 3.3559510707855225, "learning_rate": 1.842334693715432e-06, "loss": 0.0218, "step": 646275 }, { "epoch": 6.35, "grad_norm": 3.2553298473358154, "learning_rate": 1.8422105712611835e-06, "loss": 0.0632, "step": 646300 }, { "epoch": 6.35, "grad_norm": 12.105916023254395, "learning_rate": 1.8420864488069351e-06, "loss": 0.0321, "step": 646325 }, { "epoch": 6.36, "grad_norm": 3.4116263389587402, "learning_rate": 1.8419623263526868e-06, "loss": 0.0735, "step": 646350 }, { "epoch": 6.36, "grad_norm": 1.8910130262374878, "learning_rate": 1.8418382038984382e-06, "loss": 0.0261, "step": 646375 }, { "epoch": 6.36, "grad_norm": 6.643797874450684, "learning_rate": 1.8417140814441898e-06, "loss": 0.0819, "step": 646400 }, { "epoch": 6.36, "grad_norm": 3.6201791763305664, "learning_rate": 1.8415899589899415e-06, "loss": 0.0173, "step": 646425 }, { "epoch": 6.36, "grad_norm": 1.0835494995117188, "learning_rate": 1.8414658365356927e-06, "loss": 0.0611, "step": 646450 }, { "epoch": 6.36, "grad_norm": 2.910649061203003, "learning_rate": 1.8413417140814443e-06, "loss": 0.0345, "step": 646475 }, { "epoch": 6.36, "grad_norm": 0.5334072709083557, "learning_rate": 1.841217591627196e-06, "loss": 0.0527, "step": 646500 }, { "epoch": 6.36, "grad_norm": 11.374371528625488, "learning_rate": 1.8410934691729474e-06, "loss": 0.0186, "step": 646525 }, { "epoch": 6.36, "grad_norm": 0.44558724761009216, "learning_rate": 1.840969346718699e-06, "loss": 0.061, "step": 646550 }, { "epoch": 6.36, "grad_norm": 5.968545913696289, "learning_rate": 1.8408452242644506e-06, "loss": 0.017, "step": 646575 }, { "epoch": 6.36, "grad_norm": 7.838826656341553, "learning_rate": 1.840721101810202e-06, "loss": 0.0773, "step": 646600 }, { "epoch": 6.36, "grad_norm": 8.610187530517578, "learning_rate": 1.8405969793559537e-06, "loss": 0.0281, "step": 646625 }, { "epoch": 6.36, "grad_norm": 1.4080487489700317, "learning_rate": 1.840472856901705e-06, "loss": 0.1026, "step": 646650 }, { "epoch": 6.36, "grad_norm": 15.992948532104492, "learning_rate": 1.8403487344474565e-06, "loss": 0.0219, "step": 646675 }, { "epoch": 6.36, "grad_norm": 0.6394553184509277, "learning_rate": 1.8402246119932082e-06, "loss": 0.0935, "step": 646700 }, { "epoch": 6.36, "grad_norm": 3.0256807804107666, "learning_rate": 1.8401004895389596e-06, "loss": 0.0234, "step": 646725 }, { "epoch": 6.36, "grad_norm": 2.17403507232666, "learning_rate": 1.8399763670847112e-06, "loss": 0.0743, "step": 646750 }, { "epoch": 6.36, "grad_norm": 4.518040180206299, "learning_rate": 1.8398522446304629e-06, "loss": 0.0266, "step": 646775 }, { "epoch": 6.36, "grad_norm": 2.800297498703003, "learning_rate": 1.8397281221762143e-06, "loss": 0.0628, "step": 646800 }, { "epoch": 6.36, "grad_norm": 5.5322041511535645, "learning_rate": 1.839603999721966e-06, "loss": 0.014, "step": 646825 }, { "epoch": 6.36, "grad_norm": 1.2029240131378174, "learning_rate": 1.8394798772677176e-06, "loss": 0.0892, "step": 646850 }, { "epoch": 6.36, "grad_norm": 4.037421226501465, "learning_rate": 1.8393557548134688e-06, "loss": 0.0242, "step": 646875 }, { "epoch": 6.36, "grad_norm": 1.4401673078536987, "learning_rate": 1.8392316323592204e-06, "loss": 0.0806, "step": 646900 }, { "epoch": 6.36, "grad_norm": 11.524933815002441, "learning_rate": 1.839107509904972e-06, "loss": 0.0234, "step": 646925 }, { "epoch": 6.36, "grad_norm": 0.22181816399097443, "learning_rate": 1.8389833874507235e-06, "loss": 0.0998, "step": 646950 }, { "epoch": 6.36, "grad_norm": 13.18784236907959, "learning_rate": 1.838859264996475e-06, "loss": 0.0221, "step": 646975 }, { "epoch": 6.36, "grad_norm": 1.8769642114639282, "learning_rate": 1.8387351425422267e-06, "loss": 0.0749, "step": 647000 }, { "epoch": 6.36, "grad_norm": 5.394675254821777, "learning_rate": 1.8386110200879782e-06, "loss": 0.0374, "step": 647025 }, { "epoch": 6.36, "grad_norm": 1.8997316360473633, "learning_rate": 1.8384868976337298e-06, "loss": 0.0868, "step": 647050 }, { "epoch": 6.36, "grad_norm": 2.2041079998016357, "learning_rate": 1.838362775179481e-06, "loss": 0.0288, "step": 647075 }, { "epoch": 6.36, "grad_norm": 6.13043737411499, "learning_rate": 1.8382386527252326e-06, "loss": 0.0906, "step": 647100 }, { "epoch": 6.36, "grad_norm": 13.570626258850098, "learning_rate": 1.8381145302709843e-06, "loss": 0.0107, "step": 647125 }, { "epoch": 6.36, "grad_norm": 0.9597604870796204, "learning_rate": 1.8379904078167357e-06, "loss": 0.0816, "step": 647150 }, { "epoch": 6.36, "grad_norm": 14.687467575073242, "learning_rate": 1.8378662853624873e-06, "loss": 0.0217, "step": 647175 }, { "epoch": 6.36, "grad_norm": 0.31239771842956543, "learning_rate": 1.837742162908239e-06, "loss": 0.0806, "step": 647200 }, { "epoch": 6.36, "grad_norm": 7.030172348022461, "learning_rate": 1.8376180404539904e-06, "loss": 0.028, "step": 647225 }, { "epoch": 6.36, "grad_norm": 2.363767385482788, "learning_rate": 1.837493917999742e-06, "loss": 0.0613, "step": 647250 }, { "epoch": 6.36, "grad_norm": 4.331530570983887, "learning_rate": 1.8373697955454937e-06, "loss": 0.0292, "step": 647275 }, { "epoch": 6.36, "grad_norm": 4.752660274505615, "learning_rate": 1.8372456730912449e-06, "loss": 0.0781, "step": 647300 }, { "epoch": 6.36, "grad_norm": 4.82315731048584, "learning_rate": 1.8371215506369965e-06, "loss": 0.011, "step": 647325 }, { "epoch": 6.36, "grad_norm": 1.135350227355957, "learning_rate": 1.8369974281827481e-06, "loss": 0.0685, "step": 647350 }, { "epoch": 6.37, "grad_norm": 12.178598403930664, "learning_rate": 1.8368733057284996e-06, "loss": 0.0202, "step": 647375 }, { "epoch": 6.37, "grad_norm": 3.8788106441497803, "learning_rate": 1.8367491832742512e-06, "loss": 0.0926, "step": 647400 }, { "epoch": 6.37, "grad_norm": 5.552451133728027, "learning_rate": 1.8366250608200028e-06, "loss": 0.0212, "step": 647425 }, { "epoch": 6.37, "grad_norm": 0.9158000946044922, "learning_rate": 1.8365009383657543e-06, "loss": 0.0764, "step": 647450 }, { "epoch": 6.37, "grad_norm": 2.87290096282959, "learning_rate": 1.8363768159115059e-06, "loss": 0.0199, "step": 647475 }, { "epoch": 6.37, "grad_norm": 6.479706287384033, "learning_rate": 1.8362526934572573e-06, "loss": 0.1143, "step": 647500 }, { "epoch": 6.37, "grad_norm": 9.883211135864258, "learning_rate": 1.8361285710030087e-06, "loss": 0.0225, "step": 647525 }, { "epoch": 6.37, "grad_norm": 0.2325695902109146, "learning_rate": 1.8360044485487604e-06, "loss": 0.0835, "step": 647550 }, { "epoch": 6.37, "grad_norm": 5.705997943878174, "learning_rate": 1.8358803260945118e-06, "loss": 0.0221, "step": 647575 }, { "epoch": 6.37, "grad_norm": 0.6144707798957825, "learning_rate": 1.8357562036402634e-06, "loss": 0.0801, "step": 647600 }, { "epoch": 6.37, "grad_norm": 3.130316734313965, "learning_rate": 1.835632081186015e-06, "loss": 0.0277, "step": 647625 }, { "epoch": 6.37, "grad_norm": 0.13265007734298706, "learning_rate": 1.8355079587317665e-06, "loss": 0.0652, "step": 647650 }, { "epoch": 6.37, "grad_norm": 6.082077503204346, "learning_rate": 1.8353838362775181e-06, "loss": 0.0147, "step": 647675 }, { "epoch": 6.37, "grad_norm": 2.0612661838531494, "learning_rate": 1.8352597138232698e-06, "loss": 0.0749, "step": 647700 }, { "epoch": 6.37, "grad_norm": 6.695709228515625, "learning_rate": 1.8351355913690212e-06, "loss": 0.0253, "step": 647725 }, { "epoch": 6.37, "grad_norm": 6.085019588470459, "learning_rate": 1.8350114689147728e-06, "loss": 0.0596, "step": 647750 }, { "epoch": 6.37, "grad_norm": 10.609753608703613, "learning_rate": 1.8348873464605245e-06, "loss": 0.0333, "step": 647775 }, { "epoch": 6.37, "grad_norm": 1.2811200618743896, "learning_rate": 1.8347632240062757e-06, "loss": 0.0765, "step": 647800 }, { "epoch": 6.37, "grad_norm": 2.7404794692993164, "learning_rate": 1.8346391015520273e-06, "loss": 0.0147, "step": 647825 }, { "epoch": 6.37, "grad_norm": 2.9572384357452393, "learning_rate": 1.834514979097779e-06, "loss": 0.0846, "step": 647850 }, { "epoch": 6.37, "grad_norm": 5.999369144439697, "learning_rate": 1.8343908566435304e-06, "loss": 0.0299, "step": 647875 }, { "epoch": 6.37, "grad_norm": 3.3890397548675537, "learning_rate": 1.834266734189282e-06, "loss": 0.0835, "step": 647900 }, { "epoch": 6.37, "grad_norm": 24.62625503540039, "learning_rate": 1.8341426117350334e-06, "loss": 0.0263, "step": 647925 }, { "epoch": 6.37, "grad_norm": 1.298577904701233, "learning_rate": 1.834018489280785e-06, "loss": 0.0564, "step": 647950 }, { "epoch": 6.37, "grad_norm": 0.6326989531517029, "learning_rate": 1.8338943668265367e-06, "loss": 0.0148, "step": 647975 }, { "epoch": 6.37, "grad_norm": 0.18560324609279633, "learning_rate": 1.833770244372288e-06, "loss": 0.0761, "step": 648000 }, { "epoch": 6.37, "grad_norm": 3.498549461364746, "learning_rate": 1.8336461219180395e-06, "loss": 0.0325, "step": 648025 }, { "epoch": 6.37, "grad_norm": 2.589305877685547, "learning_rate": 1.8335219994637912e-06, "loss": 0.0648, "step": 648050 }, { "epoch": 6.37, "grad_norm": 3.2807888984680176, "learning_rate": 1.8333978770095426e-06, "loss": 0.0217, "step": 648075 }, { "epoch": 6.37, "grad_norm": 0.6172753572463989, "learning_rate": 1.8332737545552942e-06, "loss": 0.0638, "step": 648100 }, { "epoch": 6.37, "grad_norm": 5.12270975112915, "learning_rate": 1.8331496321010459e-06, "loss": 0.0341, "step": 648125 }, { "epoch": 6.37, "grad_norm": 0.44341006875038147, "learning_rate": 1.8330255096467973e-06, "loss": 0.0827, "step": 648150 }, { "epoch": 6.37, "grad_norm": 1.3420393466949463, "learning_rate": 1.832901387192549e-06, "loss": 0.0155, "step": 648175 }, { "epoch": 6.37, "grad_norm": 0.24647186696529388, "learning_rate": 1.8327822296364704e-06, "loss": 0.0649, "step": 648200 }, { "epoch": 6.37, "grad_norm": 3.7282893657684326, "learning_rate": 1.832658107182222e-06, "loss": 0.0275, "step": 648225 }, { "epoch": 6.37, "grad_norm": 0.4803694784641266, "learning_rate": 1.8325339847279732e-06, "loss": 0.0812, "step": 648250 }, { "epoch": 6.37, "grad_norm": 6.274189472198486, "learning_rate": 1.8324098622737248e-06, "loss": 0.0113, "step": 648275 }, { "epoch": 6.37, "grad_norm": 2.2632126808166504, "learning_rate": 1.8322857398194765e-06, "loss": 0.0606, "step": 648300 }, { "epoch": 6.37, "grad_norm": 6.831860542297363, "learning_rate": 1.832161617365228e-06, "loss": 0.0195, "step": 648325 }, { "epoch": 6.37, "grad_norm": 5.63995885848999, "learning_rate": 1.8320374949109795e-06, "loss": 0.0765, "step": 648350 }, { "epoch": 6.37, "grad_norm": 0.24132993817329407, "learning_rate": 1.8319133724567312e-06, "loss": 0.0132, "step": 648375 }, { "epoch": 6.38, "grad_norm": 6.521333694458008, "learning_rate": 1.8317892500024826e-06, "loss": 0.0586, "step": 648400 }, { "epoch": 6.38, "grad_norm": 3.0149190425872803, "learning_rate": 1.8316651275482342e-06, "loss": 0.0103, "step": 648425 }, { "epoch": 6.38, "grad_norm": 1.8938817977905273, "learning_rate": 1.8315410050939859e-06, "loss": 0.0646, "step": 648450 }, { "epoch": 6.38, "grad_norm": 18.448476791381836, "learning_rate": 1.831416882639737e-06, "loss": 0.027, "step": 648475 }, { "epoch": 6.38, "grad_norm": 0.6270712614059448, "learning_rate": 1.8312927601854887e-06, "loss": 0.0696, "step": 648500 }, { "epoch": 6.38, "grad_norm": 8.525750160217285, "learning_rate": 1.8311686377312401e-06, "loss": 0.0243, "step": 648525 }, { "epoch": 6.38, "grad_norm": 2.828636407852173, "learning_rate": 1.8310445152769918e-06, "loss": 0.084, "step": 648550 }, { "epoch": 6.38, "grad_norm": 3.6959099769592285, "learning_rate": 1.8309203928227434e-06, "loss": 0.0097, "step": 648575 }, { "epoch": 6.38, "grad_norm": 0.47114720940589905, "learning_rate": 1.8307962703684948e-06, "loss": 0.0599, "step": 648600 }, { "epoch": 6.38, "grad_norm": 0.8133599162101746, "learning_rate": 1.8306721479142465e-06, "loss": 0.0328, "step": 648625 }, { "epoch": 6.38, "grad_norm": 1.2679920196533203, "learning_rate": 1.830548025459998e-06, "loss": 0.0743, "step": 648650 }, { "epoch": 6.38, "grad_norm": 8.175966262817383, "learning_rate": 1.8304239030057493e-06, "loss": 0.0306, "step": 648675 }, { "epoch": 6.38, "grad_norm": 3.560406446456909, "learning_rate": 1.830299780551501e-06, "loss": 0.0629, "step": 648700 }, { "epoch": 6.38, "grad_norm": 9.707033157348633, "learning_rate": 1.8301756580972526e-06, "loss": 0.0269, "step": 648725 }, { "epoch": 6.38, "grad_norm": 4.035088062286377, "learning_rate": 1.830051535643004e-06, "loss": 0.0763, "step": 648750 }, { "epoch": 6.38, "grad_norm": 4.616561412811279, "learning_rate": 1.8299274131887556e-06, "loss": 0.0263, "step": 648775 }, { "epoch": 6.38, "grad_norm": 2.358266592025757, "learning_rate": 1.8298032907345073e-06, "loss": 0.0788, "step": 648800 }, { "epoch": 6.38, "grad_norm": 5.793243885040283, "learning_rate": 1.8296791682802587e-06, "loss": 0.0404, "step": 648825 }, { "epoch": 6.38, "grad_norm": 0.6998794078826904, "learning_rate": 1.8295550458260103e-06, "loss": 0.091, "step": 648850 }, { "epoch": 6.38, "grad_norm": 2.7002453804016113, "learning_rate": 1.829430923371762e-06, "loss": 0.0348, "step": 648875 }, { "epoch": 6.38, "grad_norm": 0.28527188301086426, "learning_rate": 1.8293068009175132e-06, "loss": 0.0777, "step": 648900 }, { "epoch": 6.38, "grad_norm": 9.25855827331543, "learning_rate": 1.8291826784632648e-06, "loss": 0.0177, "step": 648925 }, { "epoch": 6.38, "grad_norm": 1.9408469200134277, "learning_rate": 1.8290585560090162e-06, "loss": 0.0674, "step": 648950 }, { "epoch": 6.38, "grad_norm": 1.9831137657165527, "learning_rate": 1.8289344335547679e-06, "loss": 0.0177, "step": 648975 }, { "epoch": 6.38, "grad_norm": 0.8299275636672974, "learning_rate": 1.8288103111005195e-06, "loss": 0.0881, "step": 649000 }, { "epoch": 6.38, "grad_norm": 9.14040756225586, "learning_rate": 1.828686188646271e-06, "loss": 0.0279, "step": 649025 }, { "epoch": 6.38, "grad_norm": 2.565566301345825, "learning_rate": 1.8285620661920226e-06, "loss": 0.1014, "step": 649050 }, { "epoch": 6.38, "grad_norm": 1.7312545776367188, "learning_rate": 1.8284379437377742e-06, "loss": 0.021, "step": 649075 }, { "epoch": 6.38, "grad_norm": 1.3960082530975342, "learning_rate": 1.8283138212835256e-06, "loss": 0.0561, "step": 649100 }, { "epoch": 6.38, "grad_norm": 13.372907638549805, "learning_rate": 1.8281896988292773e-06, "loss": 0.0258, "step": 649125 }, { "epoch": 6.38, "grad_norm": 2.4074223041534424, "learning_rate": 1.828065576375029e-06, "loss": 0.0882, "step": 649150 }, { "epoch": 6.38, "grad_norm": 12.662275314331055, "learning_rate": 1.8279414539207801e-06, "loss": 0.0307, "step": 649175 }, { "epoch": 6.38, "grad_norm": 2.619413137435913, "learning_rate": 1.8278173314665317e-06, "loss": 0.0837, "step": 649200 }, { "epoch": 6.38, "grad_norm": 10.935846328735352, "learning_rate": 1.8276932090122834e-06, "loss": 0.0379, "step": 649225 }, { "epoch": 6.38, "grad_norm": 0.04805396497249603, "learning_rate": 1.8275690865580348e-06, "loss": 0.0966, "step": 649250 }, { "epoch": 6.38, "grad_norm": 7.723107814788818, "learning_rate": 1.8274449641037864e-06, "loss": 0.0239, "step": 649275 }, { "epoch": 6.38, "grad_norm": 1.5712921619415283, "learning_rate": 1.827320841649538e-06, "loss": 0.0496, "step": 649300 }, { "epoch": 6.38, "grad_norm": 13.961081504821777, "learning_rate": 1.8271967191952895e-06, "loss": 0.0284, "step": 649325 }, { "epoch": 6.38, "grad_norm": 1.6347893476486206, "learning_rate": 1.8270725967410411e-06, "loss": 0.0964, "step": 649350 }, { "epoch": 6.38, "grad_norm": 10.300520896911621, "learning_rate": 1.8269484742867923e-06, "loss": 0.0287, "step": 649375 }, { "epoch": 6.39, "grad_norm": 1.7471331357955933, "learning_rate": 1.826824351832544e-06, "loss": 0.0674, "step": 649400 }, { "epoch": 6.39, "grad_norm": 5.523606300354004, "learning_rate": 1.8267002293782956e-06, "loss": 0.0281, "step": 649425 }, { "epoch": 6.39, "grad_norm": 4.767825126647949, "learning_rate": 1.826576106924047e-06, "loss": 0.0703, "step": 649450 }, { "epoch": 6.39, "grad_norm": 7.123287677764893, "learning_rate": 1.8264519844697987e-06, "loss": 0.0267, "step": 649475 }, { "epoch": 6.39, "grad_norm": 6.319888114929199, "learning_rate": 1.8263278620155503e-06, "loss": 0.0529, "step": 649500 }, { "epoch": 6.39, "grad_norm": 9.735477447509766, "learning_rate": 1.8262037395613017e-06, "loss": 0.0296, "step": 649525 }, { "epoch": 6.39, "grad_norm": 1.8996870517730713, "learning_rate": 1.8260796171070534e-06, "loss": 0.0989, "step": 649550 }, { "epoch": 6.39, "grad_norm": 5.078734874725342, "learning_rate": 1.825955494652805e-06, "loss": 0.0204, "step": 649575 }, { "epoch": 6.39, "grad_norm": 1.2073345184326172, "learning_rate": 1.8258313721985562e-06, "loss": 0.0789, "step": 649600 }, { "epoch": 6.39, "grad_norm": 4.131916522979736, "learning_rate": 1.8257072497443078e-06, "loss": 0.0283, "step": 649625 }, { "epoch": 6.39, "grad_norm": 3.683804512023926, "learning_rate": 1.8255831272900595e-06, "loss": 0.0711, "step": 649650 }, { "epoch": 6.39, "grad_norm": 20.519962310791016, "learning_rate": 1.825459004835811e-06, "loss": 0.034, "step": 649675 }, { "epoch": 6.39, "grad_norm": 2.9254913330078125, "learning_rate": 1.8253348823815625e-06, "loss": 0.0902, "step": 649700 }, { "epoch": 6.39, "grad_norm": 3.302826166152954, "learning_rate": 1.8252107599273142e-06, "loss": 0.0188, "step": 649725 }, { "epoch": 6.39, "grad_norm": 4.736788749694824, "learning_rate": 1.8250866374730656e-06, "loss": 0.0998, "step": 649750 }, { "epoch": 6.39, "grad_norm": 11.342506408691406, "learning_rate": 1.8249625150188172e-06, "loss": 0.0215, "step": 649775 }, { "epoch": 6.39, "grad_norm": 1.1083087921142578, "learning_rate": 1.8248383925645684e-06, "loss": 0.082, "step": 649800 }, { "epoch": 6.39, "grad_norm": 0.3507526218891144, "learning_rate": 1.82471427011032e-06, "loss": 0.0251, "step": 649825 }, { "epoch": 6.39, "grad_norm": 0.1779399812221527, "learning_rate": 1.8245901476560717e-06, "loss": 0.0695, "step": 649850 }, { "epoch": 6.39, "grad_norm": 17.959638595581055, "learning_rate": 1.8244660252018231e-06, "loss": 0.0212, "step": 649875 }, { "epoch": 6.39, "grad_norm": 2.378138303756714, "learning_rate": 1.8243419027475748e-06, "loss": 0.0667, "step": 649900 }, { "epoch": 6.39, "grad_norm": 11.63609504699707, "learning_rate": 1.8242177802933264e-06, "loss": 0.0192, "step": 649925 }, { "epoch": 6.39, "grad_norm": 6.951052665710449, "learning_rate": 1.8240936578390778e-06, "loss": 0.084, "step": 649950 }, { "epoch": 6.39, "grad_norm": 9.974477767944336, "learning_rate": 1.8239695353848295e-06, "loss": 0.0275, "step": 649975 }, { "epoch": 6.39, "grad_norm": 3.543452739715576, "learning_rate": 1.823845412930581e-06, "loss": 0.0711, "step": 650000 }, { "epoch": 6.39, "grad_norm": 3.305051803588867, "learning_rate": 1.8237212904763323e-06, "loss": 0.0292, "step": 650025 }, { "epoch": 6.39, "grad_norm": 7.0016770362854, "learning_rate": 1.823597168022084e-06, "loss": 0.09, "step": 650050 }, { "epoch": 6.39, "grad_norm": 5.397378921508789, "learning_rate": 1.8234730455678356e-06, "loss": 0.0314, "step": 650075 }, { "epoch": 6.39, "grad_norm": 0.1810857504606247, "learning_rate": 1.823348923113587e-06, "loss": 0.092, "step": 650100 }, { "epoch": 6.39, "grad_norm": 4.362260818481445, "learning_rate": 1.8232248006593386e-06, "loss": 0.0134, "step": 650125 }, { "epoch": 6.39, "grad_norm": 0.6527169942855835, "learning_rate": 1.8231006782050903e-06, "loss": 0.077, "step": 650150 }, { "epoch": 6.39, "grad_norm": 8.285163879394531, "learning_rate": 1.8229765557508417e-06, "loss": 0.0187, "step": 650175 }, { "epoch": 6.39, "grad_norm": 7.974610805511475, "learning_rate": 1.8228524332965933e-06, "loss": 0.0781, "step": 650200 }, { "epoch": 6.39, "grad_norm": 4.241216659545898, "learning_rate": 1.8227283108423445e-06, "loss": 0.0199, "step": 650225 }, { "epoch": 6.39, "grad_norm": 1.9364879131317139, "learning_rate": 1.8226041883880962e-06, "loss": 0.0928, "step": 650250 }, { "epoch": 6.39, "grad_norm": 19.184240341186523, "learning_rate": 1.8224800659338478e-06, "loss": 0.0177, "step": 650275 }, { "epoch": 6.39, "grad_norm": 9.392902374267578, "learning_rate": 1.8223559434795992e-06, "loss": 0.0634, "step": 650300 }, { "epoch": 6.39, "grad_norm": 11.310359954833984, "learning_rate": 1.8222318210253509e-06, "loss": 0.0152, "step": 650325 }, { "epoch": 6.39, "grad_norm": 5.3446044921875, "learning_rate": 1.8221076985711025e-06, "loss": 0.1009, "step": 650350 }, { "epoch": 6.39, "grad_norm": 7.3987812995910645, "learning_rate": 1.821983576116854e-06, "loss": 0.0201, "step": 650375 }, { "epoch": 6.39, "grad_norm": 0.6655868291854858, "learning_rate": 1.8218644185607754e-06, "loss": 0.0799, "step": 650400 }, { "epoch": 6.4, "grad_norm": 5.88639497756958, "learning_rate": 1.821740296106527e-06, "loss": 0.0261, "step": 650425 }, { "epoch": 6.4, "grad_norm": 6.03761625289917, "learning_rate": 1.8216161736522786e-06, "loss": 0.0827, "step": 650450 }, { "epoch": 6.4, "grad_norm": 13.225266456604004, "learning_rate": 1.8214920511980299e-06, "loss": 0.0283, "step": 650475 }, { "epoch": 6.4, "grad_norm": 5.659544467926025, "learning_rate": 1.8213679287437815e-06, "loss": 0.0731, "step": 650500 }, { "epoch": 6.4, "grad_norm": 5.293619155883789, "learning_rate": 1.8212438062895331e-06, "loss": 0.015, "step": 650525 }, { "epoch": 6.4, "grad_norm": 4.586948871612549, "learning_rate": 1.8211196838352846e-06, "loss": 0.0776, "step": 650550 }, { "epoch": 6.4, "grad_norm": 6.737583637237549, "learning_rate": 1.8209955613810362e-06, "loss": 0.0206, "step": 650575 }, { "epoch": 6.4, "grad_norm": 4.717617511749268, "learning_rate": 1.8208714389267878e-06, "loss": 0.0681, "step": 650600 }, { "epoch": 6.4, "grad_norm": 13.191018104553223, "learning_rate": 1.8207473164725392e-06, "loss": 0.0165, "step": 650625 }, { "epoch": 6.4, "grad_norm": 1.2677947282791138, "learning_rate": 1.8206231940182909e-06, "loss": 0.0789, "step": 650650 }, { "epoch": 6.4, "grad_norm": 5.58652400970459, "learning_rate": 1.8204990715640425e-06, "loss": 0.0143, "step": 650675 }, { "epoch": 6.4, "grad_norm": 3.5159597396850586, "learning_rate": 1.820374949109794e-06, "loss": 0.0687, "step": 650700 }, { "epoch": 6.4, "grad_norm": 1.1808536052703857, "learning_rate": 1.8202508266555456e-06, "loss": 0.0346, "step": 650725 }, { "epoch": 6.4, "grad_norm": 3.444568634033203, "learning_rate": 1.8201267042012972e-06, "loss": 0.0544, "step": 650750 }, { "epoch": 6.4, "grad_norm": 4.844263076782227, "learning_rate": 1.8200025817470484e-06, "loss": 0.0195, "step": 650775 }, { "epoch": 6.4, "grad_norm": 2.4248926639556885, "learning_rate": 1.8198784592928e-06, "loss": 0.0725, "step": 650800 }, { "epoch": 6.4, "grad_norm": 6.434892654418945, "learning_rate": 1.8197543368385515e-06, "loss": 0.0242, "step": 650825 }, { "epoch": 6.4, "grad_norm": 1.2986849546432495, "learning_rate": 1.8196302143843031e-06, "loss": 0.0515, "step": 650850 }, { "epoch": 6.4, "grad_norm": 3.4828572273254395, "learning_rate": 1.8195060919300547e-06, "loss": 0.0303, "step": 650875 }, { "epoch": 6.4, "grad_norm": 1.2007412910461426, "learning_rate": 1.8193819694758062e-06, "loss": 0.0814, "step": 650900 }, { "epoch": 6.4, "grad_norm": 6.294246673583984, "learning_rate": 1.8192578470215578e-06, "loss": 0.0241, "step": 650925 }, { "epoch": 6.4, "grad_norm": 1.3417079448699951, "learning_rate": 1.8191337245673094e-06, "loss": 0.0612, "step": 650950 }, { "epoch": 6.4, "grad_norm": 12.312219619750977, "learning_rate": 1.8190096021130607e-06, "loss": 0.0197, "step": 650975 }, { "epoch": 6.4, "grad_norm": 0.5927620530128479, "learning_rate": 1.8188854796588123e-06, "loss": 0.0604, "step": 651000 }, { "epoch": 6.4, "grad_norm": 9.05571174621582, "learning_rate": 1.818761357204564e-06, "loss": 0.0324, "step": 651025 }, { "epoch": 6.4, "grad_norm": 1.0667037963867188, "learning_rate": 1.8186372347503153e-06, "loss": 0.0647, "step": 651050 }, { "epoch": 6.4, "grad_norm": 2.652317762374878, "learning_rate": 1.818513112296067e-06, "loss": 0.0177, "step": 651075 }, { "epoch": 6.4, "grad_norm": 0.0369870625436306, "learning_rate": 1.8183889898418186e-06, "loss": 0.089, "step": 651100 }, { "epoch": 6.4, "grad_norm": 4.376430988311768, "learning_rate": 1.81826486738757e-06, "loss": 0.0302, "step": 651125 }, { "epoch": 6.4, "grad_norm": 1.8447060585021973, "learning_rate": 1.8181407449333217e-06, "loss": 0.0562, "step": 651150 }, { "epoch": 6.4, "grad_norm": 1.4270155429840088, "learning_rate": 1.8180166224790733e-06, "loss": 0.0203, "step": 651175 }, { "epoch": 6.4, "grad_norm": 4.4618611335754395, "learning_rate": 1.8178925000248245e-06, "loss": 0.0779, "step": 651200 }, { "epoch": 6.4, "grad_norm": 17.712398529052734, "learning_rate": 1.8177683775705762e-06, "loss": 0.0337, "step": 651225 }, { "epoch": 6.4, "grad_norm": 1.0298066139221191, "learning_rate": 1.8176442551163276e-06, "loss": 0.0784, "step": 651250 }, { "epoch": 6.4, "grad_norm": 7.277068614959717, "learning_rate": 1.8175201326620792e-06, "loss": 0.0152, "step": 651275 }, { "epoch": 6.4, "grad_norm": 5.961144924163818, "learning_rate": 1.8173960102078308e-06, "loss": 0.0913, "step": 651300 }, { "epoch": 6.4, "grad_norm": 7.1315531730651855, "learning_rate": 1.8172718877535823e-06, "loss": 0.0213, "step": 651325 }, { "epoch": 6.4, "grad_norm": 0.17035536468029022, "learning_rate": 1.817147765299334e-06, "loss": 0.0878, "step": 651350 }, { "epoch": 6.4, "grad_norm": 1.4596556425094604, "learning_rate": 1.8170236428450855e-06, "loss": 0.036, "step": 651375 }, { "epoch": 6.4, "grad_norm": 6.703814506530762, "learning_rate": 1.8168995203908368e-06, "loss": 0.0619, "step": 651400 }, { "epoch": 6.4, "grad_norm": 6.128843307495117, "learning_rate": 1.8167753979365884e-06, "loss": 0.0148, "step": 651425 }, { "epoch": 6.41, "grad_norm": 3.181365489959717, "learning_rate": 1.81665127548234e-06, "loss": 0.046, "step": 651450 }, { "epoch": 6.41, "grad_norm": 3.4614226818084717, "learning_rate": 1.8165271530280914e-06, "loss": 0.0164, "step": 651475 }, { "epoch": 6.41, "grad_norm": 7.014556884765625, "learning_rate": 1.816403030573843e-06, "loss": 0.075, "step": 651500 }, { "epoch": 6.41, "grad_norm": 2.446046829223633, "learning_rate": 1.8162789081195947e-06, "loss": 0.0273, "step": 651525 }, { "epoch": 6.41, "grad_norm": 2.0459160804748535, "learning_rate": 1.8161547856653461e-06, "loss": 0.0628, "step": 651550 }, { "epoch": 6.41, "grad_norm": 4.359678268432617, "learning_rate": 1.8160306632110978e-06, "loss": 0.0123, "step": 651575 }, { "epoch": 6.41, "grad_norm": 1.3153398036956787, "learning_rate": 1.8159065407568494e-06, "loss": 0.0726, "step": 651600 }, { "epoch": 6.41, "grad_norm": 9.523024559020996, "learning_rate": 1.8157824183026006e-06, "loss": 0.0291, "step": 651625 }, { "epoch": 6.41, "grad_norm": 1.5165351629257202, "learning_rate": 1.8156582958483523e-06, "loss": 0.081, "step": 651650 }, { "epoch": 6.41, "grad_norm": 11.747750282287598, "learning_rate": 1.8155341733941037e-06, "loss": 0.0265, "step": 651675 }, { "epoch": 6.41, "grad_norm": 7.6328349113464355, "learning_rate": 1.8154100509398553e-06, "loss": 0.0716, "step": 651700 }, { "epoch": 6.41, "grad_norm": 2.181093215942383, "learning_rate": 1.815285928485607e-06, "loss": 0.0176, "step": 651725 }, { "epoch": 6.41, "grad_norm": 4.19921350479126, "learning_rate": 1.8151618060313584e-06, "loss": 0.0747, "step": 651750 }, { "epoch": 6.41, "grad_norm": 7.509678840637207, "learning_rate": 1.81503768357711e-06, "loss": 0.0115, "step": 651775 }, { "epoch": 6.41, "grad_norm": 9.15225601196289, "learning_rate": 1.8149135611228616e-06, "loss": 0.0848, "step": 651800 }, { "epoch": 6.41, "grad_norm": 8.560617446899414, "learning_rate": 1.8147894386686129e-06, "loss": 0.0251, "step": 651825 }, { "epoch": 6.41, "grad_norm": 4.361627578735352, "learning_rate": 1.8146653162143645e-06, "loss": 0.0695, "step": 651850 }, { "epoch": 6.41, "grad_norm": 19.722829818725586, "learning_rate": 1.8145411937601161e-06, "loss": 0.031, "step": 651875 }, { "epoch": 6.41, "grad_norm": 0.9872065782546997, "learning_rate": 1.8144170713058675e-06, "loss": 0.095, "step": 651900 }, { "epoch": 6.41, "grad_norm": 2.8725061416625977, "learning_rate": 1.8142929488516192e-06, "loss": 0.0196, "step": 651925 }, { "epoch": 6.41, "grad_norm": 0.4578087329864502, "learning_rate": 1.8141688263973708e-06, "loss": 0.0823, "step": 651950 }, { "epoch": 6.41, "grad_norm": 3.8677520751953125, "learning_rate": 1.8140447039431222e-06, "loss": 0.0143, "step": 651975 }, { "epoch": 6.41, "grad_norm": 4.830242156982422, "learning_rate": 1.8139205814888739e-06, "loss": 0.0669, "step": 652000 }, { "epoch": 6.41, "grad_norm": 6.801087856292725, "learning_rate": 1.8137964590346255e-06, "loss": 0.0289, "step": 652025 }, { "epoch": 6.41, "grad_norm": 0.8070146441459656, "learning_rate": 1.813672336580377e-06, "loss": 0.0589, "step": 652050 }, { "epoch": 6.41, "grad_norm": 11.690619468688965, "learning_rate": 1.8135482141261286e-06, "loss": 0.025, "step": 652075 }, { "epoch": 6.41, "grad_norm": 6.330761909484863, "learning_rate": 1.8134240916718798e-06, "loss": 0.0782, "step": 652100 }, { "epoch": 6.41, "grad_norm": 12.183172225952148, "learning_rate": 1.8132999692176314e-06, "loss": 0.0301, "step": 652125 }, { "epoch": 6.41, "grad_norm": 6.55720853805542, "learning_rate": 1.813175846763383e-06, "loss": 0.0832, "step": 652150 }, { "epoch": 6.41, "grad_norm": 2.490917444229126, "learning_rate": 1.8130517243091345e-06, "loss": 0.0155, "step": 652175 }, { "epoch": 6.41, "grad_norm": 0.5369095802307129, "learning_rate": 1.8129276018548861e-06, "loss": 0.0968, "step": 652200 }, { "epoch": 6.41, "grad_norm": 11.737622261047363, "learning_rate": 1.8128034794006377e-06, "loss": 0.0162, "step": 652225 }, { "epoch": 6.41, "grad_norm": 1.3742339611053467, "learning_rate": 1.8126793569463892e-06, "loss": 0.0704, "step": 652250 }, { "epoch": 6.41, "grad_norm": 6.090823173522949, "learning_rate": 1.8125552344921408e-06, "loss": 0.0189, "step": 652275 }, { "epoch": 6.41, "grad_norm": 6.421742916107178, "learning_rate": 1.8124311120378924e-06, "loss": 0.0599, "step": 652300 }, { "epoch": 6.41, "grad_norm": 4.684802055358887, "learning_rate": 1.8123069895836436e-06, "loss": 0.0236, "step": 652325 }, { "epoch": 6.41, "grad_norm": 6.539849281311035, "learning_rate": 1.8121828671293953e-06, "loss": 0.0944, "step": 652350 }, { "epoch": 6.41, "grad_norm": 2.631308078765869, "learning_rate": 1.812058744675147e-06, "loss": 0.0278, "step": 652375 }, { "epoch": 6.41, "grad_norm": 0.9910908937454224, "learning_rate": 1.8119346222208983e-06, "loss": 0.0776, "step": 652400 }, { "epoch": 6.41, "grad_norm": 1.1520594358444214, "learning_rate": 1.81181049976665e-06, "loss": 0.0137, "step": 652425 }, { "epoch": 6.41, "grad_norm": 5.4367828369140625, "learning_rate": 1.8116863773124016e-06, "loss": 0.0763, "step": 652450 }, { "epoch": 6.42, "grad_norm": 1.1655195951461792, "learning_rate": 1.811562254858153e-06, "loss": 0.0208, "step": 652475 }, { "epoch": 6.42, "grad_norm": 5.7613677978515625, "learning_rate": 1.8114381324039047e-06, "loss": 0.0819, "step": 652500 }, { "epoch": 6.42, "grad_norm": 7.113751411437988, "learning_rate": 1.8113140099496559e-06, "loss": 0.0143, "step": 652525 }, { "epoch": 6.42, "grad_norm": 1.3058323860168457, "learning_rate": 1.8111898874954075e-06, "loss": 0.059, "step": 652550 }, { "epoch": 6.42, "grad_norm": 10.92202091217041, "learning_rate": 1.8110657650411592e-06, "loss": 0.0247, "step": 652575 }, { "epoch": 6.42, "grad_norm": 0.9308724999427795, "learning_rate": 1.8109416425869106e-06, "loss": 0.0817, "step": 652600 }, { "epoch": 6.42, "grad_norm": 6.833740234375, "learning_rate": 1.8108175201326622e-06, "loss": 0.0183, "step": 652625 }, { "epoch": 6.42, "grad_norm": 7.726973533630371, "learning_rate": 1.8106933976784138e-06, "loss": 0.0857, "step": 652650 }, { "epoch": 6.42, "grad_norm": 0.9683398008346558, "learning_rate": 1.8105692752241653e-06, "loss": 0.0225, "step": 652675 }, { "epoch": 6.42, "grad_norm": 1.7919257879257202, "learning_rate": 1.810445152769917e-06, "loss": 0.1018, "step": 652700 }, { "epoch": 6.42, "grad_norm": 8.388053894042969, "learning_rate": 1.8103210303156685e-06, "loss": 0.0195, "step": 652725 }, { "epoch": 6.42, "grad_norm": 5.218498229980469, "learning_rate": 1.81020187275959e-06, "loss": 0.071, "step": 652750 }, { "epoch": 6.42, "grad_norm": 0.21619758009910583, "learning_rate": 1.8100777503053412e-06, "loss": 0.0124, "step": 652775 }, { "epoch": 6.42, "grad_norm": 0.24360963702201843, "learning_rate": 1.8099536278510928e-06, "loss": 0.0955, "step": 652800 }, { "epoch": 6.42, "grad_norm": 11.173784255981445, "learning_rate": 1.8098295053968445e-06, "loss": 0.0199, "step": 652825 }, { "epoch": 6.42, "grad_norm": 12.457481384277344, "learning_rate": 1.8097053829425959e-06, "loss": 0.0558, "step": 652850 }, { "epoch": 6.42, "grad_norm": 14.364147186279297, "learning_rate": 1.8095812604883475e-06, "loss": 0.0288, "step": 652875 }, { "epoch": 6.42, "grad_norm": 3.622786521911621, "learning_rate": 1.8094571380340992e-06, "loss": 0.0821, "step": 652900 }, { "epoch": 6.42, "grad_norm": 6.778204441070557, "learning_rate": 1.8093330155798506e-06, "loss": 0.0236, "step": 652925 }, { "epoch": 6.42, "grad_norm": 2.727529525756836, "learning_rate": 1.8092088931256022e-06, "loss": 0.0894, "step": 652950 }, { "epoch": 6.42, "grad_norm": 7.87716007232666, "learning_rate": 1.8090847706713539e-06, "loss": 0.0175, "step": 652975 }, { "epoch": 6.42, "grad_norm": 1.8418024778366089, "learning_rate": 1.808960648217105e-06, "loss": 0.0726, "step": 653000 }, { "epoch": 6.42, "grad_norm": 4.832696914672852, "learning_rate": 1.8088365257628567e-06, "loss": 0.0146, "step": 653025 }, { "epoch": 6.42, "grad_norm": 2.2404894828796387, "learning_rate": 1.8087124033086083e-06, "loss": 0.0903, "step": 653050 }, { "epoch": 6.42, "grad_norm": 7.894485950469971, "learning_rate": 1.8085882808543598e-06, "loss": 0.0199, "step": 653075 }, { "epoch": 6.42, "grad_norm": 2.262561559677124, "learning_rate": 1.8084641584001114e-06, "loss": 0.0521, "step": 653100 }, { "epoch": 6.42, "grad_norm": 1.7813340425491333, "learning_rate": 1.8083400359458628e-06, "loss": 0.0227, "step": 653125 }, { "epoch": 6.42, "grad_norm": 2.5028083324432373, "learning_rate": 1.8082159134916145e-06, "loss": 0.0675, "step": 653150 }, { "epoch": 6.42, "grad_norm": 11.735705375671387, "learning_rate": 1.808091791037366e-06, "loss": 0.0197, "step": 653175 }, { "epoch": 6.42, "grad_norm": 4.229100704193115, "learning_rate": 1.8079676685831173e-06, "loss": 0.0837, "step": 653200 }, { "epoch": 6.42, "grad_norm": 9.342793464660645, "learning_rate": 1.807843546128869e-06, "loss": 0.0361, "step": 653225 }, { "epoch": 6.42, "grad_norm": 1.080621361732483, "learning_rate": 1.8077194236746206e-06, "loss": 0.0632, "step": 653250 }, { "epoch": 6.42, "grad_norm": 5.543814659118652, "learning_rate": 1.807595301220372e-06, "loss": 0.0213, "step": 653275 }, { "epoch": 6.42, "grad_norm": 13.305230140686035, "learning_rate": 1.8074711787661236e-06, "loss": 0.0486, "step": 653300 }, { "epoch": 6.42, "grad_norm": 8.707908630371094, "learning_rate": 1.8073470563118753e-06, "loss": 0.0211, "step": 653325 }, { "epoch": 6.42, "grad_norm": 4.5591840744018555, "learning_rate": 1.8072229338576267e-06, "loss": 0.0925, "step": 653350 }, { "epoch": 6.42, "grad_norm": 1.0090845823287964, "learning_rate": 1.8070988114033783e-06, "loss": 0.025, "step": 653375 }, { "epoch": 6.42, "grad_norm": 0.07680804282426834, "learning_rate": 1.80697468894913e-06, "loss": 0.0819, "step": 653400 }, { "epoch": 6.42, "grad_norm": 6.537163734436035, "learning_rate": 1.8068505664948814e-06, "loss": 0.03, "step": 653425 }, { "epoch": 6.42, "grad_norm": 3.7327795028686523, "learning_rate": 1.806726444040633e-06, "loss": 0.0902, "step": 653450 }, { "epoch": 6.43, "grad_norm": 0.5231075882911682, "learning_rate": 1.8066023215863846e-06, "loss": 0.0196, "step": 653475 }, { "epoch": 6.43, "grad_norm": 3.6071624755859375, "learning_rate": 1.8064781991321359e-06, "loss": 0.0758, "step": 653500 }, { "epoch": 6.43, "grad_norm": 2.313077926635742, "learning_rate": 1.8063540766778875e-06, "loss": 0.0196, "step": 653525 }, { "epoch": 6.43, "grad_norm": 2.8128294944763184, "learning_rate": 1.806229954223639e-06, "loss": 0.0735, "step": 653550 }, { "epoch": 6.43, "grad_norm": 16.988014221191406, "learning_rate": 1.8061058317693906e-06, "loss": 0.0186, "step": 653575 }, { "epoch": 6.43, "grad_norm": 3.0865859985351562, "learning_rate": 1.8059817093151422e-06, "loss": 0.0807, "step": 653600 }, { "epoch": 6.43, "grad_norm": 4.47037410736084, "learning_rate": 1.8058575868608936e-06, "loss": 0.021, "step": 653625 }, { "epoch": 6.43, "grad_norm": 3.753392219543457, "learning_rate": 1.8057334644066452e-06, "loss": 0.0765, "step": 653650 }, { "epoch": 6.43, "grad_norm": 7.667891979217529, "learning_rate": 1.8056093419523969e-06, "loss": 0.0289, "step": 653675 }, { "epoch": 6.43, "grad_norm": 2.6115188598632812, "learning_rate": 1.805485219498148e-06, "loss": 0.0659, "step": 653700 }, { "epoch": 6.43, "grad_norm": 17.91422462463379, "learning_rate": 1.8053610970438997e-06, "loss": 0.0255, "step": 653725 }, { "epoch": 6.43, "grad_norm": 7.984526634216309, "learning_rate": 1.8052369745896514e-06, "loss": 0.0646, "step": 653750 }, { "epoch": 6.43, "grad_norm": 7.086593151092529, "learning_rate": 1.8051128521354028e-06, "loss": 0.025, "step": 653775 }, { "epoch": 6.43, "grad_norm": 1.0170649290084839, "learning_rate": 1.8049887296811544e-06, "loss": 0.0818, "step": 653800 }, { "epoch": 6.43, "grad_norm": 6.476654529571533, "learning_rate": 1.804864607226906e-06, "loss": 0.0293, "step": 653825 }, { "epoch": 6.43, "grad_norm": 0.3236500918865204, "learning_rate": 1.8047404847726575e-06, "loss": 0.0525, "step": 653850 }, { "epoch": 6.43, "grad_norm": 6.957928657531738, "learning_rate": 1.8046163623184091e-06, "loss": 0.0222, "step": 653875 }, { "epoch": 6.43, "grad_norm": 3.7135260105133057, "learning_rate": 1.8044922398641608e-06, "loss": 0.0548, "step": 653900 }, { "epoch": 6.43, "grad_norm": 5.555023670196533, "learning_rate": 1.804368117409912e-06, "loss": 0.0164, "step": 653925 }, { "epoch": 6.43, "grad_norm": 3.864413261413574, "learning_rate": 1.8042439949556636e-06, "loss": 0.0799, "step": 653950 }, { "epoch": 6.43, "grad_norm": 0.8686267733573914, "learning_rate": 1.804119872501415e-06, "loss": 0.0278, "step": 653975 }, { "epoch": 6.43, "grad_norm": 1.1715272665023804, "learning_rate": 1.8039957500471667e-06, "loss": 0.083, "step": 654000 }, { "epoch": 6.43, "grad_norm": 14.230677604675293, "learning_rate": 1.8038716275929183e-06, "loss": 0.0201, "step": 654025 }, { "epoch": 6.43, "grad_norm": 0.8881848454475403, "learning_rate": 1.8037475051386697e-06, "loss": 0.0769, "step": 654050 }, { "epoch": 6.43, "grad_norm": 2.997990846633911, "learning_rate": 1.8036233826844213e-06, "loss": 0.0159, "step": 654075 }, { "epoch": 6.43, "grad_norm": 0.3388843834400177, "learning_rate": 1.803499260230173e-06, "loss": 0.0671, "step": 654100 }, { "epoch": 6.43, "grad_norm": 4.451686859130859, "learning_rate": 1.8033751377759242e-06, "loss": 0.0198, "step": 654125 }, { "epoch": 6.43, "grad_norm": 0.01443143654614687, "learning_rate": 1.8032510153216758e-06, "loss": 0.0763, "step": 654150 }, { "epoch": 6.43, "grad_norm": 1.636626124382019, "learning_rate": 1.8031268928674275e-06, "loss": 0.0198, "step": 654175 }, { "epoch": 6.43, "grad_norm": 6.54612398147583, "learning_rate": 1.8030027704131789e-06, "loss": 0.0967, "step": 654200 }, { "epoch": 6.43, "grad_norm": 8.709625244140625, "learning_rate": 1.8028786479589305e-06, "loss": 0.0235, "step": 654225 }, { "epoch": 6.43, "grad_norm": 3.5454797744750977, "learning_rate": 1.8027545255046822e-06, "loss": 0.0744, "step": 654250 }, { "epoch": 6.43, "grad_norm": 11.8373441696167, "learning_rate": 1.8026304030504336e-06, "loss": 0.0266, "step": 654275 }, { "epoch": 6.43, "grad_norm": 4.567451000213623, "learning_rate": 1.8025062805961852e-06, "loss": 0.0775, "step": 654300 }, { "epoch": 6.43, "grad_norm": 9.735904693603516, "learning_rate": 1.8023821581419369e-06, "loss": 0.0254, "step": 654325 }, { "epoch": 6.43, "grad_norm": 1.2927215099334717, "learning_rate": 1.802258035687688e-06, "loss": 0.0794, "step": 654350 }, { "epoch": 6.43, "grad_norm": 3.725755453109741, "learning_rate": 1.8021339132334397e-06, "loss": 0.0194, "step": 654375 }, { "epoch": 6.43, "grad_norm": 0.6294950842857361, "learning_rate": 1.8020097907791911e-06, "loss": 0.0777, "step": 654400 }, { "epoch": 6.43, "grad_norm": 10.676987648010254, "learning_rate": 1.8018856683249428e-06, "loss": 0.0224, "step": 654425 }, { "epoch": 6.43, "grad_norm": 0.7956191897392273, "learning_rate": 1.8017615458706944e-06, "loss": 0.078, "step": 654450 }, { "epoch": 6.43, "grad_norm": 8.796642303466797, "learning_rate": 1.8016374234164458e-06, "loss": 0.0279, "step": 654475 }, { "epoch": 6.44, "grad_norm": 2.5071966648101807, "learning_rate": 1.8015133009621974e-06, "loss": 0.0721, "step": 654500 }, { "epoch": 6.44, "grad_norm": 16.873912811279297, "learning_rate": 1.801389178507949e-06, "loss": 0.0281, "step": 654525 }, { "epoch": 6.44, "grad_norm": 4.928515434265137, "learning_rate": 1.8012650560537003e-06, "loss": 0.0705, "step": 654550 }, { "epoch": 6.44, "grad_norm": 2.074759006500244, "learning_rate": 1.801140933599452e-06, "loss": 0.0167, "step": 654575 }, { "epoch": 6.44, "grad_norm": 3.3099138736724854, "learning_rate": 1.8010168111452036e-06, "loss": 0.0809, "step": 654600 }, { "epoch": 6.44, "grad_norm": 7.49570369720459, "learning_rate": 1.800892688690955e-06, "loss": 0.0328, "step": 654625 }, { "epoch": 6.44, "grad_norm": 5.497541427612305, "learning_rate": 1.8007685662367066e-06, "loss": 0.066, "step": 654650 }, { "epoch": 6.44, "grad_norm": 12.363760948181152, "learning_rate": 1.8006444437824583e-06, "loss": 0.0207, "step": 654675 }, { "epoch": 6.44, "grad_norm": 0.6672681570053101, "learning_rate": 1.8005203213282097e-06, "loss": 0.0832, "step": 654700 }, { "epoch": 6.44, "grad_norm": 4.6915435791015625, "learning_rate": 1.8003961988739613e-06, "loss": 0.0126, "step": 654725 }, { "epoch": 6.44, "grad_norm": 6.7904052734375, "learning_rate": 1.800272076419713e-06, "loss": 0.0649, "step": 654750 }, { "epoch": 6.44, "grad_norm": 9.20016860961914, "learning_rate": 1.8001479539654642e-06, "loss": 0.0285, "step": 654775 }, { "epoch": 6.44, "grad_norm": 1.3188426494598389, "learning_rate": 1.8000238315112158e-06, "loss": 0.0626, "step": 654800 }, { "epoch": 6.44, "grad_norm": 0.3718675374984741, "learning_rate": 1.7998997090569672e-06, "loss": 0.0239, "step": 654825 }, { "epoch": 6.44, "grad_norm": 1.2073744535446167, "learning_rate": 1.7997755866027189e-06, "loss": 0.0663, "step": 654850 }, { "epoch": 6.44, "grad_norm": 0.22932104766368866, "learning_rate": 1.7996514641484705e-06, "loss": 0.0203, "step": 654875 }, { "epoch": 6.44, "grad_norm": 0.012890003621578217, "learning_rate": 1.799527341694222e-06, "loss": 0.0641, "step": 654900 }, { "epoch": 6.44, "grad_norm": 13.374327659606934, "learning_rate": 1.7994032192399735e-06, "loss": 0.0209, "step": 654925 }, { "epoch": 6.44, "grad_norm": 4.851072788238525, "learning_rate": 1.7992790967857252e-06, "loss": 0.101, "step": 654950 }, { "epoch": 6.44, "grad_norm": 3.480512857437134, "learning_rate": 1.7991549743314766e-06, "loss": 0.0189, "step": 654975 }, { "epoch": 6.44, "grad_norm": 10.68713665008545, "learning_rate": 1.7990308518772282e-06, "loss": 0.0769, "step": 655000 }, { "epoch": 6.44, "grad_norm": 13.42852783203125, "learning_rate": 1.7989067294229799e-06, "loss": 0.0319, "step": 655025 }, { "epoch": 6.44, "grad_norm": 4.974876403808594, "learning_rate": 1.798782606968731e-06, "loss": 0.0698, "step": 655050 }, { "epoch": 6.44, "grad_norm": 9.824362754821777, "learning_rate": 1.7986584845144827e-06, "loss": 0.0254, "step": 655075 }, { "epoch": 6.44, "grad_norm": 2.067371368408203, "learning_rate": 1.7985343620602344e-06, "loss": 0.0782, "step": 655100 }, { "epoch": 6.44, "grad_norm": 11.490979194641113, "learning_rate": 1.7984102396059858e-06, "loss": 0.0267, "step": 655125 }, { "epoch": 6.44, "grad_norm": 4.0889129638671875, "learning_rate": 1.7982861171517374e-06, "loss": 0.0574, "step": 655150 }, { "epoch": 6.44, "grad_norm": 6.4982829093933105, "learning_rate": 1.798161994697489e-06, "loss": 0.0398, "step": 655175 }, { "epoch": 6.44, "grad_norm": 2.706308364868164, "learning_rate": 1.7980378722432405e-06, "loss": 0.0844, "step": 655200 }, { "epoch": 6.44, "grad_norm": 6.543254852294922, "learning_rate": 1.7979137497889921e-06, "loss": 0.0238, "step": 655225 }, { "epoch": 6.44, "grad_norm": 3.00757098197937, "learning_rate": 1.7977945922329136e-06, "loss": 0.0781, "step": 655250 }, { "epoch": 6.44, "grad_norm": 2.6629741191864014, "learning_rate": 1.7976704697786652e-06, "loss": 0.0125, "step": 655275 }, { "epoch": 6.44, "grad_norm": 3.0671932697296143, "learning_rate": 1.7975463473244164e-06, "loss": 0.0811, "step": 655300 }, { "epoch": 6.44, "grad_norm": 2.910433530807495, "learning_rate": 1.797422224870168e-06, "loss": 0.0302, "step": 655325 }, { "epoch": 6.44, "grad_norm": 3.165916919708252, "learning_rate": 1.7972981024159197e-06, "loss": 0.0734, "step": 655350 }, { "epoch": 6.44, "grad_norm": 12.6801118850708, "learning_rate": 1.797173979961671e-06, "loss": 0.0263, "step": 655375 }, { "epoch": 6.44, "grad_norm": 8.29694938659668, "learning_rate": 1.7970498575074227e-06, "loss": 0.0716, "step": 655400 }, { "epoch": 6.44, "grad_norm": 4.048943042755127, "learning_rate": 1.7969257350531742e-06, "loss": 0.023, "step": 655425 }, { "epoch": 6.44, "grad_norm": 1.444810152053833, "learning_rate": 1.7968016125989258e-06, "loss": 0.0961, "step": 655450 }, { "epoch": 6.44, "grad_norm": 3.540095567703247, "learning_rate": 1.7966774901446774e-06, "loss": 0.0197, "step": 655475 }, { "epoch": 6.44, "grad_norm": 6.497702121734619, "learning_rate": 1.7965533676904286e-06, "loss": 0.1084, "step": 655500 }, { "epoch": 6.45, "grad_norm": 10.632489204406738, "learning_rate": 1.7964292452361803e-06, "loss": 0.0234, "step": 655525 }, { "epoch": 6.45, "grad_norm": 4.218997955322266, "learning_rate": 1.796305122781932e-06, "loss": 0.0856, "step": 655550 }, { "epoch": 6.45, "grad_norm": 5.521743297576904, "learning_rate": 1.7961810003276833e-06, "loss": 0.0294, "step": 655575 }, { "epoch": 6.45, "grad_norm": 2.2620396614074707, "learning_rate": 1.796056877873435e-06, "loss": 0.0866, "step": 655600 }, { "epoch": 6.45, "grad_norm": 3.169848680496216, "learning_rate": 1.7959327554191866e-06, "loss": 0.015, "step": 655625 }, { "epoch": 6.45, "grad_norm": 2.423929214477539, "learning_rate": 1.795808632964938e-06, "loss": 0.0792, "step": 655650 }, { "epoch": 6.45, "grad_norm": 9.912242889404297, "learning_rate": 1.7956845105106897e-06, "loss": 0.0163, "step": 655675 }, { "epoch": 6.45, "grad_norm": 11.547633171081543, "learning_rate": 1.7955603880564413e-06, "loss": 0.0833, "step": 655700 }, { "epoch": 6.45, "grad_norm": 19.511951446533203, "learning_rate": 1.7954362656021925e-06, "loss": 0.0318, "step": 655725 }, { "epoch": 6.45, "grad_norm": 6.749334812164307, "learning_rate": 1.7953121431479441e-06, "loss": 0.0862, "step": 655750 }, { "epoch": 6.45, "grad_norm": 7.401627540588379, "learning_rate": 1.7951880206936958e-06, "loss": 0.0236, "step": 655775 }, { "epoch": 6.45, "grad_norm": 4.603045463562012, "learning_rate": 1.7950638982394472e-06, "loss": 0.0677, "step": 655800 }, { "epoch": 6.45, "grad_norm": 0.6168071627616882, "learning_rate": 1.7949397757851988e-06, "loss": 0.0185, "step": 655825 }, { "epoch": 6.45, "grad_norm": 7.13578987121582, "learning_rate": 1.7948156533309503e-06, "loss": 0.0784, "step": 655850 }, { "epoch": 6.45, "grad_norm": 1.6485519409179688, "learning_rate": 1.794691530876702e-06, "loss": 0.0201, "step": 655875 }, { "epoch": 6.45, "grad_norm": 1.9621129035949707, "learning_rate": 1.7945674084224535e-06, "loss": 0.0716, "step": 655900 }, { "epoch": 6.45, "grad_norm": 4.5877180099487305, "learning_rate": 1.7944432859682047e-06, "loss": 0.0195, "step": 655925 }, { "epoch": 6.45, "grad_norm": 4.2908453941345215, "learning_rate": 1.7943191635139564e-06, "loss": 0.0794, "step": 655950 }, { "epoch": 6.45, "grad_norm": 9.526810646057129, "learning_rate": 1.794195041059708e-06, "loss": 0.0182, "step": 655975 }, { "epoch": 6.45, "grad_norm": 0.19998788833618164, "learning_rate": 1.7940709186054594e-06, "loss": 0.0514, "step": 656000 }, { "epoch": 6.45, "grad_norm": 3.777113437652588, "learning_rate": 1.793946796151211e-06, "loss": 0.0221, "step": 656025 }, { "epoch": 6.45, "grad_norm": 1.6967593431472778, "learning_rate": 1.7938226736969627e-06, "loss": 0.0889, "step": 656050 }, { "epoch": 6.45, "grad_norm": 7.832505702972412, "learning_rate": 1.7936985512427141e-06, "loss": 0.0169, "step": 656075 }, { "epoch": 6.45, "grad_norm": 0.12340344488620758, "learning_rate": 1.7935744287884658e-06, "loss": 0.0687, "step": 656100 }, { "epoch": 6.45, "grad_norm": 5.478213787078857, "learning_rate": 1.7934503063342174e-06, "loss": 0.0298, "step": 656125 }, { "epoch": 6.45, "grad_norm": 0.2922235429286957, "learning_rate": 1.7933261838799686e-06, "loss": 0.0739, "step": 656150 }, { "epoch": 6.45, "grad_norm": 17.522098541259766, "learning_rate": 1.7932020614257202e-06, "loss": 0.0242, "step": 656175 }, { "epoch": 6.45, "grad_norm": 1.3352112770080566, "learning_rate": 1.7930779389714719e-06, "loss": 0.0612, "step": 656200 }, { "epoch": 6.45, "grad_norm": 13.681044578552246, "learning_rate": 1.7929538165172233e-06, "loss": 0.021, "step": 656225 }, { "epoch": 6.45, "grad_norm": 1.7787673473358154, "learning_rate": 1.792829694062975e-06, "loss": 0.0728, "step": 656250 }, { "epoch": 6.45, "grad_norm": 0.29918497800827026, "learning_rate": 1.7927055716087264e-06, "loss": 0.0193, "step": 656275 }, { "epoch": 6.45, "grad_norm": 5.134958267211914, "learning_rate": 1.792581449154478e-06, "loss": 0.0968, "step": 656300 }, { "epoch": 6.45, "grad_norm": 16.465944290161133, "learning_rate": 1.7924573267002296e-06, "loss": 0.0391, "step": 656325 }, { "epoch": 6.45, "grad_norm": 0.2859799861907959, "learning_rate": 1.792333204245981e-06, "loss": 0.0439, "step": 656350 }, { "epoch": 6.45, "grad_norm": 6.912945747375488, "learning_rate": 1.7922090817917327e-06, "loss": 0.0177, "step": 656375 }, { "epoch": 6.45, "grad_norm": 0.0837337076663971, "learning_rate": 1.7920849593374843e-06, "loss": 0.0616, "step": 656400 }, { "epoch": 6.45, "grad_norm": 2.947321891784668, "learning_rate": 1.7919608368832355e-06, "loss": 0.0292, "step": 656425 }, { "epoch": 6.45, "grad_norm": 10.788904190063477, "learning_rate": 1.7918367144289872e-06, "loss": 0.0891, "step": 656450 }, { "epoch": 6.45, "grad_norm": 8.799477577209473, "learning_rate": 1.7917125919747388e-06, "loss": 0.0275, "step": 656475 }, { "epoch": 6.45, "grad_norm": 3.025406837463379, "learning_rate": 1.7915884695204902e-06, "loss": 0.0868, "step": 656500 }, { "epoch": 6.46, "grad_norm": 2.024916648864746, "learning_rate": 1.7914643470662419e-06, "loss": 0.0191, "step": 656525 }, { "epoch": 6.46, "grad_norm": 2.719606399536133, "learning_rate": 1.7913402246119935e-06, "loss": 0.0694, "step": 656550 }, { "epoch": 6.46, "grad_norm": 1.734028935432434, "learning_rate": 1.791216102157745e-06, "loss": 0.0297, "step": 656575 }, { "epoch": 6.46, "grad_norm": 2.011528968811035, "learning_rate": 1.7910919797034966e-06, "loss": 0.0781, "step": 656600 }, { "epoch": 6.46, "grad_norm": 17.98263931274414, "learning_rate": 1.7909678572492482e-06, "loss": 0.0193, "step": 656625 }, { "epoch": 6.46, "grad_norm": 1.8526204824447632, "learning_rate": 1.7908437347949994e-06, "loss": 0.1054, "step": 656650 }, { "epoch": 6.46, "grad_norm": 1.8269449472427368, "learning_rate": 1.790719612340751e-06, "loss": 0.0263, "step": 656675 }, { "epoch": 6.46, "grad_norm": 4.623889446258545, "learning_rate": 1.7905954898865025e-06, "loss": 0.0872, "step": 656700 }, { "epoch": 6.46, "grad_norm": 1.6358542442321777, "learning_rate": 1.790471367432254e-06, "loss": 0.0256, "step": 656725 }, { "epoch": 6.46, "grad_norm": 5.144856929779053, "learning_rate": 1.7903472449780057e-06, "loss": 0.0759, "step": 656750 }, { "epoch": 6.46, "grad_norm": 10.356302261352539, "learning_rate": 1.7902231225237572e-06, "loss": 0.0233, "step": 656775 }, { "epoch": 6.46, "grad_norm": 1.6650075912475586, "learning_rate": 1.7900990000695088e-06, "loss": 0.066, "step": 656800 }, { "epoch": 6.46, "grad_norm": 8.573614120483398, "learning_rate": 1.7899748776152604e-06, "loss": 0.0385, "step": 656825 }, { "epoch": 6.46, "grad_norm": 1.4639031887054443, "learning_rate": 1.7898507551610116e-06, "loss": 0.0593, "step": 656850 }, { "epoch": 6.46, "grad_norm": 1.8061845302581787, "learning_rate": 1.7897266327067633e-06, "loss": 0.0294, "step": 656875 }, { "epoch": 6.46, "grad_norm": 0.3208179771900177, "learning_rate": 1.789602510252515e-06, "loss": 0.0729, "step": 656900 }, { "epoch": 6.46, "grad_norm": 2.87809157371521, "learning_rate": 1.7894783877982663e-06, "loss": 0.0159, "step": 656925 }, { "epoch": 6.46, "grad_norm": 0.15885263681411743, "learning_rate": 1.789354265344018e-06, "loss": 0.0567, "step": 656950 }, { "epoch": 6.46, "grad_norm": 3.0230772495269775, "learning_rate": 1.7892301428897696e-06, "loss": 0.0277, "step": 656975 }, { "epoch": 6.46, "grad_norm": 1.9433516263961792, "learning_rate": 1.789106020435521e-06, "loss": 0.081, "step": 657000 }, { "epoch": 6.46, "grad_norm": 7.873944282531738, "learning_rate": 1.7889818979812727e-06, "loss": 0.0238, "step": 657025 }, { "epoch": 6.46, "grad_norm": 1.6860853433609009, "learning_rate": 1.7888577755270243e-06, "loss": 0.0598, "step": 657050 }, { "epoch": 6.46, "grad_norm": 2.6680054664611816, "learning_rate": 1.7887336530727755e-06, "loss": 0.0229, "step": 657075 }, { "epoch": 6.46, "grad_norm": 0.05292467772960663, "learning_rate": 1.7886095306185271e-06, "loss": 0.0569, "step": 657100 }, { "epoch": 6.46, "grad_norm": 8.473898887634277, "learning_rate": 1.7884854081642786e-06, "loss": 0.0188, "step": 657125 }, { "epoch": 6.46, "grad_norm": 2.4129016399383545, "learning_rate": 1.7883612857100302e-06, "loss": 0.0627, "step": 657150 }, { "epoch": 6.46, "grad_norm": 10.642199516296387, "learning_rate": 1.7882371632557818e-06, "loss": 0.0207, "step": 657175 }, { "epoch": 6.46, "grad_norm": 1.1249561309814453, "learning_rate": 1.7881130408015333e-06, "loss": 0.07, "step": 657200 }, { "epoch": 6.46, "grad_norm": 8.435446739196777, "learning_rate": 1.7879889183472849e-06, "loss": 0.0299, "step": 657225 }, { "epoch": 6.46, "grad_norm": 6.3719072341918945, "learning_rate": 1.7878647958930365e-06, "loss": 0.0698, "step": 657250 }, { "epoch": 6.46, "grad_norm": 0.8837615251541138, "learning_rate": 1.7877406734387877e-06, "loss": 0.0171, "step": 657275 }, { "epoch": 6.46, "grad_norm": 0.9941131472587585, "learning_rate": 1.7876165509845394e-06, "loss": 0.0718, "step": 657300 }, { "epoch": 6.46, "grad_norm": 2.2373125553131104, "learning_rate": 1.787492428530291e-06, "loss": 0.0211, "step": 657325 }, { "epoch": 6.46, "grad_norm": 7.685915946960449, "learning_rate": 1.7873683060760424e-06, "loss": 0.0593, "step": 657350 }, { "epoch": 6.46, "grad_norm": 6.20241117477417, "learning_rate": 1.787244183621794e-06, "loss": 0.0273, "step": 657375 }, { "epoch": 6.46, "grad_norm": 1.843080997467041, "learning_rate": 1.7871250260657155e-06, "loss": 0.086, "step": 657400 }, { "epoch": 6.46, "grad_norm": 2.8936069011688232, "learning_rate": 1.7870009036114671e-06, "loss": 0.017, "step": 657425 }, { "epoch": 6.46, "grad_norm": 2.2659292221069336, "learning_rate": 1.7868767811572186e-06, "loss": 0.0508, "step": 657450 }, { "epoch": 6.46, "grad_norm": 2.038072347640991, "learning_rate": 1.7867526587029702e-06, "loss": 0.0138, "step": 657475 }, { "epoch": 6.46, "grad_norm": 1.1100784540176392, "learning_rate": 1.7866285362487218e-06, "loss": 0.0982, "step": 657500 }, { "epoch": 6.46, "grad_norm": 1.0526124238967896, "learning_rate": 1.786504413794473e-06, "loss": 0.0217, "step": 657525 }, { "epoch": 6.47, "grad_norm": 1.0860074758529663, "learning_rate": 1.7863802913402247e-06, "loss": 0.0764, "step": 657550 }, { "epoch": 6.47, "grad_norm": 5.24142599105835, "learning_rate": 1.7862561688859763e-06, "loss": 0.025, "step": 657575 }, { "epoch": 6.47, "grad_norm": 1.728498101234436, "learning_rate": 1.7861320464317277e-06, "loss": 0.0689, "step": 657600 }, { "epoch": 6.47, "grad_norm": 13.287552833557129, "learning_rate": 1.7860079239774794e-06, "loss": 0.0346, "step": 657625 }, { "epoch": 6.47, "grad_norm": 0.11362628638744354, "learning_rate": 1.785883801523231e-06, "loss": 0.0976, "step": 657650 }, { "epoch": 6.47, "grad_norm": 11.224457740783691, "learning_rate": 1.7857596790689824e-06, "loss": 0.0322, "step": 657675 }, { "epoch": 6.47, "grad_norm": 2.27998423576355, "learning_rate": 1.785635556614734e-06, "loss": 0.0785, "step": 657700 }, { "epoch": 6.47, "grad_norm": 7.096724033355713, "learning_rate": 1.7855114341604853e-06, "loss": 0.0357, "step": 657725 }, { "epoch": 6.47, "grad_norm": 1.1953834295272827, "learning_rate": 1.785387311706237e-06, "loss": 0.0991, "step": 657750 }, { "epoch": 6.47, "grad_norm": 10.614176750183105, "learning_rate": 1.7852631892519886e-06, "loss": 0.0237, "step": 657775 }, { "epoch": 6.47, "grad_norm": 3.3817672729492188, "learning_rate": 1.78513906679774e-06, "loss": 0.0982, "step": 657800 }, { "epoch": 6.47, "grad_norm": 20.156618118286133, "learning_rate": 1.7850149443434916e-06, "loss": 0.0379, "step": 657825 }, { "epoch": 6.47, "grad_norm": 1.3359676599502563, "learning_rate": 1.7848908218892432e-06, "loss": 0.0918, "step": 657850 }, { "epoch": 6.47, "grad_norm": 5.109052658081055, "learning_rate": 1.7847666994349947e-06, "loss": 0.0091, "step": 657875 }, { "epoch": 6.47, "grad_norm": 0.3787485361099243, "learning_rate": 1.7846425769807463e-06, "loss": 0.0674, "step": 657900 }, { "epoch": 6.47, "grad_norm": 7.189507484436035, "learning_rate": 1.784518454526498e-06, "loss": 0.0365, "step": 657925 }, { "epoch": 6.47, "grad_norm": 3.7792720794677734, "learning_rate": 1.7843943320722494e-06, "loss": 0.0729, "step": 657950 }, { "epoch": 6.47, "grad_norm": 4.595759868621826, "learning_rate": 1.784270209618001e-06, "loss": 0.017, "step": 657975 }, { "epoch": 6.47, "grad_norm": 1.3139920234680176, "learning_rate": 1.7841460871637526e-06, "loss": 0.0878, "step": 658000 }, { "epoch": 6.47, "grad_norm": 7.9847517013549805, "learning_rate": 1.7840219647095038e-06, "loss": 0.0165, "step": 658025 }, { "epoch": 6.47, "grad_norm": 0.059806983917951584, "learning_rate": 1.7838978422552555e-06, "loss": 0.099, "step": 658050 }, { "epoch": 6.47, "grad_norm": 10.248119354248047, "learning_rate": 1.7837737198010071e-06, "loss": 0.0157, "step": 658075 }, { "epoch": 6.47, "grad_norm": 2.49552845954895, "learning_rate": 1.7836495973467585e-06, "loss": 0.064, "step": 658100 }, { "epoch": 6.47, "grad_norm": 2.177438974380493, "learning_rate": 1.7835254748925102e-06, "loss": 0.0243, "step": 658125 }, { "epoch": 6.47, "grad_norm": 1.8860701322555542, "learning_rate": 1.7834013524382616e-06, "loss": 0.0728, "step": 658150 }, { "epoch": 6.47, "grad_norm": 6.113802909851074, "learning_rate": 1.7832772299840132e-06, "loss": 0.0344, "step": 658175 }, { "epoch": 6.47, "grad_norm": 4.290478229522705, "learning_rate": 1.7831531075297649e-06, "loss": 0.0634, "step": 658200 }, { "epoch": 6.47, "grad_norm": 6.66369104385376, "learning_rate": 1.783028985075516e-06, "loss": 0.0182, "step": 658225 }, { "epoch": 6.47, "grad_norm": 3.59446382522583, "learning_rate": 1.7829048626212677e-06, "loss": 0.0819, "step": 658250 }, { "epoch": 6.47, "grad_norm": 1.680798053741455, "learning_rate": 1.7827807401670194e-06, "loss": 0.0153, "step": 658275 }, { "epoch": 6.47, "grad_norm": 11.296441078186035, "learning_rate": 1.7826566177127708e-06, "loss": 0.0891, "step": 658300 }, { "epoch": 6.47, "grad_norm": 3.3470277786254883, "learning_rate": 1.7825324952585224e-06, "loss": 0.0265, "step": 658325 }, { "epoch": 6.47, "grad_norm": 1.517032265663147, "learning_rate": 1.782408372804274e-06, "loss": 0.065, "step": 658350 }, { "epoch": 6.47, "grad_norm": 5.819012641906738, "learning_rate": 1.7822842503500255e-06, "loss": 0.0238, "step": 658375 }, { "epoch": 6.47, "grad_norm": 0.2529450058937073, "learning_rate": 1.782160127895777e-06, "loss": 0.0784, "step": 658400 }, { "epoch": 6.47, "grad_norm": 1.5006791353225708, "learning_rate": 1.7820360054415287e-06, "loss": 0.0437, "step": 658425 }, { "epoch": 6.47, "grad_norm": 5.659828186035156, "learning_rate": 1.78191188298728e-06, "loss": 0.1088, "step": 658450 }, { "epoch": 6.47, "grad_norm": 11.181060791015625, "learning_rate": 1.7817877605330316e-06, "loss": 0.0186, "step": 658475 }, { "epoch": 6.47, "grad_norm": 5.802740573883057, "learning_rate": 1.7816636380787832e-06, "loss": 0.0772, "step": 658500 }, { "epoch": 6.47, "grad_norm": 6.528761386871338, "learning_rate": 1.7815395156245346e-06, "loss": 0.0178, "step": 658525 }, { "epoch": 6.47, "grad_norm": 0.18647927045822144, "learning_rate": 1.7814153931702863e-06, "loss": 0.0574, "step": 658550 }, { "epoch": 6.48, "grad_norm": 13.732588768005371, "learning_rate": 1.7812912707160377e-06, "loss": 0.0259, "step": 658575 }, { "epoch": 6.48, "grad_norm": 10.345952033996582, "learning_rate": 1.7811671482617893e-06, "loss": 0.0975, "step": 658600 }, { "epoch": 6.48, "grad_norm": 6.442919731140137, "learning_rate": 1.781043025807541e-06, "loss": 0.0151, "step": 658625 }, { "epoch": 6.48, "grad_norm": 1.019486427307129, "learning_rate": 1.7809189033532922e-06, "loss": 0.1003, "step": 658650 }, { "epoch": 6.48, "grad_norm": 0.16688403487205505, "learning_rate": 1.7807947808990438e-06, "loss": 0.0322, "step": 658675 }, { "epoch": 6.48, "grad_norm": 1.8439112901687622, "learning_rate": 1.7806706584447955e-06, "loss": 0.0717, "step": 658700 }, { "epoch": 6.48, "grad_norm": 9.529854774475098, "learning_rate": 1.7805465359905469e-06, "loss": 0.0218, "step": 658725 }, { "epoch": 6.48, "grad_norm": 2.004347562789917, "learning_rate": 1.7804224135362985e-06, "loss": 0.0866, "step": 658750 }, { "epoch": 6.48, "grad_norm": 1.5979397296905518, "learning_rate": 1.7802982910820501e-06, "loss": 0.018, "step": 658775 }, { "epoch": 6.48, "grad_norm": 0.1467166692018509, "learning_rate": 1.7801741686278016e-06, "loss": 0.0551, "step": 658800 }, { "epoch": 6.48, "grad_norm": 1.3222858905792236, "learning_rate": 1.7800500461735532e-06, "loss": 0.0126, "step": 658825 }, { "epoch": 6.48, "grad_norm": 8.607177734375, "learning_rate": 1.7799259237193048e-06, "loss": 0.0707, "step": 658850 }, { "epoch": 6.48, "grad_norm": 9.999656677246094, "learning_rate": 1.779801801265056e-06, "loss": 0.0359, "step": 658875 }, { "epoch": 6.48, "grad_norm": 1.1922121047973633, "learning_rate": 1.7796776788108077e-06, "loss": 0.0932, "step": 658900 }, { "epoch": 6.48, "grad_norm": 2.84786319732666, "learning_rate": 1.7795535563565593e-06, "loss": 0.0264, "step": 658925 }, { "epoch": 6.48, "grad_norm": 1.9101325273513794, "learning_rate": 1.7794294339023107e-06, "loss": 0.1026, "step": 658950 }, { "epoch": 6.48, "grad_norm": 7.928747653961182, "learning_rate": 1.7793053114480624e-06, "loss": 0.027, "step": 658975 }, { "epoch": 6.48, "grad_norm": 0.8370623588562012, "learning_rate": 1.7791811889938138e-06, "loss": 0.0605, "step": 659000 }, { "epoch": 6.48, "grad_norm": 2.7526893615722656, "learning_rate": 1.7790570665395654e-06, "loss": 0.0156, "step": 659025 }, { "epoch": 6.48, "grad_norm": 7.603816032409668, "learning_rate": 1.778932944085317e-06, "loss": 0.0933, "step": 659050 }, { "epoch": 6.48, "grad_norm": 8.953385353088379, "learning_rate": 1.7788088216310683e-06, "loss": 0.016, "step": 659075 }, { "epoch": 6.48, "grad_norm": 0.04945269227027893, "learning_rate": 1.77868469917682e-06, "loss": 0.0683, "step": 659100 }, { "epoch": 6.48, "grad_norm": 5.6641764640808105, "learning_rate": 1.7785605767225716e-06, "loss": 0.0228, "step": 659125 }, { "epoch": 6.48, "grad_norm": 2.3151278495788574, "learning_rate": 1.778436454268323e-06, "loss": 0.0886, "step": 659150 }, { "epoch": 6.48, "grad_norm": 6.589200973510742, "learning_rate": 1.7783123318140746e-06, "loss": 0.022, "step": 659175 }, { "epoch": 6.48, "grad_norm": 4.4661054611206055, "learning_rate": 1.7781882093598262e-06, "loss": 0.0794, "step": 659200 }, { "epoch": 6.48, "grad_norm": 20.252105712890625, "learning_rate": 1.7780640869055777e-06, "loss": 0.0332, "step": 659225 }, { "epoch": 6.48, "grad_norm": 0.31695133447647095, "learning_rate": 1.7779399644513293e-06, "loss": 0.0787, "step": 659250 }, { "epoch": 6.48, "grad_norm": 9.725628852844238, "learning_rate": 1.777815841997081e-06, "loss": 0.024, "step": 659275 }, { "epoch": 6.48, "grad_norm": 0.17273083329200745, "learning_rate": 1.7776917195428324e-06, "loss": 0.0699, "step": 659300 }, { "epoch": 6.48, "grad_norm": 5.515263557434082, "learning_rate": 1.777567597088584e-06, "loss": 0.0223, "step": 659325 }, { "epoch": 6.48, "grad_norm": 1.919836401939392, "learning_rate": 1.7774434746343356e-06, "loss": 0.064, "step": 659350 }, { "epoch": 6.48, "grad_norm": 1.8263800144195557, "learning_rate": 1.7773193521800868e-06, "loss": 0.0231, "step": 659375 }, { "epoch": 6.48, "grad_norm": 9.409089088439941, "learning_rate": 1.7771952297258385e-06, "loss": 0.0657, "step": 659400 }, { "epoch": 6.48, "grad_norm": 0.822384774684906, "learning_rate": 1.77707110727159e-06, "loss": 0.0275, "step": 659425 }, { "epoch": 6.48, "grad_norm": 1.7907326221466064, "learning_rate": 1.7769469848173415e-06, "loss": 0.0821, "step": 659450 }, { "epoch": 6.48, "grad_norm": 4.663520812988281, "learning_rate": 1.7768228623630932e-06, "loss": 0.0337, "step": 659475 }, { "epoch": 6.48, "grad_norm": 5.064788818359375, "learning_rate": 1.7766987399088446e-06, "loss": 0.0495, "step": 659500 }, { "epoch": 6.48, "grad_norm": 12.485033988952637, "learning_rate": 1.7765746174545962e-06, "loss": 0.0167, "step": 659525 }, { "epoch": 6.48, "grad_norm": 1.0340629816055298, "learning_rate": 1.7764504950003479e-06, "loss": 0.0694, "step": 659550 }, { "epoch": 6.49, "grad_norm": 2.687472105026245, "learning_rate": 1.776326372546099e-06, "loss": 0.0218, "step": 659575 }, { "epoch": 6.49, "grad_norm": 3.1288251876831055, "learning_rate": 1.7762022500918507e-06, "loss": 0.085, "step": 659600 }, { "epoch": 6.49, "grad_norm": 2.6127781867980957, "learning_rate": 1.7760781276376023e-06, "loss": 0.016, "step": 659625 }, { "epoch": 6.49, "grad_norm": 2.325939655303955, "learning_rate": 1.7759540051833538e-06, "loss": 0.097, "step": 659650 }, { "epoch": 6.49, "grad_norm": 2.8077940940856934, "learning_rate": 1.7758298827291054e-06, "loss": 0.0174, "step": 659675 }, { "epoch": 6.49, "grad_norm": 2.927513599395752, "learning_rate": 1.775705760274857e-06, "loss": 0.0597, "step": 659700 }, { "epoch": 6.49, "grad_norm": 4.802402019500732, "learning_rate": 1.7755816378206085e-06, "loss": 0.0148, "step": 659725 }, { "epoch": 6.49, "grad_norm": 4.70927619934082, "learning_rate": 1.77545751536636e-06, "loss": 0.0821, "step": 659750 }, { "epoch": 6.49, "grad_norm": 3.3552329540252686, "learning_rate": 1.7753333929121117e-06, "loss": 0.0309, "step": 659775 }, { "epoch": 6.49, "grad_norm": 6.1241068840026855, "learning_rate": 1.775209270457863e-06, "loss": 0.0835, "step": 659800 }, { "epoch": 6.49, "grad_norm": 1.4241487979888916, "learning_rate": 1.7750851480036146e-06, "loss": 0.0152, "step": 659825 }, { "epoch": 6.49, "grad_norm": 0.9507260918617249, "learning_rate": 1.774961025549366e-06, "loss": 0.0628, "step": 659850 }, { "epoch": 6.49, "grad_norm": 5.304272174835205, "learning_rate": 1.7748369030951176e-06, "loss": 0.0119, "step": 659875 }, { "epoch": 6.49, "grad_norm": 0.14887362718582153, "learning_rate": 1.7747127806408693e-06, "loss": 0.1084, "step": 659900 }, { "epoch": 6.49, "grad_norm": 6.315070629119873, "learning_rate": 1.7745886581866207e-06, "loss": 0.0346, "step": 659925 }, { "epoch": 6.49, "grad_norm": 1.4893524646759033, "learning_rate": 1.7744645357323723e-06, "loss": 0.0751, "step": 659950 }, { "epoch": 6.49, "grad_norm": 0.7759232521057129, "learning_rate": 1.774340413278124e-06, "loss": 0.0306, "step": 659975 }, { "epoch": 6.49, "grad_norm": 4.884652137756348, "learning_rate": 1.7742212557220454e-06, "loss": 0.0654, "step": 660000 }, { "epoch": 6.49, "eval_loss": 0.8278372883796692, "eval_runtime": 6122.8386, "eval_samples_per_second": 1.546, "eval_steps_per_second": 0.193, "eval_wer": 0.11190690787355952, "step": 660000 }, { "epoch": 6.49, "grad_norm": 14.069220542907715, "learning_rate": 1.7740971332677966e-06, "loss": 0.0171, "step": 660025 }, { "epoch": 6.49, "grad_norm": 1.8587050437927246, "learning_rate": 1.7739730108135483e-06, "loss": 0.0655, "step": 660050 }, { "epoch": 6.49, "grad_norm": 8.994688034057617, "learning_rate": 1.7738488883593e-06, "loss": 0.0219, "step": 660075 }, { "epoch": 6.49, "grad_norm": 1.0650469064712524, "learning_rate": 1.7737247659050513e-06, "loss": 0.0791, "step": 660100 }, { "epoch": 6.49, "grad_norm": 2.3002212047576904, "learning_rate": 1.773600643450803e-06, "loss": 0.0151, "step": 660125 }, { "epoch": 6.49, "grad_norm": 1.327704906463623, "learning_rate": 1.7734765209965546e-06, "loss": 0.0855, "step": 660150 }, { "epoch": 6.49, "grad_norm": 6.1658759117126465, "learning_rate": 1.773352398542306e-06, "loss": 0.0197, "step": 660175 }, { "epoch": 6.49, "grad_norm": 2.1825854778289795, "learning_rate": 1.7732282760880576e-06, "loss": 0.0812, "step": 660200 }, { "epoch": 6.49, "grad_norm": 5.599759578704834, "learning_rate": 1.7731041536338093e-06, "loss": 0.0171, "step": 660225 }, { "epoch": 6.49, "grad_norm": 3.1910266876220703, "learning_rate": 1.7729800311795605e-06, "loss": 0.0782, "step": 660250 }, { "epoch": 6.49, "grad_norm": 1.1354378461837769, "learning_rate": 1.7728559087253121e-06, "loss": 0.0264, "step": 660275 }, { "epoch": 6.49, "grad_norm": 0.3898811936378479, "learning_rate": 1.7727317862710638e-06, "loss": 0.0575, "step": 660300 }, { "epoch": 6.49, "grad_norm": 4.238017559051514, "learning_rate": 1.7726076638168152e-06, "loss": 0.0266, "step": 660325 }, { "epoch": 6.49, "grad_norm": 8.78394889831543, "learning_rate": 1.7724835413625668e-06, "loss": 0.1138, "step": 660350 }, { "epoch": 6.49, "grad_norm": 1.3844767808914185, "learning_rate": 1.7723594189083182e-06, "loss": 0.0197, "step": 660375 }, { "epoch": 6.49, "grad_norm": 6.4986653327941895, "learning_rate": 1.7722352964540699e-06, "loss": 0.0808, "step": 660400 }, { "epoch": 6.49, "grad_norm": 7.205049991607666, "learning_rate": 1.7721111739998215e-06, "loss": 0.0212, "step": 660425 }, { "epoch": 6.49, "grad_norm": 3.0761051177978516, "learning_rate": 1.7719870515455727e-06, "loss": 0.085, "step": 660450 }, { "epoch": 6.49, "grad_norm": 6.866081237792969, "learning_rate": 1.7718629290913244e-06, "loss": 0.0155, "step": 660475 }, { "epoch": 6.49, "grad_norm": 3.898594617843628, "learning_rate": 1.771738806637076e-06, "loss": 0.0633, "step": 660500 }, { "epoch": 6.49, "grad_norm": 0.33455324172973633, "learning_rate": 1.7716146841828274e-06, "loss": 0.0302, "step": 660525 }, { "epoch": 6.49, "grad_norm": 9.328761100769043, "learning_rate": 1.771490561728579e-06, "loss": 0.0811, "step": 660550 }, { "epoch": 6.49, "grad_norm": 0.7724242806434631, "learning_rate": 1.7713664392743307e-06, "loss": 0.026, "step": 660575 }, { "epoch": 6.5, "grad_norm": 2.85597825050354, "learning_rate": 1.7712423168200821e-06, "loss": 0.0912, "step": 660600 }, { "epoch": 6.5, "grad_norm": 4.573678493499756, "learning_rate": 1.7711181943658337e-06, "loss": 0.0149, "step": 660625 }, { "epoch": 6.5, "grad_norm": 1.7424628734588623, "learning_rate": 1.7709940719115854e-06, "loss": 0.0709, "step": 660650 }, { "epoch": 6.5, "grad_norm": 2.2283074855804443, "learning_rate": 1.7708699494573368e-06, "loss": 0.0232, "step": 660675 }, { "epoch": 6.5, "grad_norm": 1.8053172826766968, "learning_rate": 1.7707458270030884e-06, "loss": 0.0774, "step": 660700 }, { "epoch": 6.5, "grad_norm": 10.563830375671387, "learning_rate": 1.7706217045488399e-06, "loss": 0.0198, "step": 660725 }, { "epoch": 6.5, "grad_norm": 0.1001075804233551, "learning_rate": 1.7704975820945913e-06, "loss": 0.0932, "step": 660750 }, { "epoch": 6.5, "grad_norm": 10.143744468688965, "learning_rate": 1.770373459640343e-06, "loss": 0.0277, "step": 660775 }, { "epoch": 6.5, "grad_norm": 5.792082786560059, "learning_rate": 1.7702493371860946e-06, "loss": 0.0814, "step": 660800 }, { "epoch": 6.5, "grad_norm": 7.499363899230957, "learning_rate": 1.770125214731846e-06, "loss": 0.0268, "step": 660825 }, { "epoch": 6.5, "grad_norm": 6.192385673522949, "learning_rate": 1.7700010922775976e-06, "loss": 0.0657, "step": 660850 }, { "epoch": 6.5, "grad_norm": 6.774868965148926, "learning_rate": 1.769876969823349e-06, "loss": 0.0357, "step": 660875 }, { "epoch": 6.5, "grad_norm": 1.1733782291412354, "learning_rate": 1.7697528473691007e-06, "loss": 0.0886, "step": 660900 }, { "epoch": 6.5, "grad_norm": 16.883026123046875, "learning_rate": 1.7696287249148523e-06, "loss": 0.0332, "step": 660925 }, { "epoch": 6.5, "grad_norm": 4.084510326385498, "learning_rate": 1.7695046024606035e-06, "loss": 0.0687, "step": 660950 }, { "epoch": 6.5, "grad_norm": 4.321298599243164, "learning_rate": 1.7693804800063552e-06, "loss": 0.0268, "step": 660975 }, { "epoch": 6.5, "grad_norm": 4.531108379364014, "learning_rate": 1.7692563575521068e-06, "loss": 0.0667, "step": 661000 }, { "epoch": 6.5, "grad_norm": 10.875694274902344, "learning_rate": 1.7691322350978582e-06, "loss": 0.0421, "step": 661025 }, { "epoch": 6.5, "grad_norm": 4.223578929901123, "learning_rate": 1.7690081126436098e-06, "loss": 0.0601, "step": 661050 }, { "epoch": 6.5, "grad_norm": 2.656244993209839, "learning_rate": 1.7688839901893615e-06, "loss": 0.0369, "step": 661075 }, { "epoch": 6.5, "grad_norm": 0.1015833392739296, "learning_rate": 1.768759867735113e-06, "loss": 0.0694, "step": 661100 }, { "epoch": 6.5, "grad_norm": 5.163730144500732, "learning_rate": 1.7686357452808645e-06, "loss": 0.0273, "step": 661125 }, { "epoch": 6.5, "grad_norm": 1.6323399543762207, "learning_rate": 1.7685116228266162e-06, "loss": 0.0816, "step": 661150 }, { "epoch": 6.5, "grad_norm": 6.007724761962891, "learning_rate": 1.7683875003723674e-06, "loss": 0.0203, "step": 661175 }, { "epoch": 6.5, "grad_norm": 2.3044791221618652, "learning_rate": 1.768263377918119e-06, "loss": 0.0699, "step": 661200 }, { "epoch": 6.5, "grad_norm": 4.712219715118408, "learning_rate": 1.7681392554638707e-06, "loss": 0.0294, "step": 661225 }, { "epoch": 6.5, "grad_norm": 0.5236170887947083, "learning_rate": 1.768015133009622e-06, "loss": 0.0634, "step": 661250 }, { "epoch": 6.5, "grad_norm": 18.14234733581543, "learning_rate": 1.7678910105553737e-06, "loss": 0.0199, "step": 661275 }, { "epoch": 6.5, "grad_norm": 0.07908939570188522, "learning_rate": 1.7677668881011251e-06, "loss": 0.0914, "step": 661300 }, { "epoch": 6.5, "grad_norm": 10.370061874389648, "learning_rate": 1.7676427656468768e-06, "loss": 0.0222, "step": 661325 }, { "epoch": 6.5, "grad_norm": 0.6734567284584045, "learning_rate": 1.7675186431926284e-06, "loss": 0.0876, "step": 661350 }, { "epoch": 6.5, "grad_norm": 1.6928629875183105, "learning_rate": 1.7673945207383796e-06, "loss": 0.0275, "step": 661375 }, { "epoch": 6.5, "grad_norm": 2.3404533863067627, "learning_rate": 1.7672703982841313e-06, "loss": 0.0491, "step": 661400 }, { "epoch": 6.5, "grad_norm": 4.112696170806885, "learning_rate": 1.7671462758298829e-06, "loss": 0.0186, "step": 661425 }, { "epoch": 6.5, "grad_norm": 0.8144617080688477, "learning_rate": 1.7670221533756343e-06, "loss": 0.0828, "step": 661450 }, { "epoch": 6.5, "grad_norm": 4.057206630706787, "learning_rate": 1.766898030921386e-06, "loss": 0.0326, "step": 661475 }, { "epoch": 6.5, "grad_norm": 3.7722527980804443, "learning_rate": 1.7667739084671376e-06, "loss": 0.0789, "step": 661500 }, { "epoch": 6.5, "grad_norm": 5.654666900634766, "learning_rate": 1.766649786012889e-06, "loss": 0.0337, "step": 661525 }, { "epoch": 6.5, "grad_norm": 2.8701558113098145, "learning_rate": 1.7665256635586406e-06, "loss": 0.0867, "step": 661550 }, { "epoch": 6.5, "grad_norm": 11.091866493225098, "learning_rate": 1.7664015411043923e-06, "loss": 0.0313, "step": 661575 }, { "epoch": 6.5, "grad_norm": 2.664722204208374, "learning_rate": 1.7662774186501435e-06, "loss": 0.0838, "step": 661600 }, { "epoch": 6.51, "grad_norm": 23.219144821166992, "learning_rate": 1.7661532961958951e-06, "loss": 0.0289, "step": 661625 }, { "epoch": 6.51, "grad_norm": 1.18478524684906, "learning_rate": 1.7660291737416468e-06, "loss": 0.071, "step": 661650 }, { "epoch": 6.51, "grad_norm": 6.749780178070068, "learning_rate": 1.7659050512873982e-06, "loss": 0.0139, "step": 661675 }, { "epoch": 6.51, "grad_norm": 0.37137654423713684, "learning_rate": 1.7657809288331498e-06, "loss": 0.0728, "step": 661700 }, { "epoch": 6.51, "grad_norm": 3.701543092727661, "learning_rate": 1.7656568063789012e-06, "loss": 0.0233, "step": 661725 }, { "epoch": 6.51, "grad_norm": 5.304332733154297, "learning_rate": 1.7655326839246529e-06, "loss": 0.082, "step": 661750 }, { "epoch": 6.51, "grad_norm": 7.601253032684326, "learning_rate": 1.7654085614704045e-06, "loss": 0.0336, "step": 661775 }, { "epoch": 6.51, "grad_norm": 1.658255696296692, "learning_rate": 1.7652844390161557e-06, "loss": 0.0552, "step": 661800 }, { "epoch": 6.51, "grad_norm": 13.8595552444458, "learning_rate": 1.7651603165619074e-06, "loss": 0.0151, "step": 661825 }, { "epoch": 6.51, "grad_norm": 0.5332741737365723, "learning_rate": 1.765036194107659e-06, "loss": 0.0804, "step": 661850 }, { "epoch": 6.51, "grad_norm": 5.491749286651611, "learning_rate": 1.7649120716534104e-06, "loss": 0.0232, "step": 661875 }, { "epoch": 6.51, "grad_norm": 7.168395519256592, "learning_rate": 1.764787949199162e-06, "loss": 0.0922, "step": 661900 }, { "epoch": 6.51, "grad_norm": 6.766337871551514, "learning_rate": 1.7646638267449137e-06, "loss": 0.0225, "step": 661925 }, { "epoch": 6.51, "grad_norm": 7.308917045593262, "learning_rate": 1.7645397042906651e-06, "loss": 0.1006, "step": 661950 }, { "epoch": 6.51, "grad_norm": 3.738150119781494, "learning_rate": 1.7644155818364167e-06, "loss": 0.0267, "step": 661975 }, { "epoch": 6.51, "grad_norm": 1.2387003898620605, "learning_rate": 1.7642914593821684e-06, "loss": 0.063, "step": 662000 }, { "epoch": 6.51, "grad_norm": 5.8768157958984375, "learning_rate": 1.7641673369279196e-06, "loss": 0.0273, "step": 662025 }, { "epoch": 6.51, "grad_norm": 3.175724983215332, "learning_rate": 1.7640432144736712e-06, "loss": 0.0547, "step": 662050 }, { "epoch": 6.51, "grad_norm": 0.3541868329048157, "learning_rate": 1.7639190920194229e-06, "loss": 0.0186, "step": 662075 }, { "epoch": 6.51, "grad_norm": 0.4498620629310608, "learning_rate": 1.7637949695651743e-06, "loss": 0.0942, "step": 662100 }, { "epoch": 6.51, "grad_norm": 10.350646018981934, "learning_rate": 1.763670847110926e-06, "loss": 0.0256, "step": 662125 }, { "epoch": 6.51, "grad_norm": 0.4285840392112732, "learning_rate": 1.7635516895548474e-06, "loss": 0.0776, "step": 662150 }, { "epoch": 6.51, "grad_norm": 14.696369171142578, "learning_rate": 1.763427567100599e-06, "loss": 0.0182, "step": 662175 }, { "epoch": 6.51, "grad_norm": 3.1497888565063477, "learning_rate": 1.7633034446463504e-06, "loss": 0.0854, "step": 662200 }, { "epoch": 6.51, "grad_norm": 3.613922595977783, "learning_rate": 1.763179322192102e-06, "loss": 0.0271, "step": 662225 }, { "epoch": 6.51, "grad_norm": 8.797826766967773, "learning_rate": 1.7630551997378535e-06, "loss": 0.0605, "step": 662250 }, { "epoch": 6.51, "grad_norm": 6.467240810394287, "learning_rate": 1.7629310772836051e-06, "loss": 0.0207, "step": 662275 }, { "epoch": 6.51, "grad_norm": 6.3923726081848145, "learning_rate": 1.7628069548293568e-06, "loss": 0.0695, "step": 662300 }, { "epoch": 6.51, "grad_norm": 4.310260772705078, "learning_rate": 1.762682832375108e-06, "loss": 0.0222, "step": 662325 }, { "epoch": 6.51, "grad_norm": 2.4263224601745605, "learning_rate": 1.7625587099208596e-06, "loss": 0.1016, "step": 662350 }, { "epoch": 6.51, "grad_norm": 12.420886039733887, "learning_rate": 1.7624345874666112e-06, "loss": 0.0192, "step": 662375 }, { "epoch": 6.51, "grad_norm": 3.1809957027435303, "learning_rate": 1.7623104650123627e-06, "loss": 0.0689, "step": 662400 }, { "epoch": 6.51, "grad_norm": 3.431650161743164, "learning_rate": 1.7621863425581143e-06, "loss": 0.0225, "step": 662425 }, { "epoch": 6.51, "grad_norm": 1.0527026653289795, "learning_rate": 1.762062220103866e-06, "loss": 0.0608, "step": 662450 }, { "epoch": 6.51, "grad_norm": 5.402132511138916, "learning_rate": 1.7619380976496174e-06, "loss": 0.0125, "step": 662475 }, { "epoch": 6.51, "grad_norm": 2.6717276573181152, "learning_rate": 1.761813975195369e-06, "loss": 0.0749, "step": 662500 }, { "epoch": 6.51, "grad_norm": 9.353358268737793, "learning_rate": 1.7616898527411206e-06, "loss": 0.0252, "step": 662525 }, { "epoch": 6.51, "grad_norm": 6.854691028594971, "learning_rate": 1.7615657302868718e-06, "loss": 0.0857, "step": 662550 }, { "epoch": 6.51, "grad_norm": 1.5903393030166626, "learning_rate": 1.7614416078326235e-06, "loss": 0.0376, "step": 662575 }, { "epoch": 6.51, "grad_norm": 1.8679741621017456, "learning_rate": 1.761317485378375e-06, "loss": 0.0842, "step": 662600 }, { "epoch": 6.52, "grad_norm": 9.68603229522705, "learning_rate": 1.7611933629241265e-06, "loss": 0.0229, "step": 662625 }, { "epoch": 6.52, "grad_norm": 0.979909360408783, "learning_rate": 1.7610692404698782e-06, "loss": 0.0548, "step": 662650 }, { "epoch": 6.52, "grad_norm": 5.0875444412231445, "learning_rate": 1.7609451180156296e-06, "loss": 0.0141, "step": 662675 }, { "epoch": 6.52, "grad_norm": 0.07873452454805374, "learning_rate": 1.7608209955613812e-06, "loss": 0.0632, "step": 662700 }, { "epoch": 6.52, "grad_norm": 10.367020606994629, "learning_rate": 1.7606968731071329e-06, "loss": 0.0249, "step": 662725 }, { "epoch": 6.52, "grad_norm": 8.745749473571777, "learning_rate": 1.760572750652884e-06, "loss": 0.115, "step": 662750 }, { "epoch": 6.52, "grad_norm": 1.79843008518219, "learning_rate": 1.7604486281986357e-06, "loss": 0.0152, "step": 662775 }, { "epoch": 6.52, "grad_norm": 5.35725212097168, "learning_rate": 1.7603245057443873e-06, "loss": 0.0929, "step": 662800 }, { "epoch": 6.52, "grad_norm": 4.322059631347656, "learning_rate": 1.7602003832901388e-06, "loss": 0.0241, "step": 662825 }, { "epoch": 6.52, "grad_norm": 4.093050003051758, "learning_rate": 1.7600762608358904e-06, "loss": 0.0753, "step": 662850 }, { "epoch": 6.52, "grad_norm": 10.117828369140625, "learning_rate": 1.759952138381642e-06, "loss": 0.0234, "step": 662875 }, { "epoch": 6.52, "grad_norm": 0.5662024021148682, "learning_rate": 1.7598280159273935e-06, "loss": 0.0656, "step": 662900 }, { "epoch": 6.52, "grad_norm": 6.170635223388672, "learning_rate": 1.759703893473145e-06, "loss": 0.0333, "step": 662925 }, { "epoch": 6.52, "grad_norm": 3.337740659713745, "learning_rate": 1.7595797710188967e-06, "loss": 0.0742, "step": 662950 }, { "epoch": 6.52, "grad_norm": 8.17275333404541, "learning_rate": 1.759455648564648e-06, "loss": 0.0243, "step": 662975 }, { "epoch": 6.52, "grad_norm": 2.9943597316741943, "learning_rate": 1.7593315261103996e-06, "loss": 0.0807, "step": 663000 }, { "epoch": 6.52, "grad_norm": 10.18732738494873, "learning_rate": 1.7592074036561512e-06, "loss": 0.0257, "step": 663025 }, { "epoch": 6.52, "grad_norm": 1.0092101097106934, "learning_rate": 1.7590832812019026e-06, "loss": 0.054, "step": 663050 }, { "epoch": 6.52, "grad_norm": 1.3941130638122559, "learning_rate": 1.7589591587476543e-06, "loss": 0.0233, "step": 663075 }, { "epoch": 6.52, "grad_norm": 2.790069580078125, "learning_rate": 1.7588350362934057e-06, "loss": 0.0779, "step": 663100 }, { "epoch": 6.52, "grad_norm": 11.449323654174805, "learning_rate": 1.7587109138391573e-06, "loss": 0.0245, "step": 663125 }, { "epoch": 6.52, "grad_norm": 5.724154472351074, "learning_rate": 1.758586791384909e-06, "loss": 0.0709, "step": 663150 }, { "epoch": 6.52, "grad_norm": 6.638367652893066, "learning_rate": 1.7584626689306602e-06, "loss": 0.0234, "step": 663175 }, { "epoch": 6.52, "grad_norm": 1.3696082830429077, "learning_rate": 1.7583385464764118e-06, "loss": 0.0652, "step": 663200 }, { "epoch": 6.52, "grad_norm": 7.116377353668213, "learning_rate": 1.7582144240221634e-06, "loss": 0.0246, "step": 663225 }, { "epoch": 6.52, "grad_norm": 1.555016279220581, "learning_rate": 1.7580903015679149e-06, "loss": 0.0861, "step": 663250 }, { "epoch": 6.52, "grad_norm": 5.515686511993408, "learning_rate": 1.7579661791136665e-06, "loss": 0.0274, "step": 663275 }, { "epoch": 6.52, "grad_norm": 0.14226627349853516, "learning_rate": 1.7578420566594181e-06, "loss": 0.0783, "step": 663300 }, { "epoch": 6.52, "grad_norm": 5.489019393920898, "learning_rate": 1.7577179342051696e-06, "loss": 0.0246, "step": 663325 }, { "epoch": 6.52, "grad_norm": 0.1837407499551773, "learning_rate": 1.7575938117509212e-06, "loss": 0.0843, "step": 663350 }, { "epoch": 6.52, "grad_norm": 3.2993392944335938, "learning_rate": 1.7574696892966728e-06, "loss": 0.0186, "step": 663375 }, { "epoch": 6.52, "grad_norm": 1.1137666702270508, "learning_rate": 1.757345566842424e-06, "loss": 0.0679, "step": 663400 }, { "epoch": 6.52, "grad_norm": 6.719437599182129, "learning_rate": 1.7572214443881757e-06, "loss": 0.0247, "step": 663425 }, { "epoch": 6.52, "grad_norm": 2.4423184394836426, "learning_rate": 1.7570973219339273e-06, "loss": 0.0683, "step": 663450 }, { "epoch": 6.52, "grad_norm": 2.382797956466675, "learning_rate": 1.7569731994796787e-06, "loss": 0.0151, "step": 663475 }, { "epoch": 6.52, "grad_norm": 0.8674654364585876, "learning_rate": 1.7568490770254304e-06, "loss": 0.0768, "step": 663500 }, { "epoch": 6.52, "grad_norm": 8.82262134552002, "learning_rate": 1.7567249545711818e-06, "loss": 0.0286, "step": 663525 }, { "epoch": 6.52, "grad_norm": 4.092211723327637, "learning_rate": 1.7566008321169334e-06, "loss": 0.061, "step": 663550 }, { "epoch": 6.52, "grad_norm": 17.117820739746094, "learning_rate": 1.756476709662685e-06, "loss": 0.0363, "step": 663575 }, { "epoch": 6.52, "grad_norm": 4.542871475219727, "learning_rate": 1.7563525872084365e-06, "loss": 0.085, "step": 663600 }, { "epoch": 6.52, "grad_norm": 9.76734447479248, "learning_rate": 1.7562284647541881e-06, "loss": 0.019, "step": 663625 }, { "epoch": 6.53, "grad_norm": 1.27806556224823, "learning_rate": 1.7561043422999397e-06, "loss": 0.0787, "step": 663650 }, { "epoch": 6.53, "grad_norm": 0.47523200511932373, "learning_rate": 1.755980219845691e-06, "loss": 0.0302, "step": 663675 }, { "epoch": 6.53, "grad_norm": 0.5882977247238159, "learning_rate": 1.7558560973914426e-06, "loss": 0.0741, "step": 663700 }, { "epoch": 6.53, "grad_norm": 7.0020751953125, "learning_rate": 1.7557319749371942e-06, "loss": 0.031, "step": 663725 }, { "epoch": 6.53, "grad_norm": 3.84106183052063, "learning_rate": 1.7556078524829457e-06, "loss": 0.0764, "step": 663750 }, { "epoch": 6.53, "grad_norm": 6.672238349914551, "learning_rate": 1.7554837300286973e-06, "loss": 0.0291, "step": 663775 }, { "epoch": 6.53, "grad_norm": 3.887500047683716, "learning_rate": 1.755359607574449e-06, "loss": 0.0575, "step": 663800 }, { "epoch": 6.53, "grad_norm": 7.220458030700684, "learning_rate": 1.7552354851202003e-06, "loss": 0.0332, "step": 663825 }, { "epoch": 6.53, "grad_norm": 1.080947995185852, "learning_rate": 1.755111362665952e-06, "loss": 0.0971, "step": 663850 }, { "epoch": 6.53, "grad_norm": 3.8169829845428467, "learning_rate": 1.7549872402117036e-06, "loss": 0.022, "step": 663875 }, { "epoch": 6.53, "grad_norm": 4.4771409034729, "learning_rate": 1.7548631177574548e-06, "loss": 0.0487, "step": 663900 }, { "epoch": 6.53, "grad_norm": 7.692729473114014, "learning_rate": 1.7547389953032065e-06, "loss": 0.0235, "step": 663925 }, { "epoch": 6.53, "grad_norm": 10.652542114257812, "learning_rate": 1.7546148728489579e-06, "loss": 0.0857, "step": 663950 }, { "epoch": 6.53, "grad_norm": 9.235116004943848, "learning_rate": 1.7544907503947095e-06, "loss": 0.0166, "step": 663975 }, { "epoch": 6.53, "grad_norm": 1.7402808666229248, "learning_rate": 1.7543666279404612e-06, "loss": 0.0681, "step": 664000 }, { "epoch": 6.53, "grad_norm": 9.350111961364746, "learning_rate": 1.7542425054862126e-06, "loss": 0.0173, "step": 664025 }, { "epoch": 6.53, "grad_norm": 1.01729154586792, "learning_rate": 1.7541183830319642e-06, "loss": 0.0762, "step": 664050 }, { "epoch": 6.53, "grad_norm": 3.7205049991607666, "learning_rate": 1.7539942605777159e-06, "loss": 0.0249, "step": 664075 }, { "epoch": 6.53, "grad_norm": 4.155632495880127, "learning_rate": 1.753870138123467e-06, "loss": 0.056, "step": 664100 }, { "epoch": 6.53, "grad_norm": 2.893033742904663, "learning_rate": 1.7537460156692187e-06, "loss": 0.0132, "step": 664125 }, { "epoch": 6.53, "grad_norm": 1.647017002105713, "learning_rate": 1.7536218932149703e-06, "loss": 0.083, "step": 664150 }, { "epoch": 6.53, "grad_norm": 2.8957502841949463, "learning_rate": 1.7534977707607218e-06, "loss": 0.0198, "step": 664175 }, { "epoch": 6.53, "grad_norm": 5.630488872528076, "learning_rate": 1.7533786132046432e-06, "loss": 0.0755, "step": 664200 }, { "epoch": 6.53, "grad_norm": 5.010453701019287, "learning_rate": 1.7532544907503948e-06, "loss": 0.0243, "step": 664225 }, { "epoch": 6.53, "grad_norm": 0.183375746011734, "learning_rate": 1.7531303682961465e-06, "loss": 0.0736, "step": 664250 }, { "epoch": 6.53, "grad_norm": 5.185333728790283, "learning_rate": 1.753006245841898e-06, "loss": 0.0264, "step": 664275 }, { "epoch": 6.53, "grad_norm": 0.33430689573287964, "learning_rate": 1.7528821233876495e-06, "loss": 0.0717, "step": 664300 }, { "epoch": 6.53, "grad_norm": 6.4827165603637695, "learning_rate": 1.7527580009334012e-06, "loss": 0.0184, "step": 664325 }, { "epoch": 6.53, "grad_norm": 2.7644424438476562, "learning_rate": 1.7526338784791524e-06, "loss": 0.0806, "step": 664350 }, { "epoch": 6.53, "grad_norm": 1.2173686027526855, "learning_rate": 1.752509756024904e-06, "loss": 0.0157, "step": 664375 }, { "epoch": 6.53, "grad_norm": 1.3838307857513428, "learning_rate": 1.7523856335706557e-06, "loss": 0.0579, "step": 664400 }, { "epoch": 6.53, "grad_norm": 4.436415672302246, "learning_rate": 1.752261511116407e-06, "loss": 0.0246, "step": 664425 }, { "epoch": 6.53, "grad_norm": 2.2905046939849854, "learning_rate": 1.7521373886621587e-06, "loss": 0.0704, "step": 664450 }, { "epoch": 6.53, "grad_norm": 8.88100814819336, "learning_rate": 1.7520132662079103e-06, "loss": 0.0319, "step": 664475 }, { "epoch": 6.53, "grad_norm": 4.179770469665527, "learning_rate": 1.7518891437536618e-06, "loss": 0.1008, "step": 664500 }, { "epoch": 6.53, "grad_norm": 8.13878059387207, "learning_rate": 1.7517650212994134e-06, "loss": 0.0215, "step": 664525 }, { "epoch": 6.53, "grad_norm": 4.150840759277344, "learning_rate": 1.7516408988451646e-06, "loss": 0.0873, "step": 664550 }, { "epoch": 6.53, "grad_norm": 4.681792259216309, "learning_rate": 1.7515167763909162e-06, "loss": 0.0238, "step": 664575 }, { "epoch": 6.53, "grad_norm": 3.43367338180542, "learning_rate": 1.7513926539366679e-06, "loss": 0.0698, "step": 664600 }, { "epoch": 6.53, "grad_norm": 4.37947940826416, "learning_rate": 1.7512685314824193e-06, "loss": 0.0164, "step": 664625 }, { "epoch": 6.53, "grad_norm": 1.8755123615264893, "learning_rate": 1.751144409028171e-06, "loss": 0.0734, "step": 664650 }, { "epoch": 6.54, "grad_norm": 8.143732070922852, "learning_rate": 1.7510202865739226e-06, "loss": 0.0229, "step": 664675 }, { "epoch": 6.54, "grad_norm": 0.03988053277134895, "learning_rate": 1.750896164119674e-06, "loss": 0.0712, "step": 664700 }, { "epoch": 6.54, "grad_norm": 3.8928933143615723, "learning_rate": 1.7507720416654256e-06, "loss": 0.02, "step": 664725 }, { "epoch": 6.54, "grad_norm": 0.3615834414958954, "learning_rate": 1.7506479192111773e-06, "loss": 0.08, "step": 664750 }, { "epoch": 6.54, "grad_norm": 4.596623420715332, "learning_rate": 1.7505237967569285e-06, "loss": 0.017, "step": 664775 }, { "epoch": 6.54, "grad_norm": 4.1355509757995605, "learning_rate": 1.7503996743026801e-06, "loss": 0.083, "step": 664800 }, { "epoch": 6.54, "grad_norm": 6.480256080627441, "learning_rate": 1.7502755518484318e-06, "loss": 0.0271, "step": 664825 }, { "epoch": 6.54, "grad_norm": 0.5807973742485046, "learning_rate": 1.7501514293941832e-06, "loss": 0.0736, "step": 664850 }, { "epoch": 6.54, "grad_norm": 11.443944931030273, "learning_rate": 1.7500273069399348e-06, "loss": 0.0218, "step": 664875 }, { "epoch": 6.54, "grad_norm": 7.045327663421631, "learning_rate": 1.7499031844856864e-06, "loss": 0.062, "step": 664900 }, { "epoch": 6.54, "grad_norm": 0.19261009991168976, "learning_rate": 1.7497790620314379e-06, "loss": 0.0245, "step": 664925 }, { "epoch": 6.54, "grad_norm": 1.2513388395309448, "learning_rate": 1.7496549395771895e-06, "loss": 0.0791, "step": 664950 }, { "epoch": 6.54, "grad_norm": 6.326504230499268, "learning_rate": 1.7495308171229407e-06, "loss": 0.0201, "step": 664975 }, { "epoch": 6.54, "grad_norm": 0.3515150547027588, "learning_rate": 1.7494066946686923e-06, "loss": 0.0648, "step": 665000 }, { "epoch": 6.54, "grad_norm": 6.409961223602295, "learning_rate": 1.749282572214444e-06, "loss": 0.0236, "step": 665025 }, { "epoch": 6.54, "grad_norm": 1.3346930742263794, "learning_rate": 1.7491584497601954e-06, "loss": 0.0827, "step": 665050 }, { "epoch": 6.54, "grad_norm": 7.056695938110352, "learning_rate": 1.749034327305947e-06, "loss": 0.0286, "step": 665075 }, { "epoch": 6.54, "grad_norm": 3.7321865558624268, "learning_rate": 1.7489102048516987e-06, "loss": 0.0602, "step": 665100 }, { "epoch": 6.54, "grad_norm": 2.701211929321289, "learning_rate": 1.74878608239745e-06, "loss": 0.0262, "step": 665125 }, { "epoch": 6.54, "grad_norm": 0.5079078674316406, "learning_rate": 1.7486619599432017e-06, "loss": 0.0906, "step": 665150 }, { "epoch": 6.54, "grad_norm": 6.515922546386719, "learning_rate": 1.7485378374889534e-06, "loss": 0.0392, "step": 665175 }, { "epoch": 6.54, "grad_norm": 0.009838640689849854, "learning_rate": 1.7484137150347048e-06, "loss": 0.0638, "step": 665200 }, { "epoch": 6.54, "grad_norm": 16.667783737182617, "learning_rate": 1.7482895925804564e-06, "loss": 0.0199, "step": 665225 }, { "epoch": 6.54, "grad_norm": 2.1233878135681152, "learning_rate": 1.748165470126208e-06, "loss": 0.0738, "step": 665250 }, { "epoch": 6.54, "grad_norm": 4.553637981414795, "learning_rate": 1.7480413476719593e-06, "loss": 0.0225, "step": 665275 }, { "epoch": 6.54, "grad_norm": 1.2456878423690796, "learning_rate": 1.747917225217711e-06, "loss": 0.1006, "step": 665300 }, { "epoch": 6.54, "grad_norm": 8.312359809875488, "learning_rate": 1.7477931027634625e-06, "loss": 0.0396, "step": 665325 }, { "epoch": 6.54, "grad_norm": 0.29206711053848267, "learning_rate": 1.747668980309214e-06, "loss": 0.076, "step": 665350 }, { "epoch": 6.54, "grad_norm": 3.026651620864868, "learning_rate": 1.7475448578549656e-06, "loss": 0.0335, "step": 665375 }, { "epoch": 6.54, "grad_norm": 2.335190773010254, "learning_rate": 1.747420735400717e-06, "loss": 0.0783, "step": 665400 }, { "epoch": 6.54, "grad_norm": 8.318977355957031, "learning_rate": 1.7472966129464687e-06, "loss": 0.031, "step": 665425 }, { "epoch": 6.54, "grad_norm": 0.6979125142097473, "learning_rate": 1.7471724904922203e-06, "loss": 0.0672, "step": 665450 }, { "epoch": 6.54, "grad_norm": 9.973928451538086, "learning_rate": 1.7470483680379715e-06, "loss": 0.0288, "step": 665475 }, { "epoch": 6.54, "grad_norm": 5.12700891494751, "learning_rate": 1.7469242455837231e-06, "loss": 0.0754, "step": 665500 }, { "epoch": 6.54, "grad_norm": 8.71298599243164, "learning_rate": 1.7468001231294748e-06, "loss": 0.0173, "step": 665525 }, { "epoch": 6.54, "grad_norm": 3.246520519256592, "learning_rate": 1.7466760006752262e-06, "loss": 0.1015, "step": 665550 }, { "epoch": 6.54, "grad_norm": 7.4615397453308105, "learning_rate": 1.7465518782209778e-06, "loss": 0.0398, "step": 665575 }, { "epoch": 6.54, "grad_norm": 2.8158929347991943, "learning_rate": 1.7464277557667295e-06, "loss": 0.0726, "step": 665600 }, { "epoch": 6.54, "grad_norm": 7.7516937255859375, "learning_rate": 1.7463036333124809e-06, "loss": 0.0143, "step": 665625 }, { "epoch": 6.54, "grad_norm": 2.6749637126922607, "learning_rate": 1.7461795108582325e-06, "loss": 0.0753, "step": 665650 }, { "epoch": 6.55, "grad_norm": 13.093772888183594, "learning_rate": 1.7460553884039842e-06, "loss": 0.0301, "step": 665675 }, { "epoch": 6.55, "grad_norm": 2.057237386703491, "learning_rate": 1.7459312659497354e-06, "loss": 0.0547, "step": 665700 }, { "epoch": 6.55, "grad_norm": 11.31868839263916, "learning_rate": 1.745807143495487e-06, "loss": 0.0253, "step": 665725 }, { "epoch": 6.55, "grad_norm": 7.878498077392578, "learning_rate": 1.7456830210412386e-06, "loss": 0.0756, "step": 665750 }, { "epoch": 6.55, "grad_norm": 9.573188781738281, "learning_rate": 1.74555889858699e-06, "loss": 0.0207, "step": 665775 }, { "epoch": 6.55, "grad_norm": 1.5877230167388916, "learning_rate": 1.7454347761327417e-06, "loss": 0.0723, "step": 665800 }, { "epoch": 6.55, "grad_norm": 3.7344820499420166, "learning_rate": 1.7453106536784931e-06, "loss": 0.0236, "step": 665825 }, { "epoch": 6.55, "grad_norm": 1.1761753559112549, "learning_rate": 1.7451865312242448e-06, "loss": 0.0891, "step": 665850 }, { "epoch": 6.55, "grad_norm": 6.6922760009765625, "learning_rate": 1.7450624087699964e-06, "loss": 0.0236, "step": 665875 }, { "epoch": 6.55, "grad_norm": 6.343731880187988, "learning_rate": 1.7449382863157476e-06, "loss": 0.0997, "step": 665900 }, { "epoch": 6.55, "grad_norm": 1.268100380897522, "learning_rate": 1.7448141638614992e-06, "loss": 0.0259, "step": 665925 }, { "epoch": 6.55, "grad_norm": 3.4977262020111084, "learning_rate": 1.7446900414072509e-06, "loss": 0.0809, "step": 665950 }, { "epoch": 6.55, "grad_norm": 0.6114645600318909, "learning_rate": 1.7445659189530023e-06, "loss": 0.0353, "step": 665975 }, { "epoch": 6.55, "grad_norm": 0.7044050693511963, "learning_rate": 1.744441796498754e-06, "loss": 0.0493, "step": 666000 }, { "epoch": 6.55, "grad_norm": 5.064993858337402, "learning_rate": 1.7443176740445056e-06, "loss": 0.0258, "step": 666025 }, { "epoch": 6.55, "grad_norm": 3.598566770553589, "learning_rate": 1.744193551590257e-06, "loss": 0.0869, "step": 666050 }, { "epoch": 6.55, "grad_norm": 1.160305380821228, "learning_rate": 1.7440694291360086e-06, "loss": 0.0133, "step": 666075 }, { "epoch": 6.55, "grad_norm": 5.52017068862915, "learning_rate": 1.7439453066817603e-06, "loss": 0.0752, "step": 666100 }, { "epoch": 6.55, "grad_norm": 2.790360450744629, "learning_rate": 1.7438211842275115e-06, "loss": 0.0335, "step": 666125 }, { "epoch": 6.55, "grad_norm": 3.251495838165283, "learning_rate": 1.7436970617732631e-06, "loss": 0.0933, "step": 666150 }, { "epoch": 6.55, "grad_norm": 10.652704238891602, "learning_rate": 1.7435729393190147e-06, "loss": 0.0191, "step": 666175 }, { "epoch": 6.55, "grad_norm": 0.12344814091920853, "learning_rate": 1.7434488168647662e-06, "loss": 0.0809, "step": 666200 }, { "epoch": 6.55, "grad_norm": 8.859701156616211, "learning_rate": 1.7433246944105178e-06, "loss": 0.0228, "step": 666225 }, { "epoch": 6.55, "grad_norm": 2.131537914276123, "learning_rate": 1.7432005719562692e-06, "loss": 0.0841, "step": 666250 }, { "epoch": 6.55, "grad_norm": 11.922379493713379, "learning_rate": 1.7430764495020209e-06, "loss": 0.0114, "step": 666275 }, { "epoch": 6.55, "grad_norm": 0.6408648490905762, "learning_rate": 1.7429523270477725e-06, "loss": 0.0651, "step": 666300 }, { "epoch": 6.55, "grad_norm": 10.453777313232422, "learning_rate": 1.7428282045935237e-06, "loss": 0.013, "step": 666325 }, { "epoch": 6.55, "grad_norm": 0.3251776695251465, "learning_rate": 1.7427040821392753e-06, "loss": 0.0452, "step": 666350 }, { "epoch": 6.55, "grad_norm": 7.309683322906494, "learning_rate": 1.742579959685027e-06, "loss": 0.022, "step": 666375 }, { "epoch": 6.55, "grad_norm": 0.3781055510044098, "learning_rate": 1.7424558372307784e-06, "loss": 0.069, "step": 666400 }, { "epoch": 6.55, "grad_norm": 17.54348373413086, "learning_rate": 1.74233171477653e-06, "loss": 0.0391, "step": 666425 }, { "epoch": 6.55, "grad_norm": 2.7662570476531982, "learning_rate": 1.7422075923222817e-06, "loss": 0.0995, "step": 666450 }, { "epoch": 6.55, "grad_norm": 1.2672172784805298, "learning_rate": 1.742083469868033e-06, "loss": 0.0329, "step": 666475 }, { "epoch": 6.55, "grad_norm": 7.031009197235107, "learning_rate": 1.7419593474137847e-06, "loss": 0.1096, "step": 666500 }, { "epoch": 6.55, "grad_norm": 9.183854103088379, "learning_rate": 1.7418352249595364e-06, "loss": 0.0177, "step": 666525 }, { "epoch": 6.55, "grad_norm": 3.5571436882019043, "learning_rate": 1.7417111025052878e-06, "loss": 0.0631, "step": 666550 }, { "epoch": 6.55, "grad_norm": 1.6121104955673218, "learning_rate": 1.7415869800510394e-06, "loss": 0.0271, "step": 666575 }, { "epoch": 6.55, "grad_norm": 1.154729962348938, "learning_rate": 1.741462857596791e-06, "loss": 0.0771, "step": 666600 }, { "epoch": 6.55, "grad_norm": 2.4718472957611084, "learning_rate": 1.7413387351425423e-06, "loss": 0.0128, "step": 666625 }, { "epoch": 6.55, "grad_norm": 1.243308424949646, "learning_rate": 1.741214612688294e-06, "loss": 0.0691, "step": 666650 }, { "epoch": 6.55, "grad_norm": 3.4340178966522217, "learning_rate": 1.7410904902340453e-06, "loss": 0.0283, "step": 666675 }, { "epoch": 6.56, "grad_norm": 1.5476490259170532, "learning_rate": 1.740966367779797e-06, "loss": 0.1005, "step": 666700 }, { "epoch": 6.56, "grad_norm": 13.46635913848877, "learning_rate": 1.7408422453255486e-06, "loss": 0.0268, "step": 666725 }, { "epoch": 6.56, "grad_norm": 2.388136625289917, "learning_rate": 1.7407181228713e-06, "loss": 0.0873, "step": 666750 }, { "epoch": 6.56, "grad_norm": 6.411008358001709, "learning_rate": 1.7405940004170517e-06, "loss": 0.0202, "step": 666775 }, { "epoch": 6.56, "grad_norm": 4.4104180335998535, "learning_rate": 1.7404698779628033e-06, "loss": 0.0737, "step": 666800 }, { "epoch": 6.56, "grad_norm": 6.127656936645508, "learning_rate": 1.7403457555085545e-06, "loss": 0.0236, "step": 666825 }, { "epoch": 6.56, "grad_norm": 6.623282432556152, "learning_rate": 1.7402216330543061e-06, "loss": 0.0751, "step": 666850 }, { "epoch": 6.56, "grad_norm": 5.468320369720459, "learning_rate": 1.7400975106000578e-06, "loss": 0.0275, "step": 666875 }, { "epoch": 6.56, "grad_norm": 5.308740615844727, "learning_rate": 1.7399733881458092e-06, "loss": 0.0884, "step": 666900 }, { "epoch": 6.56, "grad_norm": 5.6278815269470215, "learning_rate": 1.7398492656915608e-06, "loss": 0.0267, "step": 666925 }, { "epoch": 6.56, "grad_norm": 2.487727165222168, "learning_rate": 1.7397251432373125e-06, "loss": 0.0887, "step": 666950 }, { "epoch": 6.56, "grad_norm": 15.053091049194336, "learning_rate": 1.7396010207830639e-06, "loss": 0.0364, "step": 666975 }, { "epoch": 6.56, "grad_norm": 1.3856451511383057, "learning_rate": 1.7394768983288155e-06, "loss": 0.0795, "step": 667000 }, { "epoch": 6.56, "grad_norm": 7.561598777770996, "learning_rate": 1.7393527758745672e-06, "loss": 0.0293, "step": 667025 }, { "epoch": 6.56, "grad_norm": 2.0344200134277344, "learning_rate": 1.7392286534203184e-06, "loss": 0.0848, "step": 667050 }, { "epoch": 6.56, "grad_norm": 7.828742504119873, "learning_rate": 1.73910453096607e-06, "loss": 0.0303, "step": 667075 }, { "epoch": 6.56, "grad_norm": 2.581970691680908, "learning_rate": 1.7389804085118214e-06, "loss": 0.0712, "step": 667100 }, { "epoch": 6.56, "grad_norm": 5.490832328796387, "learning_rate": 1.738856286057573e-06, "loss": 0.0264, "step": 667125 }, { "epoch": 6.56, "grad_norm": 3.9502451419830322, "learning_rate": 1.7387321636033247e-06, "loss": 0.073, "step": 667150 }, { "epoch": 6.56, "grad_norm": 2.962033987045288, "learning_rate": 1.7386080411490761e-06, "loss": 0.0205, "step": 667175 }, { "epoch": 6.56, "grad_norm": 6.074390411376953, "learning_rate": 1.7384839186948278e-06, "loss": 0.0824, "step": 667200 }, { "epoch": 6.56, "grad_norm": 5.288045883178711, "learning_rate": 1.7383597962405794e-06, "loss": 0.0209, "step": 667225 }, { "epoch": 6.56, "grad_norm": 1.4198747873306274, "learning_rate": 1.7382356737863306e-06, "loss": 0.082, "step": 667250 }, { "epoch": 6.56, "grad_norm": 10.334247589111328, "learning_rate": 1.7381115513320822e-06, "loss": 0.0418, "step": 667275 }, { "epoch": 6.56, "grad_norm": 0.2627241909503937, "learning_rate": 1.7379874288778339e-06, "loss": 0.0688, "step": 667300 }, { "epoch": 6.56, "grad_norm": 2.4651939868927, "learning_rate": 1.7378633064235853e-06, "loss": 0.0216, "step": 667325 }, { "epoch": 6.56, "grad_norm": 4.394037246704102, "learning_rate": 1.737739183969337e-06, "loss": 0.0743, "step": 667350 }, { "epoch": 6.56, "grad_norm": 4.886799335479736, "learning_rate": 1.7376150615150886e-06, "loss": 0.0311, "step": 667375 }, { "epoch": 6.56, "grad_norm": 5.987706661224365, "learning_rate": 1.73749093906084e-06, "loss": 0.0809, "step": 667400 }, { "epoch": 6.56, "grad_norm": 2.2429466247558594, "learning_rate": 1.7373668166065916e-06, "loss": 0.0229, "step": 667425 }, { "epoch": 6.56, "grad_norm": 6.914664268493652, "learning_rate": 1.7372426941523433e-06, "loss": 0.0808, "step": 667450 }, { "epoch": 6.56, "grad_norm": 1.9290270805358887, "learning_rate": 1.7371185716980945e-06, "loss": 0.0222, "step": 667475 }, { "epoch": 6.56, "grad_norm": 1.9650452136993408, "learning_rate": 1.736994449243846e-06, "loss": 0.069, "step": 667500 }, { "epoch": 6.56, "grad_norm": 0.9493649005889893, "learning_rate": 1.7368703267895975e-06, "loss": 0.0157, "step": 667525 }, { "epoch": 6.56, "grad_norm": 5.345803737640381, "learning_rate": 1.7367511692335192e-06, "loss": 0.0708, "step": 667550 }, { "epoch": 6.56, "grad_norm": 1.3750120401382446, "learning_rate": 1.7366270467792706e-06, "loss": 0.0216, "step": 667575 }, { "epoch": 6.56, "grad_norm": 2.7734460830688477, "learning_rate": 1.7365029243250222e-06, "loss": 0.0948, "step": 667600 }, { "epoch": 6.56, "grad_norm": 5.575562000274658, "learning_rate": 1.7363788018707739e-06, "loss": 0.0291, "step": 667625 }, { "epoch": 6.56, "grad_norm": 0.11771845817565918, "learning_rate": 1.7362546794165253e-06, "loss": 0.0579, "step": 667650 }, { "epoch": 6.56, "grad_norm": 6.814503192901611, "learning_rate": 1.736130556962277e-06, "loss": 0.0145, "step": 667675 }, { "epoch": 6.56, "grad_norm": 2.7866382598876953, "learning_rate": 1.7360064345080282e-06, "loss": 0.0754, "step": 667700 }, { "epoch": 6.57, "grad_norm": 10.261249542236328, "learning_rate": 1.7358823120537798e-06, "loss": 0.0158, "step": 667725 }, { "epoch": 6.57, "grad_norm": 3.181407928466797, "learning_rate": 1.7357581895995314e-06, "loss": 0.0871, "step": 667750 }, { "epoch": 6.57, "grad_norm": 6.42370080947876, "learning_rate": 1.7356340671452828e-06, "loss": 0.0172, "step": 667775 }, { "epoch": 6.57, "grad_norm": 3.7603604793548584, "learning_rate": 1.7355099446910345e-06, "loss": 0.0708, "step": 667800 }, { "epoch": 6.57, "grad_norm": 11.429699897766113, "learning_rate": 1.7353858222367861e-06, "loss": 0.0269, "step": 667825 }, { "epoch": 6.57, "grad_norm": 8.0067138671875, "learning_rate": 1.7352616997825375e-06, "loss": 0.0983, "step": 667850 }, { "epoch": 6.57, "grad_norm": 5.262625694274902, "learning_rate": 1.7351375773282892e-06, "loss": 0.0223, "step": 667875 }, { "epoch": 6.57, "grad_norm": 2.1290934085845947, "learning_rate": 1.7350134548740408e-06, "loss": 0.0873, "step": 667900 }, { "epoch": 6.57, "grad_norm": 8.503118515014648, "learning_rate": 1.734889332419792e-06, "loss": 0.03, "step": 667925 }, { "epoch": 6.57, "grad_norm": 1.5444660186767578, "learning_rate": 1.7347652099655437e-06, "loss": 0.0711, "step": 667950 }, { "epoch": 6.57, "grad_norm": 2.0356380939483643, "learning_rate": 1.7346410875112953e-06, "loss": 0.0217, "step": 667975 }, { "epoch": 6.57, "grad_norm": 1.6940762996673584, "learning_rate": 1.7345169650570467e-06, "loss": 0.1005, "step": 668000 }, { "epoch": 6.57, "grad_norm": 6.8875508308410645, "learning_rate": 1.7343928426027983e-06, "loss": 0.0143, "step": 668025 }, { "epoch": 6.57, "grad_norm": 0.45664453506469727, "learning_rate": 1.73426872014855e-06, "loss": 0.0808, "step": 668050 }, { "epoch": 6.57, "grad_norm": 14.617616653442383, "learning_rate": 1.7341445976943014e-06, "loss": 0.026, "step": 668075 }, { "epoch": 6.57, "grad_norm": 1.2243953943252563, "learning_rate": 1.734020475240053e-06, "loss": 0.0772, "step": 668100 }, { "epoch": 6.57, "grad_norm": 9.03172779083252, "learning_rate": 1.7338963527858045e-06, "loss": 0.0399, "step": 668125 }, { "epoch": 6.57, "grad_norm": 0.4445461630821228, "learning_rate": 1.733772230331556e-06, "loss": 0.0893, "step": 668150 }, { "epoch": 6.57, "grad_norm": 13.30356502532959, "learning_rate": 1.7336481078773077e-06, "loss": 0.0371, "step": 668175 }, { "epoch": 6.57, "grad_norm": 4.683512210845947, "learning_rate": 1.733523985423059e-06, "loss": 0.0782, "step": 668200 }, { "epoch": 6.57, "grad_norm": 7.8310394287109375, "learning_rate": 1.7333998629688106e-06, "loss": 0.0311, "step": 668225 }, { "epoch": 6.57, "grad_norm": 0.16679856181144714, "learning_rate": 1.7332757405145622e-06, "loss": 0.0735, "step": 668250 }, { "epoch": 6.57, "grad_norm": 11.176474571228027, "learning_rate": 1.7331516180603136e-06, "loss": 0.0241, "step": 668275 }, { "epoch": 6.57, "grad_norm": 9.118462562561035, "learning_rate": 1.7330274956060653e-06, "loss": 0.0557, "step": 668300 }, { "epoch": 6.57, "grad_norm": 14.552441596984863, "learning_rate": 1.732903373151817e-06, "loss": 0.0193, "step": 668325 }, { "epoch": 6.57, "grad_norm": 0.31276026368141174, "learning_rate": 1.7327792506975683e-06, "loss": 0.0747, "step": 668350 }, { "epoch": 6.57, "grad_norm": 15.485901832580566, "learning_rate": 1.73265512824332e-06, "loss": 0.0323, "step": 668375 }, { "epoch": 6.57, "grad_norm": 5.304592132568359, "learning_rate": 1.7325310057890716e-06, "loss": 0.0589, "step": 668400 }, { "epoch": 6.57, "grad_norm": 9.33225154876709, "learning_rate": 1.7324068833348228e-06, "loss": 0.0254, "step": 668425 }, { "epoch": 6.57, "grad_norm": 0.9416689872741699, "learning_rate": 1.7322827608805745e-06, "loss": 0.0721, "step": 668450 }, { "epoch": 6.57, "grad_norm": 7.683352470397949, "learning_rate": 1.732158638426326e-06, "loss": 0.0219, "step": 668475 }, { "epoch": 6.57, "grad_norm": 7.859146595001221, "learning_rate": 1.7320345159720775e-06, "loss": 0.0765, "step": 668500 }, { "epoch": 6.57, "grad_norm": 7.723323345184326, "learning_rate": 1.7319103935178291e-06, "loss": 0.0212, "step": 668525 }, { "epoch": 6.57, "grad_norm": 2.6600699424743652, "learning_rate": 1.7317862710635806e-06, "loss": 0.0731, "step": 668550 }, { "epoch": 6.57, "grad_norm": 18.328506469726562, "learning_rate": 1.7316621486093322e-06, "loss": 0.027, "step": 668575 }, { "epoch": 6.57, "grad_norm": 2.073751449584961, "learning_rate": 1.7315380261550838e-06, "loss": 0.0745, "step": 668600 }, { "epoch": 6.57, "grad_norm": 4.826013565063477, "learning_rate": 1.731413903700835e-06, "loss": 0.0227, "step": 668625 }, { "epoch": 6.57, "grad_norm": 0.46458372473716736, "learning_rate": 1.7312897812465867e-06, "loss": 0.0804, "step": 668650 }, { "epoch": 6.57, "grad_norm": 0.44464194774627686, "learning_rate": 1.7311656587923383e-06, "loss": 0.0251, "step": 668675 }, { "epoch": 6.57, "grad_norm": 3.7753005027770996, "learning_rate": 1.7310415363380897e-06, "loss": 0.1126, "step": 668700 }, { "epoch": 6.58, "grad_norm": 12.341796875, "learning_rate": 1.7309174138838414e-06, "loss": 0.026, "step": 668725 }, { "epoch": 6.58, "grad_norm": 4.732557773590088, "learning_rate": 1.730793291429593e-06, "loss": 0.0614, "step": 668750 }, { "epoch": 6.58, "grad_norm": 4.350136756896973, "learning_rate": 1.7306691689753444e-06, "loss": 0.0223, "step": 668775 }, { "epoch": 6.58, "grad_norm": 9.333439826965332, "learning_rate": 1.730545046521096e-06, "loss": 0.0925, "step": 668800 }, { "epoch": 6.58, "grad_norm": 14.255398750305176, "learning_rate": 1.7304209240668477e-06, "loss": 0.022, "step": 668825 }, { "epoch": 6.58, "grad_norm": 4.777253150939941, "learning_rate": 1.730296801612599e-06, "loss": 0.0931, "step": 668850 }, { "epoch": 6.58, "grad_norm": 11.108134269714355, "learning_rate": 1.7301726791583506e-06, "loss": 0.0416, "step": 668875 }, { "epoch": 6.58, "grad_norm": 0.3107220530509949, "learning_rate": 1.7300485567041022e-06, "loss": 0.0985, "step": 668900 }, { "epoch": 6.58, "grad_norm": 4.689413547515869, "learning_rate": 1.7299244342498536e-06, "loss": 0.0294, "step": 668925 }, { "epoch": 6.58, "grad_norm": 1.4657297134399414, "learning_rate": 1.7298003117956052e-06, "loss": 0.0703, "step": 668950 }, { "epoch": 6.58, "grad_norm": 0.25172901153564453, "learning_rate": 1.7296761893413567e-06, "loss": 0.027, "step": 668975 }, { "epoch": 6.58, "grad_norm": 0.5812081098556519, "learning_rate": 1.7295520668871083e-06, "loss": 0.0835, "step": 669000 }, { "epoch": 6.58, "grad_norm": 8.867551803588867, "learning_rate": 1.72942794443286e-06, "loss": 0.0261, "step": 669025 }, { "epoch": 6.58, "grad_norm": 1.721927523612976, "learning_rate": 1.7293038219786111e-06, "loss": 0.0848, "step": 669050 }, { "epoch": 6.58, "grad_norm": 10.75937271118164, "learning_rate": 1.7291796995243628e-06, "loss": 0.0334, "step": 669075 }, { "epoch": 6.58, "grad_norm": 1.8877958059310913, "learning_rate": 1.7290555770701144e-06, "loss": 0.0888, "step": 669100 }, { "epoch": 6.58, "grad_norm": 9.885171890258789, "learning_rate": 1.7289314546158658e-06, "loss": 0.0413, "step": 669125 }, { "epoch": 6.58, "grad_norm": 3.0308547019958496, "learning_rate": 1.7288073321616175e-06, "loss": 0.0866, "step": 669150 }, { "epoch": 6.58, "grad_norm": 8.750639915466309, "learning_rate": 1.7286832097073691e-06, "loss": 0.0217, "step": 669175 }, { "epoch": 6.58, "grad_norm": 75.01936340332031, "learning_rate": 1.7285590872531205e-06, "loss": 0.0702, "step": 669200 }, { "epoch": 6.58, "grad_norm": 11.753447532653809, "learning_rate": 1.7284349647988722e-06, "loss": 0.0264, "step": 669225 }, { "epoch": 6.58, "grad_norm": 3.1176397800445557, "learning_rate": 1.7283108423446238e-06, "loss": 0.0804, "step": 669250 }, { "epoch": 6.58, "grad_norm": 5.7715020179748535, "learning_rate": 1.728186719890375e-06, "loss": 0.0227, "step": 669275 }, { "epoch": 6.58, "grad_norm": 5.854464054107666, "learning_rate": 1.7280625974361267e-06, "loss": 0.0751, "step": 669300 }, { "epoch": 6.58, "grad_norm": 10.912689208984375, "learning_rate": 1.7279384749818783e-06, "loss": 0.0139, "step": 669325 }, { "epoch": 6.58, "grad_norm": 2.067063808441162, "learning_rate": 1.7278143525276297e-06, "loss": 0.0608, "step": 669350 }, { "epoch": 6.58, "grad_norm": 13.324878692626953, "learning_rate": 1.7276902300733813e-06, "loss": 0.0241, "step": 669375 }, { "epoch": 6.58, "grad_norm": 3.3963844776153564, "learning_rate": 1.7275661076191328e-06, "loss": 0.074, "step": 669400 }, { "epoch": 6.58, "grad_norm": 4.234142780303955, "learning_rate": 1.7274419851648844e-06, "loss": 0.0181, "step": 669425 }, { "epoch": 6.58, "grad_norm": 2.2801685333251953, "learning_rate": 1.727317862710636e-06, "loss": 0.0648, "step": 669450 }, { "epoch": 6.58, "grad_norm": 11.735722541809082, "learning_rate": 1.7271937402563875e-06, "loss": 0.023, "step": 669475 }, { "epoch": 6.58, "grad_norm": 3.2613754272460938, "learning_rate": 1.727069617802139e-06, "loss": 0.0734, "step": 669500 }, { "epoch": 6.58, "grad_norm": 9.986734390258789, "learning_rate": 1.7269454953478907e-06, "loss": 0.0254, "step": 669525 }, { "epoch": 6.58, "grad_norm": 4.633350849151611, "learning_rate": 1.726821372893642e-06, "loss": 0.0887, "step": 669550 }, { "epoch": 6.58, "grad_norm": 11.59229850769043, "learning_rate": 1.7266972504393936e-06, "loss": 0.0354, "step": 669575 }, { "epoch": 6.58, "grad_norm": 0.8032863140106201, "learning_rate": 1.7265731279851452e-06, "loss": 0.0788, "step": 669600 }, { "epoch": 6.58, "grad_norm": 9.969589233398438, "learning_rate": 1.7264490055308966e-06, "loss": 0.0259, "step": 669625 }, { "epoch": 6.58, "grad_norm": 0.3242824673652649, "learning_rate": 1.7263248830766483e-06, "loss": 0.0527, "step": 669650 }, { "epoch": 6.58, "grad_norm": 18.398588180541992, "learning_rate": 1.7262007606224e-06, "loss": 0.0343, "step": 669675 }, { "epoch": 6.58, "grad_norm": 3.882711887359619, "learning_rate": 1.7260766381681513e-06, "loss": 0.0809, "step": 669700 }, { "epoch": 6.58, "grad_norm": 16.89163589477539, "learning_rate": 1.725952515713903e-06, "loss": 0.0291, "step": 669725 }, { "epoch": 6.59, "grad_norm": 5.162402629852295, "learning_rate": 1.7258283932596546e-06, "loss": 0.0643, "step": 669750 }, { "epoch": 6.59, "grad_norm": 5.463461399078369, "learning_rate": 1.7257042708054058e-06, "loss": 0.0242, "step": 669775 }, { "epoch": 6.59, "grad_norm": 3.115297555923462, "learning_rate": 1.7255801483511574e-06, "loss": 0.0973, "step": 669800 }, { "epoch": 6.59, "grad_norm": 6.212015151977539, "learning_rate": 1.7254560258969089e-06, "loss": 0.0151, "step": 669825 }, { "epoch": 6.59, "grad_norm": 11.49070930480957, "learning_rate": 1.7253368683408305e-06, "loss": 0.0708, "step": 669850 }, { "epoch": 6.59, "grad_norm": 7.856271743774414, "learning_rate": 1.725212745886582e-06, "loss": 0.0238, "step": 669875 }, { "epoch": 6.59, "grad_norm": 8.703495025634766, "learning_rate": 1.7250886234323336e-06, "loss": 0.0695, "step": 669900 }, { "epoch": 6.59, "grad_norm": 5.349549293518066, "learning_rate": 1.7249645009780852e-06, "loss": 0.0248, "step": 669925 }, { "epoch": 6.59, "grad_norm": 1.3133138418197632, "learning_rate": 1.7248403785238366e-06, "loss": 0.0604, "step": 669950 }, { "epoch": 6.59, "grad_norm": 7.016327381134033, "learning_rate": 1.7247162560695883e-06, "loss": 0.018, "step": 669975 }, { "epoch": 6.59, "grad_norm": 3.6432013511657715, "learning_rate": 1.7245921336153395e-06, "loss": 0.062, "step": 670000 }, { "epoch": 6.59, "grad_norm": 2.195976734161377, "learning_rate": 1.7244680111610911e-06, "loss": 0.0155, "step": 670025 }, { "epoch": 6.59, "grad_norm": 1.8065105676651, "learning_rate": 1.7243438887068428e-06, "loss": 0.0815, "step": 670050 }, { "epoch": 6.59, "grad_norm": 9.316258430480957, "learning_rate": 1.7242197662525942e-06, "loss": 0.018, "step": 670075 }, { "epoch": 6.59, "grad_norm": 2.1161248683929443, "learning_rate": 1.7240956437983458e-06, "loss": 0.0835, "step": 670100 }, { "epoch": 6.59, "grad_norm": 6.166119575500488, "learning_rate": 1.7239715213440975e-06, "loss": 0.0199, "step": 670125 }, { "epoch": 6.59, "grad_norm": 2.981278419494629, "learning_rate": 1.7238473988898489e-06, "loss": 0.0544, "step": 670150 }, { "epoch": 6.59, "grad_norm": 14.58747386932373, "learning_rate": 1.7237232764356005e-06, "loss": 0.0232, "step": 670175 }, { "epoch": 6.59, "grad_norm": 10.108987808227539, "learning_rate": 1.7235991539813521e-06, "loss": 0.0732, "step": 670200 }, { "epoch": 6.59, "grad_norm": 7.367279052734375, "learning_rate": 1.7234750315271034e-06, "loss": 0.0219, "step": 670225 }, { "epoch": 6.59, "grad_norm": 6.01701545715332, "learning_rate": 1.723350909072855e-06, "loss": 0.0664, "step": 670250 }, { "epoch": 6.59, "grad_norm": 13.779468536376953, "learning_rate": 1.7232267866186066e-06, "loss": 0.0176, "step": 670275 }, { "epoch": 6.59, "grad_norm": 8.59697151184082, "learning_rate": 1.723102664164358e-06, "loss": 0.0656, "step": 670300 }, { "epoch": 6.59, "grad_norm": 10.894217491149902, "learning_rate": 1.7229785417101097e-06, "loss": 0.0211, "step": 670325 }, { "epoch": 6.59, "grad_norm": 1.7293943166732788, "learning_rate": 1.7228544192558613e-06, "loss": 0.0646, "step": 670350 }, { "epoch": 6.59, "grad_norm": 16.309009552001953, "learning_rate": 1.7227302968016127e-06, "loss": 0.0299, "step": 670375 }, { "epoch": 6.59, "grad_norm": 0.7235581874847412, "learning_rate": 1.7226061743473644e-06, "loss": 0.0546, "step": 670400 }, { "epoch": 6.59, "grad_norm": 5.0330095291137695, "learning_rate": 1.7224820518931156e-06, "loss": 0.0224, "step": 670425 }, { "epoch": 6.59, "grad_norm": 9.258567810058594, "learning_rate": 1.7223579294388672e-06, "loss": 0.1055, "step": 670450 }, { "epoch": 6.59, "grad_norm": 3.2544076442718506, "learning_rate": 1.7222338069846189e-06, "loss": 0.0326, "step": 670475 }, { "epoch": 6.59, "grad_norm": 6.958798408508301, "learning_rate": 1.7221096845303703e-06, "loss": 0.0751, "step": 670500 }, { "epoch": 6.59, "grad_norm": 0.9125214219093323, "learning_rate": 1.721985562076122e-06, "loss": 0.0245, "step": 670525 }, { "epoch": 6.59, "grad_norm": 3.245673894882202, "learning_rate": 1.7218614396218736e-06, "loss": 0.0673, "step": 670550 }, { "epoch": 6.59, "grad_norm": 4.07086706161499, "learning_rate": 1.721737317167625e-06, "loss": 0.0286, "step": 670575 }, { "epoch": 6.59, "grad_norm": 1.3257062435150146, "learning_rate": 1.7216131947133766e-06, "loss": 0.0747, "step": 670600 }, { "epoch": 6.59, "grad_norm": 6.779708385467529, "learning_rate": 1.7214890722591283e-06, "loss": 0.0212, "step": 670625 }, { "epoch": 6.59, "grad_norm": 6.108963966369629, "learning_rate": 1.7213649498048795e-06, "loss": 0.0751, "step": 670650 }, { "epoch": 6.59, "grad_norm": 4.247190475463867, "learning_rate": 1.721240827350631e-06, "loss": 0.0199, "step": 670675 }, { "epoch": 6.59, "grad_norm": 0.833062469959259, "learning_rate": 1.7211167048963827e-06, "loss": 0.0988, "step": 670700 }, { "epoch": 6.59, "grad_norm": 6.101485729217529, "learning_rate": 1.7209925824421342e-06, "loss": 0.0205, "step": 670725 }, { "epoch": 6.59, "grad_norm": 1.7239990234375, "learning_rate": 1.7208684599878858e-06, "loss": 0.0997, "step": 670750 }, { "epoch": 6.6, "grad_norm": 5.331031799316406, "learning_rate": 1.7207443375336374e-06, "loss": 0.0242, "step": 670775 }, { "epoch": 6.6, "grad_norm": 0.056649383157491684, "learning_rate": 1.7206202150793888e-06, "loss": 0.0782, "step": 670800 }, { "epoch": 6.6, "grad_norm": 2.799654960632324, "learning_rate": 1.7204960926251405e-06, "loss": 0.0199, "step": 670825 }, { "epoch": 6.6, "grad_norm": 5.380625247955322, "learning_rate": 1.720371970170892e-06, "loss": 0.0742, "step": 670850 }, { "epoch": 6.6, "grad_norm": 29.743778228759766, "learning_rate": 1.7202478477166435e-06, "loss": 0.0179, "step": 670875 }, { "epoch": 6.6, "grad_norm": 3.8812525272369385, "learning_rate": 1.7201237252623952e-06, "loss": 0.0959, "step": 670900 }, { "epoch": 6.6, "grad_norm": 6.6161017417907715, "learning_rate": 1.7199996028081464e-06, "loss": 0.0227, "step": 670925 }, { "epoch": 6.6, "grad_norm": 1.5923066139221191, "learning_rate": 1.719875480353898e-06, "loss": 0.0822, "step": 670950 }, { "epoch": 6.6, "grad_norm": 1.7925677299499512, "learning_rate": 1.7197513578996497e-06, "loss": 0.0198, "step": 670975 }, { "epoch": 6.6, "grad_norm": 0.5013685822486877, "learning_rate": 1.719627235445401e-06, "loss": 0.0744, "step": 671000 }, { "epoch": 6.6, "grad_norm": 11.775482177734375, "learning_rate": 1.7195031129911527e-06, "loss": 0.0167, "step": 671025 }, { "epoch": 6.6, "grad_norm": 3.5081472396850586, "learning_rate": 1.7193789905369044e-06, "loss": 0.0404, "step": 671050 }, { "epoch": 6.6, "grad_norm": 15.69762134552002, "learning_rate": 1.7192548680826558e-06, "loss": 0.0271, "step": 671075 }, { "epoch": 6.6, "grad_norm": 0.40741920471191406, "learning_rate": 1.7191307456284074e-06, "loss": 0.0916, "step": 671100 }, { "epoch": 6.6, "grad_norm": 10.309096336364746, "learning_rate": 1.719006623174159e-06, "loss": 0.0289, "step": 671125 }, { "epoch": 6.6, "grad_norm": 4.262903213500977, "learning_rate": 1.7188825007199103e-06, "loss": 0.0661, "step": 671150 }, { "epoch": 6.6, "grad_norm": 6.540476322174072, "learning_rate": 1.7187583782656619e-06, "loss": 0.0254, "step": 671175 }, { "epoch": 6.6, "grad_norm": 4.344838619232178, "learning_rate": 1.7186342558114135e-06, "loss": 0.0716, "step": 671200 }, { "epoch": 6.6, "grad_norm": 9.063018798828125, "learning_rate": 1.718510133357165e-06, "loss": 0.0152, "step": 671225 }, { "epoch": 6.6, "grad_norm": 0.7289793491363525, "learning_rate": 1.7183860109029166e-06, "loss": 0.0781, "step": 671250 }, { "epoch": 6.6, "grad_norm": 10.936651229858398, "learning_rate": 1.718261888448668e-06, "loss": 0.0309, "step": 671275 }, { "epoch": 6.6, "grad_norm": 6.841744899749756, "learning_rate": 1.7181377659944196e-06, "loss": 0.0829, "step": 671300 }, { "epoch": 6.6, "grad_norm": 3.990250587463379, "learning_rate": 1.7180136435401713e-06, "loss": 0.0297, "step": 671325 }, { "epoch": 6.6, "grad_norm": 3.1154701709747314, "learning_rate": 1.7178895210859225e-06, "loss": 0.1058, "step": 671350 }, { "epoch": 6.6, "grad_norm": 5.773192882537842, "learning_rate": 1.7177653986316741e-06, "loss": 0.018, "step": 671375 }, { "epoch": 6.6, "grad_norm": 0.036704543977975845, "learning_rate": 1.7176412761774258e-06, "loss": 0.0865, "step": 671400 }, { "epoch": 6.6, "grad_norm": 3.2378005981445312, "learning_rate": 1.7175171537231772e-06, "loss": 0.0158, "step": 671425 }, { "epoch": 6.6, "grad_norm": 1.5523167848587036, "learning_rate": 1.7173930312689288e-06, "loss": 0.0648, "step": 671450 }, { "epoch": 6.6, "grad_norm": 6.295127868652344, "learning_rate": 1.7172689088146805e-06, "loss": 0.024, "step": 671475 }, { "epoch": 6.6, "grad_norm": 12.645527839660645, "learning_rate": 1.7171447863604319e-06, "loss": 0.0798, "step": 671500 }, { "epoch": 6.6, "grad_norm": 5.938760757446289, "learning_rate": 1.7170206639061835e-06, "loss": 0.0323, "step": 671525 }, { "epoch": 6.6, "grad_norm": 3.056628704071045, "learning_rate": 1.7168965414519351e-06, "loss": 0.059, "step": 671550 }, { "epoch": 6.6, "grad_norm": 6.088662624359131, "learning_rate": 1.7167724189976864e-06, "loss": 0.0201, "step": 671575 }, { "epoch": 6.6, "grad_norm": 1.5409666299819946, "learning_rate": 1.716648296543438e-06, "loss": 0.09, "step": 671600 }, { "epoch": 6.6, "grad_norm": 0.2259623408317566, "learning_rate": 1.7165241740891896e-06, "loss": 0.0228, "step": 671625 }, { "epoch": 6.6, "grad_norm": 3.000136375427246, "learning_rate": 1.716400051634941e-06, "loss": 0.0795, "step": 671650 }, { "epoch": 6.6, "grad_norm": 3.2746620178222656, "learning_rate": 1.7162759291806927e-06, "loss": 0.0326, "step": 671675 }, { "epoch": 6.6, "grad_norm": 1.857047200202942, "learning_rate": 1.7161518067264441e-06, "loss": 0.0548, "step": 671700 }, { "epoch": 6.6, "grad_norm": 1.1935306787490845, "learning_rate": 1.7160276842721957e-06, "loss": 0.0241, "step": 671725 }, { "epoch": 6.6, "grad_norm": 9.693140029907227, "learning_rate": 1.7159035618179474e-06, "loss": 0.0604, "step": 671750 }, { "epoch": 6.61, "grad_norm": 6.6859540939331055, "learning_rate": 1.7157794393636986e-06, "loss": 0.0282, "step": 671775 }, { "epoch": 6.61, "grad_norm": 0.13749054074287415, "learning_rate": 1.7156553169094502e-06, "loss": 0.0801, "step": 671800 }, { "epoch": 6.61, "grad_norm": 16.657207489013672, "learning_rate": 1.7155311944552019e-06, "loss": 0.0222, "step": 671825 }, { "epoch": 6.61, "grad_norm": 5.0146870613098145, "learning_rate": 1.7154070720009533e-06, "loss": 0.0673, "step": 671850 }, { "epoch": 6.61, "grad_norm": 1.6642929315567017, "learning_rate": 1.715282949546705e-06, "loss": 0.0243, "step": 671875 }, { "epoch": 6.61, "grad_norm": 0.537688136100769, "learning_rate": 1.7151588270924566e-06, "loss": 0.0828, "step": 671900 }, { "epoch": 6.61, "grad_norm": 7.097153186798096, "learning_rate": 1.715034704638208e-06, "loss": 0.0181, "step": 671925 }, { "epoch": 6.61, "grad_norm": 5.582156181335449, "learning_rate": 1.7149105821839596e-06, "loss": 0.0654, "step": 671950 }, { "epoch": 6.61, "grad_norm": 1.059531569480896, "learning_rate": 1.7147864597297112e-06, "loss": 0.0212, "step": 671975 }, { "epoch": 6.61, "grad_norm": 0.08225230127573013, "learning_rate": 1.7146623372754625e-06, "loss": 0.0694, "step": 672000 }, { "epoch": 6.61, "grad_norm": 10.892990112304688, "learning_rate": 1.714538214821214e-06, "loss": 0.0098, "step": 672025 }, { "epoch": 6.61, "grad_norm": 0.4778578579425812, "learning_rate": 1.7144140923669657e-06, "loss": 0.0801, "step": 672050 }, { "epoch": 6.61, "grad_norm": 4.015230655670166, "learning_rate": 1.7142899699127171e-06, "loss": 0.0285, "step": 672075 }, { "epoch": 6.61, "grad_norm": 0.8640105128288269, "learning_rate": 1.7141708123566386e-06, "loss": 0.0769, "step": 672100 }, { "epoch": 6.61, "grad_norm": 1.1343770027160645, "learning_rate": 1.7140466899023902e-06, "loss": 0.0397, "step": 672125 }, { "epoch": 6.61, "grad_norm": 3.7855288982391357, "learning_rate": 1.7139225674481419e-06, "loss": 0.09, "step": 672150 }, { "epoch": 6.61, "grad_norm": 4.462203025817871, "learning_rate": 1.7137984449938933e-06, "loss": 0.0233, "step": 672175 }, { "epoch": 6.61, "grad_norm": 2.704293966293335, "learning_rate": 1.713674322539645e-06, "loss": 0.0907, "step": 672200 }, { "epoch": 6.61, "grad_norm": 18.25559425354004, "learning_rate": 1.7135502000853966e-06, "loss": 0.0252, "step": 672225 }, { "epoch": 6.61, "grad_norm": 2.046314001083374, "learning_rate": 1.7134260776311478e-06, "loss": 0.0646, "step": 672250 }, { "epoch": 6.61, "grad_norm": 2.596761703491211, "learning_rate": 1.7133019551768994e-06, "loss": 0.0191, "step": 672275 }, { "epoch": 6.61, "grad_norm": 1.085472583770752, "learning_rate": 1.7131778327226508e-06, "loss": 0.0619, "step": 672300 }, { "epoch": 6.61, "grad_norm": 8.711350440979004, "learning_rate": 1.7130537102684025e-06, "loss": 0.0249, "step": 672325 }, { "epoch": 6.61, "grad_norm": 2.3959765434265137, "learning_rate": 1.712929587814154e-06, "loss": 0.0622, "step": 672350 }, { "epoch": 6.61, "grad_norm": 0.08124179393053055, "learning_rate": 1.7128054653599055e-06, "loss": 0.0283, "step": 672375 }, { "epoch": 6.61, "grad_norm": 0.13830479979515076, "learning_rate": 1.7126813429056572e-06, "loss": 0.0632, "step": 672400 }, { "epoch": 6.61, "grad_norm": 2.814455986022949, "learning_rate": 1.7125572204514088e-06, "loss": 0.0383, "step": 672425 }, { "epoch": 6.61, "grad_norm": 3.264281988143921, "learning_rate": 1.7124330979971602e-06, "loss": 0.0634, "step": 672450 }, { "epoch": 6.61, "grad_norm": 4.192072868347168, "learning_rate": 1.7123089755429119e-06, "loss": 0.02, "step": 672475 }, { "epoch": 6.61, "grad_norm": 4.504553318023682, "learning_rate": 1.7121848530886635e-06, "loss": 0.0711, "step": 672500 }, { "epoch": 6.61, "grad_norm": 9.907958030700684, "learning_rate": 1.7120607306344147e-06, "loss": 0.0202, "step": 672525 }, { "epoch": 6.61, "grad_norm": 1.2867014408111572, "learning_rate": 1.7119366081801663e-06, "loss": 0.0896, "step": 672550 }, { "epoch": 6.61, "grad_norm": 9.55903148651123, "learning_rate": 1.711812485725918e-06, "loss": 0.0319, "step": 672575 }, { "epoch": 6.61, "grad_norm": 0.1974632441997528, "learning_rate": 1.7116883632716694e-06, "loss": 0.095, "step": 672600 }, { "epoch": 6.61, "grad_norm": 1.0399960279464722, "learning_rate": 1.711564240817421e-06, "loss": 0.02, "step": 672625 }, { "epoch": 6.61, "grad_norm": 3.9903485774993896, "learning_rate": 1.7114401183631727e-06, "loss": 0.0617, "step": 672650 }, { "epoch": 6.61, "grad_norm": 5.86729097366333, "learning_rate": 1.711315995908924e-06, "loss": 0.0198, "step": 672675 }, { "epoch": 6.61, "grad_norm": 2.556393623352051, "learning_rate": 1.7111918734546757e-06, "loss": 0.0697, "step": 672700 }, { "epoch": 6.61, "grad_norm": 7.6539998054504395, "learning_rate": 1.711067751000427e-06, "loss": 0.0289, "step": 672725 }, { "epoch": 6.61, "grad_norm": 4.763133525848389, "learning_rate": 1.7109436285461786e-06, "loss": 0.0763, "step": 672750 }, { "epoch": 6.61, "grad_norm": 5.745403289794922, "learning_rate": 1.7108195060919302e-06, "loss": 0.0196, "step": 672775 }, { "epoch": 6.62, "grad_norm": 2.7313742637634277, "learning_rate": 1.7106953836376816e-06, "loss": 0.0758, "step": 672800 }, { "epoch": 6.62, "grad_norm": 1.2832324504852295, "learning_rate": 1.7105712611834333e-06, "loss": 0.0344, "step": 672825 }, { "epoch": 6.62, "grad_norm": 0.3077593147754669, "learning_rate": 1.710447138729185e-06, "loss": 0.0728, "step": 672850 }, { "epoch": 6.62, "grad_norm": 0.4040396809577942, "learning_rate": 1.7103230162749363e-06, "loss": 0.0252, "step": 672875 }, { "epoch": 6.62, "grad_norm": 4.085273265838623, "learning_rate": 1.710198893820688e-06, "loss": 0.0867, "step": 672900 }, { "epoch": 6.62, "grad_norm": 8.140579223632812, "learning_rate": 1.7100747713664396e-06, "loss": 0.0176, "step": 672925 }, { "epoch": 6.62, "grad_norm": 2.2099969387054443, "learning_rate": 1.7099506489121908e-06, "loss": 0.0873, "step": 672950 }, { "epoch": 6.62, "grad_norm": 7.852938175201416, "learning_rate": 1.7098265264579424e-06, "loss": 0.0257, "step": 672975 }, { "epoch": 6.62, "grad_norm": 1.167132019996643, "learning_rate": 1.709702404003694e-06, "loss": 0.0837, "step": 673000 }, { "epoch": 6.62, "grad_norm": 5.8457536697387695, "learning_rate": 1.7095782815494455e-06, "loss": 0.0196, "step": 673025 }, { "epoch": 6.62, "grad_norm": 4.999305725097656, "learning_rate": 1.7094541590951971e-06, "loss": 0.073, "step": 673050 }, { "epoch": 6.62, "grad_norm": 2.1594302654266357, "learning_rate": 1.7093300366409488e-06, "loss": 0.026, "step": 673075 }, { "epoch": 6.62, "grad_norm": 3.710838794708252, "learning_rate": 1.7092059141867002e-06, "loss": 0.0911, "step": 673100 }, { "epoch": 6.62, "grad_norm": 5.080215930938721, "learning_rate": 1.7090817917324518e-06, "loss": 0.0299, "step": 673125 }, { "epoch": 6.62, "grad_norm": 4.633164882659912, "learning_rate": 1.708957669278203e-06, "loss": 0.0707, "step": 673150 }, { "epoch": 6.62, "grad_norm": 9.197942733764648, "learning_rate": 1.7088335468239547e-06, "loss": 0.0267, "step": 673175 }, { "epoch": 6.62, "grad_norm": 1.3771719932556152, "learning_rate": 1.7087094243697063e-06, "loss": 0.1071, "step": 673200 }, { "epoch": 6.62, "grad_norm": 3.0122437477111816, "learning_rate": 1.7085853019154577e-06, "loss": 0.0284, "step": 673225 }, { "epoch": 6.62, "grad_norm": 6.731157302856445, "learning_rate": 1.7084611794612094e-06, "loss": 0.0736, "step": 673250 }, { "epoch": 6.62, "grad_norm": 6.870739459991455, "learning_rate": 1.708337057006961e-06, "loss": 0.0231, "step": 673275 }, { "epoch": 6.62, "grad_norm": 2.712232828140259, "learning_rate": 1.7082129345527124e-06, "loss": 0.0689, "step": 673300 }, { "epoch": 6.62, "grad_norm": 8.795035362243652, "learning_rate": 1.708088812098464e-06, "loss": 0.0226, "step": 673325 }, { "epoch": 6.62, "grad_norm": 2.170388698577881, "learning_rate": 1.7079646896442157e-06, "loss": 0.0688, "step": 673350 }, { "epoch": 6.62, "grad_norm": 9.662581443786621, "learning_rate": 1.707840567189967e-06, "loss": 0.0245, "step": 673375 }, { "epoch": 6.62, "grad_norm": 3.334106206893921, "learning_rate": 1.7077164447357185e-06, "loss": 0.0671, "step": 673400 }, { "epoch": 6.62, "grad_norm": 1.7054409980773926, "learning_rate": 1.7075923222814702e-06, "loss": 0.026, "step": 673425 }, { "epoch": 6.62, "grad_norm": 7.255122184753418, "learning_rate": 1.7074681998272216e-06, "loss": 0.0743, "step": 673450 }, { "epoch": 6.62, "grad_norm": 7.396598815917969, "learning_rate": 1.7073440773729732e-06, "loss": 0.0222, "step": 673475 }, { "epoch": 6.62, "grad_norm": 1.7632908821105957, "learning_rate": 1.7072199549187249e-06, "loss": 0.0929, "step": 673500 }, { "epoch": 6.62, "grad_norm": 8.873424530029297, "learning_rate": 1.7070958324644763e-06, "loss": 0.0466, "step": 673525 }, { "epoch": 6.62, "grad_norm": 3.6010584831237793, "learning_rate": 1.706971710010228e-06, "loss": 0.0735, "step": 673550 }, { "epoch": 6.62, "grad_norm": 7.530845642089844, "learning_rate": 1.7068475875559791e-06, "loss": 0.0235, "step": 673575 }, { "epoch": 6.62, "grad_norm": 1.5863405466079712, "learning_rate": 1.7067234651017308e-06, "loss": 0.0989, "step": 673600 }, { "epoch": 6.62, "grad_norm": 8.393933296203613, "learning_rate": 1.7065993426474824e-06, "loss": 0.0327, "step": 673625 }, { "epoch": 6.62, "grad_norm": 3.1902334690093994, "learning_rate": 1.7064752201932338e-06, "loss": 0.0601, "step": 673650 }, { "epoch": 6.62, "grad_norm": 7.358281135559082, "learning_rate": 1.7063510977389855e-06, "loss": 0.0297, "step": 673675 }, { "epoch": 6.62, "grad_norm": 2.6865992546081543, "learning_rate": 1.706226975284737e-06, "loss": 0.082, "step": 673700 }, { "epoch": 6.62, "grad_norm": 0.590065062046051, "learning_rate": 1.7061028528304885e-06, "loss": 0.0258, "step": 673725 }, { "epoch": 6.62, "grad_norm": 6.004244327545166, "learning_rate": 1.7059787303762402e-06, "loss": 0.094, "step": 673750 }, { "epoch": 6.62, "grad_norm": 4.928676128387451, "learning_rate": 1.7058546079219918e-06, "loss": 0.0226, "step": 673775 }, { "epoch": 6.62, "grad_norm": 0.2722936272621155, "learning_rate": 1.7057304854677432e-06, "loss": 0.0613, "step": 673800 }, { "epoch": 6.63, "grad_norm": 17.34476089477539, "learning_rate": 1.7056063630134948e-06, "loss": 0.0354, "step": 673825 }, { "epoch": 6.63, "grad_norm": 1.8434725999832153, "learning_rate": 1.7054822405592465e-06, "loss": 0.0861, "step": 673850 }, { "epoch": 6.63, "grad_norm": 6.154715061187744, "learning_rate": 1.7053581181049977e-06, "loss": 0.0183, "step": 673875 }, { "epoch": 6.63, "grad_norm": 0.3870536684989929, "learning_rate": 1.7052339956507493e-06, "loss": 0.0972, "step": 673900 }, { "epoch": 6.63, "grad_norm": 0.9087314605712891, "learning_rate": 1.705109873196501e-06, "loss": 0.0317, "step": 673925 }, { "epoch": 6.63, "grad_norm": 5.204329490661621, "learning_rate": 1.7049857507422524e-06, "loss": 0.0884, "step": 673950 }, { "epoch": 6.63, "grad_norm": 7.614473819732666, "learning_rate": 1.704861628288004e-06, "loss": 0.0154, "step": 673975 }, { "epoch": 6.63, "grad_norm": 0.5902886986732483, "learning_rate": 1.7047375058337554e-06, "loss": 0.0895, "step": 674000 }, { "epoch": 6.63, "grad_norm": 1.3130850791931152, "learning_rate": 1.704613383379507e-06, "loss": 0.0177, "step": 674025 }, { "epoch": 6.63, "grad_norm": 10.382697105407715, "learning_rate": 1.7044892609252587e-06, "loss": 0.0722, "step": 674050 }, { "epoch": 6.63, "grad_norm": 2.938547372817993, "learning_rate": 1.70436513847101e-06, "loss": 0.0202, "step": 674075 }, { "epoch": 6.63, "grad_norm": 0.4327467679977417, "learning_rate": 1.7042410160167616e-06, "loss": 0.0697, "step": 674100 }, { "epoch": 6.63, "grad_norm": 1.4431860446929932, "learning_rate": 1.7041168935625132e-06, "loss": 0.0346, "step": 674125 }, { "epoch": 6.63, "grad_norm": 1.0974681377410889, "learning_rate": 1.7039927711082646e-06, "loss": 0.0616, "step": 674150 }, { "epoch": 6.63, "grad_norm": 8.7650146484375, "learning_rate": 1.7038686486540163e-06, "loss": 0.023, "step": 674175 }, { "epoch": 6.63, "grad_norm": 0.29002809524536133, "learning_rate": 1.7037445261997679e-06, "loss": 0.0836, "step": 674200 }, { "epoch": 6.63, "grad_norm": 0.3356037735939026, "learning_rate": 1.7036204037455193e-06, "loss": 0.0199, "step": 674225 }, { "epoch": 6.63, "grad_norm": 0.4887915849685669, "learning_rate": 1.7035012461894408e-06, "loss": 0.1003, "step": 674250 }, { "epoch": 6.63, "grad_norm": 6.848803997039795, "learning_rate": 1.7033771237351924e-06, "loss": 0.0292, "step": 674275 }, { "epoch": 6.63, "grad_norm": 3.89878249168396, "learning_rate": 1.703253001280944e-06, "loss": 0.0514, "step": 674300 }, { "epoch": 6.63, "grad_norm": 3.5938985347747803, "learning_rate": 1.7031288788266952e-06, "loss": 0.0182, "step": 674325 }, { "epoch": 6.63, "grad_norm": 1.4682674407958984, "learning_rate": 1.7030047563724469e-06, "loss": 0.0763, "step": 674350 }, { "epoch": 6.63, "grad_norm": 0.10282639414072037, "learning_rate": 1.7028806339181985e-06, "loss": 0.0188, "step": 674375 }, { "epoch": 6.63, "grad_norm": 0.501876950263977, "learning_rate": 1.70275651146395e-06, "loss": 0.0684, "step": 674400 }, { "epoch": 6.63, "grad_norm": 7.587891101837158, "learning_rate": 1.7026323890097016e-06, "loss": 0.0232, "step": 674425 }, { "epoch": 6.63, "grad_norm": 4.392755031585693, "learning_rate": 1.7025082665554532e-06, "loss": 0.0788, "step": 674450 }, { "epoch": 6.63, "grad_norm": 17.09015464782715, "learning_rate": 1.7023841441012046e-06, "loss": 0.0284, "step": 674475 }, { "epoch": 6.63, "grad_norm": 5.455296516418457, "learning_rate": 1.7022600216469563e-06, "loss": 0.1026, "step": 674500 }, { "epoch": 6.63, "grad_norm": 11.957182884216309, "learning_rate": 1.702135899192708e-06, "loss": 0.0289, "step": 674525 }, { "epoch": 6.63, "grad_norm": 3.3663384914398193, "learning_rate": 1.7020117767384591e-06, "loss": 0.0649, "step": 674550 }, { "epoch": 6.63, "grad_norm": 10.343778610229492, "learning_rate": 1.7018876542842107e-06, "loss": 0.0271, "step": 674575 }, { "epoch": 6.63, "grad_norm": 4.906841278076172, "learning_rate": 1.7017635318299622e-06, "loss": 0.0641, "step": 674600 }, { "epoch": 6.63, "grad_norm": 5.898107528686523, "learning_rate": 1.7016394093757138e-06, "loss": 0.0218, "step": 674625 }, { "epoch": 6.63, "grad_norm": 2.769904613494873, "learning_rate": 1.7015152869214654e-06, "loss": 0.0802, "step": 674650 }, { "epoch": 6.63, "grad_norm": 15.169692039489746, "learning_rate": 1.7013911644672169e-06, "loss": 0.024, "step": 674675 }, { "epoch": 6.63, "grad_norm": 8.366921424865723, "learning_rate": 1.7012670420129685e-06, "loss": 0.0849, "step": 674700 }, { "epoch": 6.63, "grad_norm": 1.7400346994400024, "learning_rate": 1.7011429195587201e-06, "loss": 0.0202, "step": 674725 }, { "epoch": 6.63, "grad_norm": 3.5230579376220703, "learning_rate": 1.7010187971044713e-06, "loss": 0.0961, "step": 674750 }, { "epoch": 6.63, "grad_norm": 8.089924812316895, "learning_rate": 1.700894674650223e-06, "loss": 0.016, "step": 674775 }, { "epoch": 6.63, "grad_norm": 2.0144312381744385, "learning_rate": 1.7007705521959746e-06, "loss": 0.0733, "step": 674800 }, { "epoch": 6.63, "grad_norm": 4.969233512878418, "learning_rate": 1.700646429741726e-06, "loss": 0.0311, "step": 674825 }, { "epoch": 6.64, "grad_norm": 4.525395393371582, "learning_rate": 1.7005223072874777e-06, "loss": 0.0772, "step": 674850 }, { "epoch": 6.64, "grad_norm": 2.1241729259490967, "learning_rate": 1.7003981848332293e-06, "loss": 0.0238, "step": 674875 }, { "epoch": 6.64, "grad_norm": 8.22649097442627, "learning_rate": 1.7002740623789807e-06, "loss": 0.0708, "step": 674900 }, { "epoch": 6.64, "grad_norm": 8.128144264221191, "learning_rate": 1.7001499399247324e-06, "loss": 0.0296, "step": 674925 }, { "epoch": 6.64, "grad_norm": 1.4487457275390625, "learning_rate": 1.700025817470484e-06, "loss": 0.0738, "step": 674950 }, { "epoch": 6.64, "grad_norm": 8.598106384277344, "learning_rate": 1.6999016950162352e-06, "loss": 0.0138, "step": 674975 }, { "epoch": 6.64, "grad_norm": 0.1747054159641266, "learning_rate": 1.6997775725619869e-06, "loss": 0.0728, "step": 675000 }, { "epoch": 6.64, "grad_norm": 15.431863784790039, "learning_rate": 1.6996534501077383e-06, "loss": 0.0286, "step": 675025 }, { "epoch": 6.64, "grad_norm": 4.225667476654053, "learning_rate": 1.69952932765349e-06, "loss": 0.074, "step": 675050 }, { "epoch": 6.64, "grad_norm": 2.669872283935547, "learning_rate": 1.6994052051992415e-06, "loss": 0.0127, "step": 675075 }, { "epoch": 6.64, "grad_norm": 1.2547255754470825, "learning_rate": 1.699281082744993e-06, "loss": 0.068, "step": 675100 }, { "epoch": 6.64, "grad_norm": 1.2648369073867798, "learning_rate": 1.6991569602907446e-06, "loss": 0.0299, "step": 675125 }, { "epoch": 6.64, "grad_norm": 5.349027156829834, "learning_rate": 1.6990328378364962e-06, "loss": 0.0749, "step": 675150 }, { "epoch": 6.64, "grad_norm": 11.2060546875, "learning_rate": 1.6989087153822474e-06, "loss": 0.0262, "step": 675175 }, { "epoch": 6.64, "grad_norm": 8.194090843200684, "learning_rate": 1.698784592927999e-06, "loss": 0.0671, "step": 675200 }, { "epoch": 6.64, "grad_norm": 10.599175453186035, "learning_rate": 1.6986604704737507e-06, "loss": 0.0281, "step": 675225 }, { "epoch": 6.64, "grad_norm": 4.8314948081970215, "learning_rate": 1.6985363480195021e-06, "loss": 0.0695, "step": 675250 }, { "epoch": 6.64, "grad_norm": 14.438639640808105, "learning_rate": 1.6984122255652538e-06, "loss": 0.0342, "step": 675275 }, { "epoch": 6.64, "grad_norm": 0.03562144190073013, "learning_rate": 1.6982881031110054e-06, "loss": 0.0634, "step": 675300 }, { "epoch": 6.64, "grad_norm": 8.216049194335938, "learning_rate": 1.6981639806567568e-06, "loss": 0.0231, "step": 675325 }, { "epoch": 6.64, "grad_norm": 0.5213754773139954, "learning_rate": 1.6980398582025085e-06, "loss": 0.0734, "step": 675350 }, { "epoch": 6.64, "grad_norm": 6.132664680480957, "learning_rate": 1.69791573574826e-06, "loss": 0.0249, "step": 675375 }, { "epoch": 6.64, "grad_norm": 2.0839202404022217, "learning_rate": 1.6977916132940115e-06, "loss": 0.1029, "step": 675400 }, { "epoch": 6.64, "grad_norm": 12.595940589904785, "learning_rate": 1.6976674908397632e-06, "loss": 0.0271, "step": 675425 }, { "epoch": 6.64, "grad_norm": 0.5177949666976929, "learning_rate": 1.6975433683855144e-06, "loss": 0.098, "step": 675450 }, { "epoch": 6.64, "grad_norm": 8.589967727661133, "learning_rate": 1.697419245931266e-06, "loss": 0.0143, "step": 675475 }, { "epoch": 6.64, "grad_norm": 5.910844326019287, "learning_rate": 1.6972951234770176e-06, "loss": 0.0581, "step": 675500 }, { "epoch": 6.64, "grad_norm": 12.154361724853516, "learning_rate": 1.697171001022769e-06, "loss": 0.0421, "step": 675525 }, { "epoch": 6.64, "grad_norm": 4.970438003540039, "learning_rate": 1.6970468785685207e-06, "loss": 0.0976, "step": 675550 }, { "epoch": 6.64, "grad_norm": 1.1521680355072021, "learning_rate": 1.6969227561142723e-06, "loss": 0.0186, "step": 675575 }, { "epoch": 6.64, "grad_norm": 1.1882190704345703, "learning_rate": 1.6967986336600238e-06, "loss": 0.0855, "step": 675600 }, { "epoch": 6.64, "grad_norm": 3.4921045303344727, "learning_rate": 1.6966745112057754e-06, "loss": 0.0174, "step": 675625 }, { "epoch": 6.64, "grad_norm": 2.3740270137786865, "learning_rate": 1.696550388751527e-06, "loss": 0.0664, "step": 675650 }, { "epoch": 6.64, "grad_norm": 6.880645275115967, "learning_rate": 1.6964262662972782e-06, "loss": 0.0218, "step": 675675 }, { "epoch": 6.64, "grad_norm": 0.027946535497903824, "learning_rate": 1.6963021438430299e-06, "loss": 0.0588, "step": 675700 }, { "epoch": 6.64, "grad_norm": 12.040345191955566, "learning_rate": 1.6961780213887815e-06, "loss": 0.0176, "step": 675725 }, { "epoch": 6.64, "grad_norm": 0.05050742253661156, "learning_rate": 1.696053898934533e-06, "loss": 0.08, "step": 675750 }, { "epoch": 6.64, "grad_norm": 18.35555648803711, "learning_rate": 1.6959297764802846e-06, "loss": 0.0221, "step": 675775 }, { "epoch": 6.64, "grad_norm": 0.784205973148346, "learning_rate": 1.6958056540260362e-06, "loss": 0.0676, "step": 675800 }, { "epoch": 6.64, "grad_norm": 7.282340049743652, "learning_rate": 1.6956815315717876e-06, "loss": 0.0142, "step": 675825 }, { "epoch": 6.65, "grad_norm": 9.212477684020996, "learning_rate": 1.6955574091175393e-06, "loss": 0.0813, "step": 675850 }, { "epoch": 6.65, "grad_norm": 6.350776195526123, "learning_rate": 1.6954332866632905e-06, "loss": 0.0223, "step": 675875 }, { "epoch": 6.65, "grad_norm": 0.4343571066856384, "learning_rate": 1.6953091642090421e-06, "loss": 0.0566, "step": 675900 }, { "epoch": 6.65, "grad_norm": 4.256581783294678, "learning_rate": 1.6951850417547937e-06, "loss": 0.0225, "step": 675925 }, { "epoch": 6.65, "grad_norm": 3.4417686462402344, "learning_rate": 1.6950609193005452e-06, "loss": 0.0679, "step": 675950 }, { "epoch": 6.65, "grad_norm": 8.000195503234863, "learning_rate": 1.6949367968462968e-06, "loss": 0.0289, "step": 675975 }, { "epoch": 6.65, "grad_norm": 0.43997710943222046, "learning_rate": 1.6948126743920484e-06, "loss": 0.0696, "step": 676000 }, { "epoch": 6.65, "grad_norm": 1.7690695524215698, "learning_rate": 1.6946885519377999e-06, "loss": 0.0145, "step": 676025 }, { "epoch": 6.65, "grad_norm": 0.3747553527355194, "learning_rate": 1.6945644294835515e-06, "loss": 0.083, "step": 676050 }, { "epoch": 6.65, "grad_norm": 15.901900291442871, "learning_rate": 1.6944403070293031e-06, "loss": 0.0193, "step": 676075 }, { "epoch": 6.65, "grad_norm": 0.8184356093406677, "learning_rate": 1.6943161845750543e-06, "loss": 0.0862, "step": 676100 }, { "epoch": 6.65, "grad_norm": 4.727110862731934, "learning_rate": 1.694192062120806e-06, "loss": 0.0212, "step": 676125 }, { "epoch": 6.65, "grad_norm": 3.4305062294006348, "learning_rate": 1.6940679396665576e-06, "loss": 0.085, "step": 676150 }, { "epoch": 6.65, "grad_norm": 7.123827934265137, "learning_rate": 1.693943817212309e-06, "loss": 0.0278, "step": 676175 }, { "epoch": 6.65, "grad_norm": 0.5614389181137085, "learning_rate": 1.6938196947580607e-06, "loss": 0.0608, "step": 676200 }, { "epoch": 6.65, "grad_norm": 10.881176948547363, "learning_rate": 1.6936955723038123e-06, "loss": 0.032, "step": 676225 }, { "epoch": 6.65, "grad_norm": 0.07836862653493881, "learning_rate": 1.6935714498495637e-06, "loss": 0.051, "step": 676250 }, { "epoch": 6.65, "grad_norm": 8.109244346618652, "learning_rate": 1.6934473273953154e-06, "loss": 0.0276, "step": 676275 }, { "epoch": 6.65, "grad_norm": 2.933147668838501, "learning_rate": 1.6933232049410666e-06, "loss": 0.0721, "step": 676300 }, { "epoch": 6.65, "grad_norm": 1.1058204174041748, "learning_rate": 1.6931990824868182e-06, "loss": 0.0229, "step": 676325 }, { "epoch": 6.65, "grad_norm": 2.1656923294067383, "learning_rate": 1.6930749600325698e-06, "loss": 0.099, "step": 676350 }, { "epoch": 6.65, "grad_norm": 6.258525848388672, "learning_rate": 1.6929508375783213e-06, "loss": 0.035, "step": 676375 }, { "epoch": 6.65, "grad_norm": 5.010159492492676, "learning_rate": 1.692826715124073e-06, "loss": 0.0476, "step": 676400 }, { "epoch": 6.65, "grad_norm": 7.782423496246338, "learning_rate": 1.6927025926698245e-06, "loss": 0.0134, "step": 676425 }, { "epoch": 6.65, "grad_norm": 2.1533377170562744, "learning_rate": 1.692578470215576e-06, "loss": 0.0988, "step": 676450 }, { "epoch": 6.65, "grad_norm": 5.395029544830322, "learning_rate": 1.6924543477613276e-06, "loss": 0.0191, "step": 676475 }, { "epoch": 6.65, "grad_norm": 3.7828574180603027, "learning_rate": 1.6923302253070792e-06, "loss": 0.0666, "step": 676500 }, { "epoch": 6.65, "grad_norm": 7.228082180023193, "learning_rate": 1.6922061028528304e-06, "loss": 0.0285, "step": 676525 }, { "epoch": 6.65, "grad_norm": 3.7004921436309814, "learning_rate": 1.692081980398582e-06, "loss": 0.0676, "step": 676550 }, { "epoch": 6.65, "grad_norm": 0.507213294506073, "learning_rate": 1.6919578579443337e-06, "loss": 0.0159, "step": 676575 }, { "epoch": 6.65, "grad_norm": 3.8281984329223633, "learning_rate": 1.6918337354900851e-06, "loss": 0.0909, "step": 676600 }, { "epoch": 6.65, "grad_norm": 9.069202423095703, "learning_rate": 1.6917096130358368e-06, "loss": 0.018, "step": 676625 }, { "epoch": 6.65, "grad_norm": 0.09916295111179352, "learning_rate": 1.6915854905815884e-06, "loss": 0.0576, "step": 676650 }, { "epoch": 6.65, "grad_norm": 9.327463150024414, "learning_rate": 1.6914613681273398e-06, "loss": 0.0256, "step": 676675 }, { "epoch": 6.65, "grad_norm": 1.983794093132019, "learning_rate": 1.6913372456730915e-06, "loss": 0.0727, "step": 676700 }, { "epoch": 6.65, "grad_norm": 0.35754460096359253, "learning_rate": 1.6912131232188429e-06, "loss": 0.0307, "step": 676725 }, { "epoch": 6.65, "grad_norm": 7.3830485343933105, "learning_rate": 1.6910890007645945e-06, "loss": 0.0738, "step": 676750 }, { "epoch": 6.65, "grad_norm": 10.701825141906738, "learning_rate": 1.6909648783103462e-06, "loss": 0.0222, "step": 676775 }, { "epoch": 6.65, "grad_norm": 7.934971332550049, "learning_rate": 1.6908407558560974e-06, "loss": 0.0917, "step": 676800 }, { "epoch": 6.65, "grad_norm": 3.4525325298309326, "learning_rate": 1.690716633401849e-06, "loss": 0.0255, "step": 676825 }, { "epoch": 6.65, "grad_norm": 6.324721336364746, "learning_rate": 1.6905925109476006e-06, "loss": 0.0812, "step": 676850 }, { "epoch": 6.66, "grad_norm": 4.133553981781006, "learning_rate": 1.690468388493352e-06, "loss": 0.0199, "step": 676875 }, { "epoch": 6.66, "grad_norm": Infinity, "learning_rate": 1.6903492309372735e-06, "loss": 0.0799, "step": 676900 }, { "epoch": 6.66, "grad_norm": 9.572452545166016, "learning_rate": 1.6902251084830251e-06, "loss": 0.0252, "step": 676925 }, { "epoch": 6.66, "grad_norm": 1.193224310874939, "learning_rate": 1.6901009860287768e-06, "loss": 0.0638, "step": 676950 }, { "epoch": 6.66, "grad_norm": 0.7757886052131653, "learning_rate": 1.6899768635745282e-06, "loss": 0.0228, "step": 676975 }, { "epoch": 6.66, "grad_norm": 2.522348642349243, "learning_rate": 1.6898527411202798e-06, "loss": 0.0919, "step": 677000 }, { "epoch": 6.66, "grad_norm": 16.18935775756836, "learning_rate": 1.6897286186660315e-06, "loss": 0.0252, "step": 677025 }, { "epoch": 6.66, "grad_norm": 2.722158908843994, "learning_rate": 1.6896044962117827e-06, "loss": 0.0645, "step": 677050 }, { "epoch": 6.66, "grad_norm": 4.8946213722229, "learning_rate": 1.6894803737575343e-06, "loss": 0.0283, "step": 677075 }, { "epoch": 6.66, "grad_norm": 2.2081620693206787, "learning_rate": 1.689356251303286e-06, "loss": 0.093, "step": 677100 }, { "epoch": 6.66, "grad_norm": 12.628691673278809, "learning_rate": 1.6892321288490374e-06, "loss": 0.0299, "step": 677125 }, { "epoch": 6.66, "grad_norm": 2.1393654346466064, "learning_rate": 1.689108006394789e-06, "loss": 0.0672, "step": 677150 }, { "epoch": 6.66, "grad_norm": 7.177721977233887, "learning_rate": 1.6889838839405407e-06, "loss": 0.0164, "step": 677175 }, { "epoch": 6.66, "grad_norm": 1.6276150941848755, "learning_rate": 1.688859761486292e-06, "loss": 0.0722, "step": 677200 }, { "epoch": 6.66, "grad_norm": 6.357193946838379, "learning_rate": 1.6887356390320437e-06, "loss": 0.0226, "step": 677225 }, { "epoch": 6.66, "grad_norm": 0.7338398694992065, "learning_rate": 1.6886115165777953e-06, "loss": 0.0772, "step": 677250 }, { "epoch": 6.66, "grad_norm": 3.191977024078369, "learning_rate": 1.6884873941235466e-06, "loss": 0.0253, "step": 677275 }, { "epoch": 6.66, "grad_norm": 2.9116687774658203, "learning_rate": 1.6883632716692982e-06, "loss": 0.0767, "step": 677300 }, { "epoch": 6.66, "grad_norm": 1.6973687410354614, "learning_rate": 1.6882391492150496e-06, "loss": 0.0213, "step": 677325 }, { "epoch": 6.66, "grad_norm": 0.4379626214504242, "learning_rate": 1.6881150267608012e-06, "loss": 0.0706, "step": 677350 }, { "epoch": 6.66, "grad_norm": 5.322381496429443, "learning_rate": 1.6879909043065529e-06, "loss": 0.0179, "step": 677375 }, { "epoch": 6.66, "grad_norm": 6.264275550842285, "learning_rate": 1.6878667818523043e-06, "loss": 0.0811, "step": 677400 }, { "epoch": 6.66, "grad_norm": 6.216852188110352, "learning_rate": 1.687742659398056e-06, "loss": 0.0296, "step": 677425 }, { "epoch": 6.66, "grad_norm": 5.744719505310059, "learning_rate": 1.6876185369438076e-06, "loss": 0.0593, "step": 677450 }, { "epoch": 6.66, "grad_norm": 6.414968013763428, "learning_rate": 1.6874944144895588e-06, "loss": 0.0203, "step": 677475 }, { "epoch": 6.66, "grad_norm": 1.5348151922225952, "learning_rate": 1.6873702920353104e-06, "loss": 0.0951, "step": 677500 }, { "epoch": 6.66, "grad_norm": 5.577597141265869, "learning_rate": 1.687246169581062e-06, "loss": 0.0249, "step": 677525 }, { "epoch": 6.66, "grad_norm": 6.422967433929443, "learning_rate": 1.6871220471268135e-06, "loss": 0.0908, "step": 677550 }, { "epoch": 6.66, "grad_norm": 2.197962522506714, "learning_rate": 1.6869979246725651e-06, "loss": 0.0315, "step": 677575 }, { "epoch": 6.66, "grad_norm": 0.963341474533081, "learning_rate": 1.6868738022183168e-06, "loss": 0.0696, "step": 677600 }, { "epoch": 6.66, "grad_norm": 1.7258003950119019, "learning_rate": 1.6867496797640682e-06, "loss": 0.0339, "step": 677625 }, { "epoch": 6.66, "grad_norm": 1.267060399055481, "learning_rate": 1.6866255573098198e-06, "loss": 0.0896, "step": 677650 }, { "epoch": 6.66, "grad_norm": 12.164834022521973, "learning_rate": 1.6865014348555714e-06, "loss": 0.0258, "step": 677675 }, { "epoch": 6.66, "grad_norm": 1.725782036781311, "learning_rate": 1.6863773124013227e-06, "loss": 0.0536, "step": 677700 }, { "epoch": 6.66, "grad_norm": 6.117409706115723, "learning_rate": 1.6862531899470743e-06, "loss": 0.0365, "step": 677725 }, { "epoch": 6.66, "grad_norm": 6.336020469665527, "learning_rate": 1.6861290674928257e-06, "loss": 0.0766, "step": 677750 }, { "epoch": 6.66, "grad_norm": 5.911416530609131, "learning_rate": 1.6860049450385773e-06, "loss": 0.0272, "step": 677775 }, { "epoch": 6.66, "grad_norm": 1.1029282808303833, "learning_rate": 1.685880822584329e-06, "loss": 0.0815, "step": 677800 }, { "epoch": 6.66, "grad_norm": 1.8470863103866577, "learning_rate": 1.6857567001300804e-06, "loss": 0.033, "step": 677825 }, { "epoch": 6.66, "grad_norm": 1.3946589231491089, "learning_rate": 1.685632577675832e-06, "loss": 0.0649, "step": 677850 }, { "epoch": 6.66, "grad_norm": 5.443732738494873, "learning_rate": 1.6855084552215837e-06, "loss": 0.0178, "step": 677875 }, { "epoch": 6.67, "grad_norm": 0.36050572991371155, "learning_rate": 1.6853843327673349e-06, "loss": 0.0944, "step": 677900 }, { "epoch": 6.67, "grad_norm": 0.897510290145874, "learning_rate": 1.6852602103130865e-06, "loss": 0.022, "step": 677925 }, { "epoch": 6.67, "grad_norm": 1.2727018594741821, "learning_rate": 1.6851360878588382e-06, "loss": 0.0706, "step": 677950 }, { "epoch": 6.67, "grad_norm": 4.063177585601807, "learning_rate": 1.6850119654045896e-06, "loss": 0.028, "step": 677975 }, { "epoch": 6.67, "grad_norm": 5.3292412757873535, "learning_rate": 1.6848878429503412e-06, "loss": 0.0747, "step": 678000 }, { "epoch": 6.67, "grad_norm": 7.668383598327637, "learning_rate": 1.6847637204960929e-06, "loss": 0.0129, "step": 678025 }, { "epoch": 6.67, "grad_norm": 5.631478309631348, "learning_rate": 1.6846395980418443e-06, "loss": 0.0665, "step": 678050 }, { "epoch": 6.67, "grad_norm": 1.3313560485839844, "learning_rate": 1.684515475587596e-06, "loss": 0.0221, "step": 678075 }, { "epoch": 6.67, "grad_norm": 4.37306547164917, "learning_rate": 1.6843913531333475e-06, "loss": 0.0957, "step": 678100 }, { "epoch": 6.67, "grad_norm": 3.407593011856079, "learning_rate": 1.684267230679099e-06, "loss": 0.0178, "step": 678125 }, { "epoch": 6.67, "grad_norm": 1.2306196689605713, "learning_rate": 1.6841431082248506e-06, "loss": 0.0846, "step": 678150 }, { "epoch": 6.67, "grad_norm": 10.649615287780762, "learning_rate": 1.6840189857706018e-06, "loss": 0.0318, "step": 678175 }, { "epoch": 6.67, "grad_norm": 7.024864196777344, "learning_rate": 1.6838948633163534e-06, "loss": 0.0703, "step": 678200 }, { "epoch": 6.67, "grad_norm": 8.415526390075684, "learning_rate": 1.683770740862105e-06, "loss": 0.0349, "step": 678225 }, { "epoch": 6.67, "grad_norm": 4.342426300048828, "learning_rate": 1.6836466184078565e-06, "loss": 0.1122, "step": 678250 }, { "epoch": 6.67, "grad_norm": 17.714508056640625, "learning_rate": 1.6835224959536081e-06, "loss": 0.0273, "step": 678275 }, { "epoch": 6.67, "grad_norm": 2.579000234603882, "learning_rate": 1.6833983734993598e-06, "loss": 0.0829, "step": 678300 }, { "epoch": 6.67, "grad_norm": 2.7546849250793457, "learning_rate": 1.6832742510451112e-06, "loss": 0.0227, "step": 678325 }, { "epoch": 6.67, "grad_norm": 8.560188293457031, "learning_rate": 1.6831501285908628e-06, "loss": 0.0797, "step": 678350 }, { "epoch": 6.67, "grad_norm": 9.950658798217773, "learning_rate": 1.6830260061366145e-06, "loss": 0.0264, "step": 678375 }, { "epoch": 6.67, "grad_norm": 1.1463323831558228, "learning_rate": 1.6829018836823657e-06, "loss": 0.0592, "step": 678400 }, { "epoch": 6.67, "grad_norm": 10.06787109375, "learning_rate": 1.6827777612281173e-06, "loss": 0.0308, "step": 678425 }, { "epoch": 6.67, "grad_norm": 0.9972434043884277, "learning_rate": 1.682653638773869e-06, "loss": 0.0865, "step": 678450 }, { "epoch": 6.67, "grad_norm": 2.6532487869262695, "learning_rate": 1.6825295163196204e-06, "loss": 0.0247, "step": 678475 }, { "epoch": 6.67, "grad_norm": 1.7290318012237549, "learning_rate": 1.682405393865372e-06, "loss": 0.0668, "step": 678500 }, { "epoch": 6.67, "grad_norm": 5.797769069671631, "learning_rate": 1.6822812714111236e-06, "loss": 0.0177, "step": 678525 }, { "epoch": 6.67, "grad_norm": 1.3164364099502563, "learning_rate": 1.682157148956875e-06, "loss": 0.0869, "step": 678550 }, { "epoch": 6.67, "grad_norm": 5.215024948120117, "learning_rate": 1.6820330265026267e-06, "loss": 0.0173, "step": 678575 }, { "epoch": 6.67, "grad_norm": 0.4036096930503845, "learning_rate": 1.681908904048378e-06, "loss": 0.0614, "step": 678600 }, { "epoch": 6.67, "grad_norm": 5.11163330078125, "learning_rate": 1.6817847815941295e-06, "loss": 0.022, "step": 678625 }, { "epoch": 6.67, "grad_norm": 1.64369535446167, "learning_rate": 1.6816606591398812e-06, "loss": 0.103, "step": 678650 }, { "epoch": 6.67, "grad_norm": 8.248393058776855, "learning_rate": 1.6815365366856326e-06, "loss": 0.0228, "step": 678675 }, { "epoch": 6.67, "grad_norm": 3.4868204593658447, "learning_rate": 1.6814124142313842e-06, "loss": 0.0681, "step": 678700 }, { "epoch": 6.67, "grad_norm": 1.6483203172683716, "learning_rate": 1.6812882917771359e-06, "loss": 0.029, "step": 678725 }, { "epoch": 6.67, "grad_norm": 2.5616273880004883, "learning_rate": 1.6811641693228873e-06, "loss": 0.0748, "step": 678750 }, { "epoch": 6.67, "grad_norm": 6.314433574676514, "learning_rate": 1.681040046868639e-06, "loss": 0.0223, "step": 678775 }, { "epoch": 6.67, "grad_norm": 0.34572941064834595, "learning_rate": 1.6809159244143906e-06, "loss": 0.0495, "step": 678800 }, { "epoch": 6.67, "grad_norm": 6.672109603881836, "learning_rate": 1.6807918019601418e-06, "loss": 0.0172, "step": 678825 }, { "epoch": 6.67, "grad_norm": 2.9214000701904297, "learning_rate": 1.6806676795058934e-06, "loss": 0.0919, "step": 678850 }, { "epoch": 6.67, "grad_norm": 2.9352407455444336, "learning_rate": 1.680543557051645e-06, "loss": 0.0184, "step": 678875 }, { "epoch": 6.68, "grad_norm": 15.336584091186523, "learning_rate": 1.6804194345973965e-06, "loss": 0.0681, "step": 678900 }, { "epoch": 6.68, "grad_norm": 8.189777374267578, "learning_rate": 1.6802953121431481e-06, "loss": 0.0241, "step": 678925 }, { "epoch": 6.68, "grad_norm": 0.5636671185493469, "learning_rate": 1.6801761545870696e-06, "loss": 0.0718, "step": 678950 }, { "epoch": 6.68, "grad_norm": 4.793712139129639, "learning_rate": 1.6800520321328212e-06, "loss": 0.0256, "step": 678975 }, { "epoch": 6.68, "grad_norm": 0.1720699816942215, "learning_rate": 1.6799279096785726e-06, "loss": 0.0685, "step": 679000 }, { "epoch": 6.68, "grad_norm": 19.650447845458984, "learning_rate": 1.6798037872243243e-06, "loss": 0.0273, "step": 679025 }, { "epoch": 6.68, "grad_norm": 3.9152796268463135, "learning_rate": 1.6796796647700759e-06, "loss": 0.0761, "step": 679050 }, { "epoch": 6.68, "grad_norm": 11.967676162719727, "learning_rate": 1.679555542315827e-06, "loss": 0.0236, "step": 679075 }, { "epoch": 6.68, "grad_norm": 1.929141879081726, "learning_rate": 1.6794314198615787e-06, "loss": 0.0782, "step": 679100 }, { "epoch": 6.68, "grad_norm": 8.399786949157715, "learning_rate": 1.6793072974073304e-06, "loss": 0.0265, "step": 679125 }, { "epoch": 6.68, "grad_norm": 0.02253740280866623, "learning_rate": 1.6791831749530818e-06, "loss": 0.0705, "step": 679150 }, { "epoch": 6.68, "grad_norm": 6.584884166717529, "learning_rate": 1.6790590524988334e-06, "loss": 0.0167, "step": 679175 }, { "epoch": 6.68, "grad_norm": 4.514960765838623, "learning_rate": 1.6789349300445849e-06, "loss": 0.0853, "step": 679200 }, { "epoch": 6.68, "grad_norm": 5.661526679992676, "learning_rate": 1.6788108075903365e-06, "loss": 0.0295, "step": 679225 }, { "epoch": 6.68, "grad_norm": 4.478940963745117, "learning_rate": 1.6786866851360881e-06, "loss": 0.0799, "step": 679250 }, { "epoch": 6.68, "grad_norm": 5.974176406860352, "learning_rate": 1.6785625626818393e-06, "loss": 0.0217, "step": 679275 }, { "epoch": 6.68, "grad_norm": 0.3288547694683075, "learning_rate": 1.678438440227591e-06, "loss": 0.076, "step": 679300 }, { "epoch": 6.68, "grad_norm": 11.16016960144043, "learning_rate": 1.6783143177733426e-06, "loss": 0.0301, "step": 679325 }, { "epoch": 6.68, "grad_norm": 3.865699529647827, "learning_rate": 1.678190195319094e-06, "loss": 0.0711, "step": 679350 }, { "epoch": 6.68, "grad_norm": 6.87874174118042, "learning_rate": 1.6780660728648457e-06, "loss": 0.0246, "step": 679375 }, { "epoch": 6.68, "grad_norm": 1.2445178031921387, "learning_rate": 1.6779419504105973e-06, "loss": 0.0719, "step": 679400 }, { "epoch": 6.68, "grad_norm": 10.622961044311523, "learning_rate": 1.6778178279563487e-06, "loss": 0.0328, "step": 679425 }, { "epoch": 6.68, "grad_norm": 0.24425628781318665, "learning_rate": 1.6776937055021004e-06, "loss": 0.0902, "step": 679450 }, { "epoch": 6.68, "grad_norm": 1.8649812936782837, "learning_rate": 1.677569583047852e-06, "loss": 0.0196, "step": 679475 }, { "epoch": 6.68, "grad_norm": 2.7250030040740967, "learning_rate": 1.6774454605936032e-06, "loss": 0.0519, "step": 679500 }, { "epoch": 6.68, "grad_norm": 7.077282905578613, "learning_rate": 1.6773213381393548e-06, "loss": 0.0305, "step": 679525 }, { "epoch": 6.68, "grad_norm": 0.6477802395820618, "learning_rate": 1.6771972156851065e-06, "loss": 0.0824, "step": 679550 }, { "epoch": 6.68, "grad_norm": 2.5235393047332764, "learning_rate": 1.677073093230858e-06, "loss": 0.0254, "step": 679575 }, { "epoch": 6.68, "grad_norm": 6.291944980621338, "learning_rate": 1.6769489707766095e-06, "loss": 0.0982, "step": 679600 }, { "epoch": 6.68, "grad_norm": 11.190606117248535, "learning_rate": 1.676824848322361e-06, "loss": 0.0306, "step": 679625 }, { "epoch": 6.68, "grad_norm": 0.5447517037391663, "learning_rate": 1.6767007258681126e-06, "loss": 0.0879, "step": 679650 }, { "epoch": 6.68, "grad_norm": 9.744784355163574, "learning_rate": 1.6765766034138642e-06, "loss": 0.0218, "step": 679675 }, { "epoch": 6.68, "grad_norm": 1.959312081336975, "learning_rate": 1.6764524809596156e-06, "loss": 0.082, "step": 679700 }, { "epoch": 6.68, "grad_norm": 6.990779876708984, "learning_rate": 1.6763283585053673e-06, "loss": 0.032, "step": 679725 }, { "epoch": 6.68, "grad_norm": 6.722965240478516, "learning_rate": 1.676204236051119e-06, "loss": 0.0802, "step": 679750 }, { "epoch": 6.68, "grad_norm": 11.80063533782959, "learning_rate": 1.6760801135968701e-06, "loss": 0.0211, "step": 679775 }, { "epoch": 6.68, "grad_norm": 1.2501994371414185, "learning_rate": 1.6759559911426218e-06, "loss": 0.1067, "step": 679800 }, { "epoch": 6.68, "grad_norm": 9.267934799194336, "learning_rate": 1.6758318686883734e-06, "loss": 0.0305, "step": 679825 }, { "epoch": 6.68, "grad_norm": 3.4210903644561768, "learning_rate": 1.6757077462341248e-06, "loss": 0.1038, "step": 679850 }, { "epoch": 6.68, "grad_norm": 3.932401657104492, "learning_rate": 1.6755836237798765e-06, "loss": 0.0396, "step": 679875 }, { "epoch": 6.68, "grad_norm": 3.9565062522888184, "learning_rate": 1.675459501325628e-06, "loss": 0.0808, "step": 679900 }, { "epoch": 6.69, "grad_norm": 8.252156257629395, "learning_rate": 1.6753353788713795e-06, "loss": 0.0221, "step": 679925 }, { "epoch": 6.69, "grad_norm": 3.9832780361175537, "learning_rate": 1.6752112564171311e-06, "loss": 0.0885, "step": 679950 }, { "epoch": 6.69, "grad_norm": 1.5790281295776367, "learning_rate": 1.6750871339628828e-06, "loss": 0.0228, "step": 679975 }, { "epoch": 6.69, "grad_norm": 4.262242317199707, "learning_rate": 1.674963011508634e-06, "loss": 0.0652, "step": 680000 }, { "epoch": 6.69, "eval_loss": 0.8267467021942139, "eval_runtime": 6135.6737, "eval_samples_per_second": 1.543, "eval_steps_per_second": 0.193, "eval_wer": 0.11341981587587717, "step": 680000 }, { "epoch": 6.69, "grad_norm": 1.5535058975219727, "learning_rate": 1.6748388890543856e-06, "loss": 0.0215, "step": 680025 }, { "epoch": 6.69, "grad_norm": 0.1415526270866394, "learning_rate": 1.674714766600137e-06, "loss": 0.0598, "step": 680050 }, { "epoch": 6.69, "grad_norm": 0.1305982768535614, "learning_rate": 1.6745906441458887e-06, "loss": 0.0172, "step": 680075 }, { "epoch": 6.69, "grad_norm": 1.0115551948547363, "learning_rate": 1.6744665216916403e-06, "loss": 0.0812, "step": 680100 }, { "epoch": 6.69, "grad_norm": 2.958463430404663, "learning_rate": 1.6743423992373917e-06, "loss": 0.0271, "step": 680125 }, { "epoch": 6.69, "grad_norm": 0.08637543022632599, "learning_rate": 1.6742182767831434e-06, "loss": 0.0718, "step": 680150 }, { "epoch": 6.69, "grad_norm": 1.5187337398529053, "learning_rate": 1.674094154328895e-06, "loss": 0.0267, "step": 680175 }, { "epoch": 6.69, "grad_norm": 0.7431491613388062, "learning_rate": 1.6739700318746462e-06, "loss": 0.0848, "step": 680200 }, { "epoch": 6.69, "grad_norm": 7.312399387359619, "learning_rate": 1.6738459094203979e-06, "loss": 0.0191, "step": 680225 }, { "epoch": 6.69, "grad_norm": 6.529848575592041, "learning_rate": 1.6737217869661495e-06, "loss": 0.0883, "step": 680250 }, { "epoch": 6.69, "grad_norm": 10.216995239257812, "learning_rate": 1.673597664511901e-06, "loss": 0.0257, "step": 680275 }, { "epoch": 6.69, "grad_norm": 0.03218092769384384, "learning_rate": 1.6734735420576526e-06, "loss": 0.0717, "step": 680300 }, { "epoch": 6.69, "grad_norm": 0.41504955291748047, "learning_rate": 1.6733494196034042e-06, "loss": 0.0145, "step": 680325 }, { "epoch": 6.69, "grad_norm": 5.181674003601074, "learning_rate": 1.6732252971491556e-06, "loss": 0.0737, "step": 680350 }, { "epoch": 6.69, "grad_norm": 6.0407514572143555, "learning_rate": 1.6731011746949072e-06, "loss": 0.019, "step": 680375 }, { "epoch": 6.69, "grad_norm": 4.922051906585693, "learning_rate": 1.6729770522406589e-06, "loss": 0.0575, "step": 680400 }, { "epoch": 6.69, "grad_norm": 4.292418003082275, "learning_rate": 1.67285292978641e-06, "loss": 0.0242, "step": 680425 }, { "epoch": 6.69, "grad_norm": 4.588592529296875, "learning_rate": 1.6727288073321617e-06, "loss": 0.0667, "step": 680450 }, { "epoch": 6.69, "grad_norm": 2.2229480743408203, "learning_rate": 1.6726046848779132e-06, "loss": 0.0177, "step": 680475 }, { "epoch": 6.69, "grad_norm": 2.5095930099487305, "learning_rate": 1.6724805624236648e-06, "loss": 0.0778, "step": 680500 }, { "epoch": 6.69, "grad_norm": 12.34144115447998, "learning_rate": 1.6723564399694164e-06, "loss": 0.0178, "step": 680525 }, { "epoch": 6.69, "grad_norm": 1.599936842918396, "learning_rate": 1.6722323175151678e-06, "loss": 0.0597, "step": 680550 }, { "epoch": 6.69, "grad_norm": 8.269831657409668, "learning_rate": 1.6721081950609195e-06, "loss": 0.0268, "step": 680575 }, { "epoch": 6.69, "grad_norm": 2.4425418376922607, "learning_rate": 1.6719840726066711e-06, "loss": 0.0828, "step": 680600 }, { "epoch": 6.69, "grad_norm": 10.942980766296387, "learning_rate": 1.6718599501524223e-06, "loss": 0.0212, "step": 680625 }, { "epoch": 6.69, "grad_norm": 3.7532997131347656, "learning_rate": 1.671735827698174e-06, "loss": 0.0683, "step": 680650 }, { "epoch": 6.69, "grad_norm": 5.498628616333008, "learning_rate": 1.6716117052439256e-06, "loss": 0.0126, "step": 680675 }, { "epoch": 6.69, "grad_norm": 2.9866697788238525, "learning_rate": 1.671487582789677e-06, "loss": 0.0878, "step": 680700 }, { "epoch": 6.69, "grad_norm": 5.391308784484863, "learning_rate": 1.6713634603354287e-06, "loss": 0.0162, "step": 680725 }, { "epoch": 6.69, "grad_norm": 2.5753817558288574, "learning_rate": 1.6712393378811803e-06, "loss": 0.068, "step": 680750 }, { "epoch": 6.69, "grad_norm": 0.8213198781013489, "learning_rate": 1.6711152154269317e-06, "loss": 0.0256, "step": 680775 }, { "epoch": 6.69, "grad_norm": 0.005887779872864485, "learning_rate": 1.6709910929726834e-06, "loss": 0.0819, "step": 680800 }, { "epoch": 6.69, "grad_norm": 12.938850402832031, "learning_rate": 1.670866970518435e-06, "loss": 0.0165, "step": 680825 }, { "epoch": 6.69, "grad_norm": 2.1497490406036377, "learning_rate": 1.6707428480641862e-06, "loss": 0.0728, "step": 680850 }, { "epoch": 6.69, "grad_norm": 10.958934783935547, "learning_rate": 1.6706187256099378e-06, "loss": 0.0184, "step": 680875 }, { "epoch": 6.69, "grad_norm": 0.7115979194641113, "learning_rate": 1.6704946031556893e-06, "loss": 0.0663, "step": 680900 }, { "epoch": 6.69, "grad_norm": 10.905889511108398, "learning_rate": 1.6703704807014409e-06, "loss": 0.0198, "step": 680925 }, { "epoch": 6.7, "grad_norm": 9.118010520935059, "learning_rate": 1.6702463582471925e-06, "loss": 0.0868, "step": 680950 }, { "epoch": 6.7, "grad_norm": 7.57177209854126, "learning_rate": 1.670122235792944e-06, "loss": 0.0286, "step": 680975 }, { "epoch": 6.7, "grad_norm": 5.055151462554932, "learning_rate": 1.6699981133386956e-06, "loss": 0.0807, "step": 681000 }, { "epoch": 6.7, "grad_norm": 11.245821952819824, "learning_rate": 1.6698739908844472e-06, "loss": 0.0324, "step": 681025 }, { "epoch": 6.7, "grad_norm": 3.5956695079803467, "learning_rate": 1.6697498684301986e-06, "loss": 0.0677, "step": 681050 }, { "epoch": 6.7, "grad_norm": 0.13632889091968536, "learning_rate": 1.6696257459759503e-06, "loss": 0.0317, "step": 681075 }, { "epoch": 6.7, "grad_norm": 1.2107466459274292, "learning_rate": 1.669501623521702e-06, "loss": 0.0745, "step": 681100 }, { "epoch": 6.7, "grad_norm": 4.632457256317139, "learning_rate": 1.6693775010674531e-06, "loss": 0.0248, "step": 681125 }, { "epoch": 6.7, "grad_norm": 2.7874815464019775, "learning_rate": 1.6692533786132048e-06, "loss": 0.0678, "step": 681150 }, { "epoch": 6.7, "grad_norm": 1.040181279182434, "learning_rate": 1.6691292561589564e-06, "loss": 0.0244, "step": 681175 }, { "epoch": 6.7, "grad_norm": 3.377138614654541, "learning_rate": 1.6690100986028778e-06, "loss": 0.0697, "step": 681200 }, { "epoch": 6.7, "grad_norm": 5.166538238525391, "learning_rate": 1.6688859761486293e-06, "loss": 0.0204, "step": 681225 }, { "epoch": 6.7, "grad_norm": 2.404233694076538, "learning_rate": 1.668761853694381e-06, "loss": 0.0661, "step": 681250 }, { "epoch": 6.7, "grad_norm": 5.811405658721924, "learning_rate": 1.6686377312401325e-06, "loss": 0.0231, "step": 681275 }, { "epoch": 6.7, "grad_norm": 1.3764607906341553, "learning_rate": 1.668513608785884e-06, "loss": 0.0883, "step": 681300 }, { "epoch": 6.7, "grad_norm": 2.8516063690185547, "learning_rate": 1.6683894863316356e-06, "loss": 0.0157, "step": 681325 }, { "epoch": 6.7, "grad_norm": 0.45008739829063416, "learning_rate": 1.6682653638773872e-06, "loss": 0.0809, "step": 681350 }, { "epoch": 6.7, "grad_norm": 1.7606781721115112, "learning_rate": 1.6681412414231384e-06, "loss": 0.0284, "step": 681375 }, { "epoch": 6.7, "grad_norm": 0.026728855445981026, "learning_rate": 1.66801711896889e-06, "loss": 0.0673, "step": 681400 }, { "epoch": 6.7, "grad_norm": 15.42542552947998, "learning_rate": 1.6678929965146415e-06, "loss": 0.0337, "step": 681425 }, { "epoch": 6.7, "grad_norm": 0.9964784979820251, "learning_rate": 1.6677688740603931e-06, "loss": 0.0786, "step": 681450 }, { "epoch": 6.7, "grad_norm": 2.331629514694214, "learning_rate": 1.6676447516061448e-06, "loss": 0.012, "step": 681475 }, { "epoch": 6.7, "grad_norm": 2.8660597801208496, "learning_rate": 1.6675206291518962e-06, "loss": 0.0791, "step": 681500 }, { "epoch": 6.7, "grad_norm": 5.215603828430176, "learning_rate": 1.6673965066976478e-06, "loss": 0.0348, "step": 681525 }, { "epoch": 6.7, "grad_norm": 1.7780835628509521, "learning_rate": 1.6672723842433995e-06, "loss": 0.0739, "step": 681550 }, { "epoch": 6.7, "grad_norm": 4.796206474304199, "learning_rate": 1.6671482617891507e-06, "loss": 0.0157, "step": 681575 }, { "epoch": 6.7, "grad_norm": 5.4523024559021, "learning_rate": 1.6670241393349023e-06, "loss": 0.1022, "step": 681600 }, { "epoch": 6.7, "grad_norm": 9.071488380432129, "learning_rate": 1.666900016880654e-06, "loss": 0.0208, "step": 681625 }, { "epoch": 6.7, "grad_norm": 1.216083288192749, "learning_rate": 1.6667758944264054e-06, "loss": 0.0819, "step": 681650 }, { "epoch": 6.7, "grad_norm": 16.766080856323242, "learning_rate": 1.666651771972157e-06, "loss": 0.0223, "step": 681675 }, { "epoch": 6.7, "grad_norm": 4.595904350280762, "learning_rate": 1.6665276495179086e-06, "loss": 0.0995, "step": 681700 }, { "epoch": 6.7, "grad_norm": 12.292693138122559, "learning_rate": 1.66640352706366e-06, "loss": 0.0296, "step": 681725 }, { "epoch": 6.7, "grad_norm": 0.39185646176338196, "learning_rate": 1.6662794046094117e-06, "loss": 0.0684, "step": 681750 }, { "epoch": 6.7, "grad_norm": 7.520886421203613, "learning_rate": 1.6661552821551633e-06, "loss": 0.0352, "step": 681775 }, { "epoch": 6.7, "grad_norm": 5.032987594604492, "learning_rate": 1.6660311597009145e-06, "loss": 0.0723, "step": 681800 }, { "epoch": 6.7, "grad_norm": 9.909835815429688, "learning_rate": 1.6659070372466662e-06, "loss": 0.0269, "step": 681825 }, { "epoch": 6.7, "grad_norm": 1.335146188735962, "learning_rate": 1.6657829147924178e-06, "loss": 0.0624, "step": 681850 }, { "epoch": 6.7, "grad_norm": 12.426833152770996, "learning_rate": 1.6656587923381692e-06, "loss": 0.0316, "step": 681875 }, { "epoch": 6.7, "grad_norm": 4.00663423538208, "learning_rate": 1.6655346698839209e-06, "loss": 0.0829, "step": 681900 }, { "epoch": 6.7, "grad_norm": 15.895898818969727, "learning_rate": 1.6654105474296723e-06, "loss": 0.0164, "step": 681925 }, { "epoch": 6.71, "grad_norm": 0.2303258329629898, "learning_rate": 1.665286424975424e-06, "loss": 0.0634, "step": 681950 }, { "epoch": 6.71, "grad_norm": 11.688129425048828, "learning_rate": 1.6651623025211756e-06, "loss": 0.0197, "step": 681975 }, { "epoch": 6.71, "grad_norm": 3.99362850189209, "learning_rate": 1.6650381800669268e-06, "loss": 0.0674, "step": 682000 }, { "epoch": 6.71, "grad_norm": 4.511620044708252, "learning_rate": 1.6649140576126784e-06, "loss": 0.0226, "step": 682025 }, { "epoch": 6.71, "grad_norm": 1.2382498979568481, "learning_rate": 1.66478993515843e-06, "loss": 0.0788, "step": 682050 }, { "epoch": 6.71, "grad_norm": 10.245896339416504, "learning_rate": 1.6646658127041815e-06, "loss": 0.0221, "step": 682075 }, { "epoch": 6.71, "grad_norm": 4.15673828125, "learning_rate": 1.664541690249933e-06, "loss": 0.1047, "step": 682100 }, { "epoch": 6.71, "grad_norm": 0.639319658279419, "learning_rate": 1.6644175677956847e-06, "loss": 0.0148, "step": 682125 }, { "epoch": 6.71, "grad_norm": 3.1912682056427, "learning_rate": 1.6642934453414362e-06, "loss": 0.062, "step": 682150 }, { "epoch": 6.71, "grad_norm": 11.10041332244873, "learning_rate": 1.6641693228871878e-06, "loss": 0.0399, "step": 682175 }, { "epoch": 6.71, "grad_norm": 1.0987516641616821, "learning_rate": 1.6640452004329394e-06, "loss": 0.0606, "step": 682200 }, { "epoch": 6.71, "grad_norm": 13.190505027770996, "learning_rate": 1.6639210779786906e-06, "loss": 0.0169, "step": 682225 }, { "epoch": 6.71, "grad_norm": 2.4784960746765137, "learning_rate": 1.6637969555244423e-06, "loss": 0.0691, "step": 682250 }, { "epoch": 6.71, "grad_norm": 10.973413467407227, "learning_rate": 1.663672833070194e-06, "loss": 0.0318, "step": 682275 }, { "epoch": 6.71, "grad_norm": 5.085402965545654, "learning_rate": 1.6635487106159453e-06, "loss": 0.089, "step": 682300 }, { "epoch": 6.71, "grad_norm": 4.222289085388184, "learning_rate": 1.663424588161697e-06, "loss": 0.0158, "step": 682325 }, { "epoch": 6.71, "grad_norm": 9.429658889770508, "learning_rate": 1.6633004657074484e-06, "loss": 0.1035, "step": 682350 }, { "epoch": 6.71, "grad_norm": 14.023848533630371, "learning_rate": 1.6631763432532e-06, "loss": 0.031, "step": 682375 }, { "epoch": 6.71, "grad_norm": 0.3940577208995819, "learning_rate": 1.6630522207989517e-06, "loss": 0.0839, "step": 682400 }, { "epoch": 6.71, "grad_norm": 3.7232778072357178, "learning_rate": 1.6629280983447029e-06, "loss": 0.0268, "step": 682425 }, { "epoch": 6.71, "grad_norm": 2.49838924407959, "learning_rate": 1.6628039758904545e-06, "loss": 0.0682, "step": 682450 }, { "epoch": 6.71, "grad_norm": 19.44001579284668, "learning_rate": 1.6626798534362061e-06, "loss": 0.0298, "step": 682475 }, { "epoch": 6.71, "grad_norm": 4.412814140319824, "learning_rate": 1.6625557309819576e-06, "loss": 0.0956, "step": 682500 }, { "epoch": 6.71, "grad_norm": 5.962761878967285, "learning_rate": 1.6624316085277092e-06, "loss": 0.021, "step": 682525 }, { "epoch": 6.71, "grad_norm": 0.40053048729896545, "learning_rate": 1.6623074860734608e-06, "loss": 0.0902, "step": 682550 }, { "epoch": 6.71, "grad_norm": 8.634879112243652, "learning_rate": 1.6621833636192123e-06, "loss": 0.0268, "step": 682575 }, { "epoch": 6.71, "grad_norm": 1.8365205526351929, "learning_rate": 1.662059241164964e-06, "loss": 0.0758, "step": 682600 }, { "epoch": 6.71, "grad_norm": 16.054237365722656, "learning_rate": 1.6619351187107155e-06, "loss": 0.0179, "step": 682625 }, { "epoch": 6.71, "grad_norm": 6.488502502441406, "learning_rate": 1.661810996256467e-06, "loss": 0.1093, "step": 682650 }, { "epoch": 6.71, "grad_norm": 5.278752326965332, "learning_rate": 1.6616868738022186e-06, "loss": 0.0135, "step": 682675 }, { "epoch": 6.71, "grad_norm": 0.05749412253499031, "learning_rate": 1.6615627513479702e-06, "loss": 0.0563, "step": 682700 }, { "epoch": 6.71, "grad_norm": 9.08558177947998, "learning_rate": 1.6614386288937214e-06, "loss": 0.0341, "step": 682725 }, { "epoch": 6.71, "grad_norm": 7.819126129150391, "learning_rate": 1.661314506439473e-06, "loss": 0.0965, "step": 682750 }, { "epoch": 6.71, "grad_norm": 7.712095737457275, "learning_rate": 1.6611903839852245e-06, "loss": 0.0222, "step": 682775 }, { "epoch": 6.71, "grad_norm": 3.8106181621551514, "learning_rate": 1.6610662615309761e-06, "loss": 0.0476, "step": 682800 }, { "epoch": 6.71, "grad_norm": 1.9166656732559204, "learning_rate": 1.6609421390767278e-06, "loss": 0.0181, "step": 682825 }, { "epoch": 6.71, "grad_norm": 9.156437873840332, "learning_rate": 1.6608180166224792e-06, "loss": 0.0857, "step": 682850 }, { "epoch": 6.71, "grad_norm": 0.20151321589946747, "learning_rate": 1.6606938941682308e-06, "loss": 0.0196, "step": 682875 }, { "epoch": 6.71, "grad_norm": 0.07763426005840302, "learning_rate": 1.6605697717139825e-06, "loss": 0.0729, "step": 682900 }, { "epoch": 6.71, "grad_norm": 5.891745090484619, "learning_rate": 1.6604456492597337e-06, "loss": 0.017, "step": 682925 }, { "epoch": 6.71, "grad_norm": 2.727383613586426, "learning_rate": 1.6603215268054853e-06, "loss": 0.0664, "step": 682950 }, { "epoch": 6.72, "grad_norm": 9.328913688659668, "learning_rate": 1.660197404351237e-06, "loss": 0.0157, "step": 682975 }, { "epoch": 6.72, "grad_norm": 0.4942895472049713, "learning_rate": 1.6600732818969884e-06, "loss": 0.0519, "step": 683000 }, { "epoch": 6.72, "grad_norm": 0.8705186247825623, "learning_rate": 1.65994915944274e-06, "loss": 0.0241, "step": 683025 }, { "epoch": 6.72, "grad_norm": 7.3742194175720215, "learning_rate": 1.6598250369884916e-06, "loss": 0.0629, "step": 683050 }, { "epoch": 6.72, "grad_norm": 19.361572265625, "learning_rate": 1.659700914534243e-06, "loss": 0.0316, "step": 683075 }, { "epoch": 6.72, "grad_norm": 1.032612681388855, "learning_rate": 1.6595767920799947e-06, "loss": 0.0609, "step": 683100 }, { "epoch": 6.72, "grad_norm": 16.515260696411133, "learning_rate": 1.6594526696257463e-06, "loss": 0.0371, "step": 683125 }, { "epoch": 6.72, "grad_norm": 3.001492500305176, "learning_rate": 1.6593285471714975e-06, "loss": 0.0872, "step": 683150 }, { "epoch": 6.72, "grad_norm": 7.636008262634277, "learning_rate": 1.6592044247172492e-06, "loss": 0.0225, "step": 683175 }, { "epoch": 6.72, "grad_norm": 0.4289184510707855, "learning_rate": 1.6590803022630006e-06, "loss": 0.053, "step": 683200 }, { "epoch": 6.72, "grad_norm": 9.509406089782715, "learning_rate": 1.6589561798087522e-06, "loss": 0.0342, "step": 683225 }, { "epoch": 6.72, "grad_norm": 5.18288516998291, "learning_rate": 1.6588320573545039e-06, "loss": 0.085, "step": 683250 }, { "epoch": 6.72, "grad_norm": 4.695738792419434, "learning_rate": 1.6587079349002553e-06, "loss": 0.0219, "step": 683275 }, { "epoch": 6.72, "grad_norm": 2.3109290599823, "learning_rate": 1.658583812446007e-06, "loss": 0.0905, "step": 683300 }, { "epoch": 6.72, "grad_norm": 9.292299270629883, "learning_rate": 1.6584596899917586e-06, "loss": 0.0335, "step": 683325 }, { "epoch": 6.72, "grad_norm": 7.160823345184326, "learning_rate": 1.6583355675375098e-06, "loss": 0.0975, "step": 683350 }, { "epoch": 6.72, "grad_norm": 0.9741831421852112, "learning_rate": 1.6582114450832614e-06, "loss": 0.0269, "step": 683375 }, { "epoch": 6.72, "grad_norm": 0.7245192527770996, "learning_rate": 1.658087322629013e-06, "loss": 0.064, "step": 683400 }, { "epoch": 6.72, "grad_norm": 4.997773170471191, "learning_rate": 1.6579632001747645e-06, "loss": 0.0287, "step": 683425 }, { "epoch": 6.72, "grad_norm": 0.04209000989794731, "learning_rate": 1.657839077720516e-06, "loss": 0.1101, "step": 683450 }, { "epoch": 6.72, "grad_norm": 6.505993366241455, "learning_rate": 1.6577149552662677e-06, "loss": 0.0208, "step": 683475 }, { "epoch": 6.72, "grad_norm": 3.311086654663086, "learning_rate": 1.6575908328120192e-06, "loss": 0.0745, "step": 683500 }, { "epoch": 6.72, "grad_norm": 13.58708381652832, "learning_rate": 1.6574667103577708e-06, "loss": 0.0197, "step": 683525 }, { "epoch": 6.72, "grad_norm": 1.4715193510055542, "learning_rate": 1.6573425879035224e-06, "loss": 0.0823, "step": 683550 }, { "epoch": 6.72, "grad_norm": 7.805494785308838, "learning_rate": 1.6572184654492736e-06, "loss": 0.019, "step": 683575 }, { "epoch": 6.72, "grad_norm": 3.816849946975708, "learning_rate": 1.6570943429950253e-06, "loss": 0.0649, "step": 683600 }, { "epoch": 6.72, "grad_norm": 23.172685623168945, "learning_rate": 1.6569702205407767e-06, "loss": 0.0315, "step": 683625 }, { "epoch": 6.72, "grad_norm": 2.3153653144836426, "learning_rate": 1.6568460980865283e-06, "loss": 0.0805, "step": 683650 }, { "epoch": 6.72, "grad_norm": 8.257074356079102, "learning_rate": 1.65672197563228e-06, "loss": 0.0135, "step": 683675 }, { "epoch": 6.72, "grad_norm": 4.07957124710083, "learning_rate": 1.6565978531780314e-06, "loss": 0.0882, "step": 683700 }, { "epoch": 6.72, "grad_norm": 0.939885675907135, "learning_rate": 1.656473730723783e-06, "loss": 0.0388, "step": 683725 }, { "epoch": 6.72, "grad_norm": 1.332030177116394, "learning_rate": 1.6563496082695347e-06, "loss": 0.0943, "step": 683750 }, { "epoch": 6.72, "grad_norm": 14.722288131713867, "learning_rate": 1.6562254858152859e-06, "loss": 0.0222, "step": 683775 }, { "epoch": 6.72, "grad_norm": 4.034804344177246, "learning_rate": 1.6561013633610375e-06, "loss": 0.0853, "step": 683800 }, { "epoch": 6.72, "grad_norm": 10.104656219482422, "learning_rate": 1.6559772409067891e-06, "loss": 0.02, "step": 683825 }, { "epoch": 6.72, "grad_norm": 2.1051876544952393, "learning_rate": 1.6558531184525406e-06, "loss": 0.0684, "step": 683850 }, { "epoch": 6.72, "grad_norm": 2.9528846740722656, "learning_rate": 1.6557289959982922e-06, "loss": 0.0156, "step": 683875 }, { "epoch": 6.72, "grad_norm": 1.2601370811462402, "learning_rate": 1.6556048735440438e-06, "loss": 0.0746, "step": 683900 }, { "epoch": 6.72, "grad_norm": 0.2325662076473236, "learning_rate": 1.6554807510897953e-06, "loss": 0.0216, "step": 683925 }, { "epoch": 6.72, "grad_norm": 3.504847288131714, "learning_rate": 1.6553566286355469e-06, "loss": 0.0824, "step": 683950 }, { "epoch": 6.72, "grad_norm": 9.7081298828125, "learning_rate": 1.6552325061812985e-06, "loss": 0.0262, "step": 683975 }, { "epoch": 6.73, "grad_norm": 3.3135933876037598, "learning_rate": 1.65510838372705e-06, "loss": 0.0725, "step": 684000 }, { "epoch": 6.73, "grad_norm": 7.1392903327941895, "learning_rate": 1.6549842612728016e-06, "loss": 0.0169, "step": 684025 }, { "epoch": 6.73, "grad_norm": 0.8736053109169006, "learning_rate": 1.6548601388185528e-06, "loss": 0.0796, "step": 684050 }, { "epoch": 6.73, "grad_norm": 4.321681976318359, "learning_rate": 1.6547360163643044e-06, "loss": 0.027, "step": 684075 }, { "epoch": 6.73, "grad_norm": 0.31564581394195557, "learning_rate": 1.654611893910056e-06, "loss": 0.0734, "step": 684100 }, { "epoch": 6.73, "grad_norm": 3.2120466232299805, "learning_rate": 1.6544877714558075e-06, "loss": 0.0147, "step": 684125 }, { "epoch": 6.73, "grad_norm": 4.693608283996582, "learning_rate": 1.6543636490015591e-06, "loss": 0.0714, "step": 684150 }, { "epoch": 6.73, "grad_norm": 4.023394584655762, "learning_rate": 1.6542395265473108e-06, "loss": 0.0196, "step": 684175 }, { "epoch": 6.73, "grad_norm": 0.9356736540794373, "learning_rate": 1.6541154040930622e-06, "loss": 0.071, "step": 684200 }, { "epoch": 6.73, "grad_norm": 6.236721038818359, "learning_rate": 1.6539912816388138e-06, "loss": 0.0248, "step": 684225 }, { "epoch": 6.73, "grad_norm": 0.12253942340612411, "learning_rate": 1.6538671591845655e-06, "loss": 0.0691, "step": 684250 }, { "epoch": 6.73, "grad_norm": 7.2427239418029785, "learning_rate": 1.6537430367303167e-06, "loss": 0.0215, "step": 684275 }, { "epoch": 6.73, "grad_norm": 3.1620566844940186, "learning_rate": 1.6536238791742381e-06, "loss": 0.0935, "step": 684300 }, { "epoch": 6.73, "grad_norm": 5.447587966918945, "learning_rate": 1.6534997567199897e-06, "loss": 0.0195, "step": 684325 }, { "epoch": 6.73, "grad_norm": 5.741435527801514, "learning_rate": 1.6533756342657414e-06, "loss": 0.0856, "step": 684350 }, { "epoch": 6.73, "grad_norm": 0.7821307182312012, "learning_rate": 1.6532515118114928e-06, "loss": 0.0123, "step": 684375 }, { "epoch": 6.73, "grad_norm": 3.3779892921447754, "learning_rate": 1.6531273893572444e-06, "loss": 0.0796, "step": 684400 }, { "epoch": 6.73, "grad_norm": 25.25537872314453, "learning_rate": 1.653003266902996e-06, "loss": 0.032, "step": 684425 }, { "epoch": 6.73, "grad_norm": 0.7002432942390442, "learning_rate": 1.6528791444487475e-06, "loss": 0.0603, "step": 684450 }, { "epoch": 6.73, "grad_norm": 4.689559459686279, "learning_rate": 1.6527550219944991e-06, "loss": 0.0198, "step": 684475 }, { "epoch": 6.73, "grad_norm": 10.236876487731934, "learning_rate": 1.6526308995402508e-06, "loss": 0.0593, "step": 684500 }, { "epoch": 6.73, "grad_norm": 6.576087951660156, "learning_rate": 1.652506777086002e-06, "loss": 0.0377, "step": 684525 }, { "epoch": 6.73, "grad_norm": 0.3236995339393616, "learning_rate": 1.6523826546317536e-06, "loss": 0.0567, "step": 684550 }, { "epoch": 6.73, "grad_norm": 0.6815367937088013, "learning_rate": 1.652258532177505e-06, "loss": 0.0469, "step": 684575 }, { "epoch": 6.73, "grad_norm": 3.0607051849365234, "learning_rate": 1.6521344097232567e-06, "loss": 0.098, "step": 684600 }, { "epoch": 6.73, "grad_norm": 9.787720680236816, "learning_rate": 1.6520102872690083e-06, "loss": 0.017, "step": 684625 }, { "epoch": 6.73, "grad_norm": 7.177537441253662, "learning_rate": 1.6518861648147597e-06, "loss": 0.0916, "step": 684650 }, { "epoch": 6.73, "grad_norm": 9.155383110046387, "learning_rate": 1.6517620423605114e-06, "loss": 0.0313, "step": 684675 }, { "epoch": 6.73, "grad_norm": 3.6063079833984375, "learning_rate": 1.651637919906263e-06, "loss": 0.0637, "step": 684700 }, { "epoch": 6.73, "grad_norm": 1.6524226665496826, "learning_rate": 1.6515137974520142e-06, "loss": 0.0221, "step": 684725 }, { "epoch": 6.73, "grad_norm": 5.743736267089844, "learning_rate": 1.6513896749977658e-06, "loss": 0.0657, "step": 684750 }, { "epoch": 6.73, "grad_norm": 2.3125879764556885, "learning_rate": 1.6512655525435175e-06, "loss": 0.0226, "step": 684775 }, { "epoch": 6.73, "grad_norm": 2.775810480117798, "learning_rate": 1.651141430089269e-06, "loss": 0.0672, "step": 684800 }, { "epoch": 6.73, "grad_norm": 6.1974687576293945, "learning_rate": 1.6510173076350205e-06, "loss": 0.0144, "step": 684825 }, { "epoch": 6.73, "grad_norm": 5.478618144989014, "learning_rate": 1.6508931851807722e-06, "loss": 0.0725, "step": 684850 }, { "epoch": 6.73, "grad_norm": 12.91516399383545, "learning_rate": 1.6507690627265236e-06, "loss": 0.0199, "step": 684875 }, { "epoch": 6.73, "grad_norm": 1.9798551797866821, "learning_rate": 1.6506449402722752e-06, "loss": 0.0704, "step": 684900 }, { "epoch": 6.73, "grad_norm": 4.262415409088135, "learning_rate": 1.6505208178180269e-06, "loss": 0.0108, "step": 684925 }, { "epoch": 6.73, "grad_norm": 8.561869621276855, "learning_rate": 1.650396695363778e-06, "loss": 0.0751, "step": 684950 }, { "epoch": 6.73, "grad_norm": 0.41551515460014343, "learning_rate": 1.6502725729095297e-06, "loss": 0.0375, "step": 684975 }, { "epoch": 6.74, "grad_norm": 1.742174506187439, "learning_rate": 1.6501484504552811e-06, "loss": 0.0734, "step": 685000 }, { "epoch": 6.74, "grad_norm": 11.864574432373047, "learning_rate": 1.6500243280010328e-06, "loss": 0.0161, "step": 685025 }, { "epoch": 6.74, "grad_norm": 1.1462160348892212, "learning_rate": 1.6499002055467844e-06, "loss": 0.0698, "step": 685050 }, { "epoch": 6.74, "grad_norm": 12.000062942504883, "learning_rate": 1.6497760830925358e-06, "loss": 0.026, "step": 685075 }, { "epoch": 6.74, "grad_norm": 6.354269504547119, "learning_rate": 1.6496519606382875e-06, "loss": 0.0874, "step": 685100 }, { "epoch": 6.74, "grad_norm": 1.256982684135437, "learning_rate": 1.649527838184039e-06, "loss": 0.0244, "step": 685125 }, { "epoch": 6.74, "grad_norm": 0.6858804821968079, "learning_rate": 1.6494037157297903e-06, "loss": 0.0643, "step": 685150 }, { "epoch": 6.74, "grad_norm": 10.958889961242676, "learning_rate": 1.649279593275542e-06, "loss": 0.0243, "step": 685175 }, { "epoch": 6.74, "grad_norm": 4.151858329772949, "learning_rate": 1.6491554708212936e-06, "loss": 0.1024, "step": 685200 }, { "epoch": 6.74, "grad_norm": 8.439026832580566, "learning_rate": 1.649031348367045e-06, "loss": 0.0201, "step": 685225 }, { "epoch": 6.74, "grad_norm": 2.7406585216522217, "learning_rate": 1.6489072259127966e-06, "loss": 0.0757, "step": 685250 }, { "epoch": 6.74, "grad_norm": 8.466950416564941, "learning_rate": 1.6487831034585483e-06, "loss": 0.0242, "step": 685275 }, { "epoch": 6.74, "grad_norm": 8.278154373168945, "learning_rate": 1.6486589810042997e-06, "loss": 0.0789, "step": 685300 }, { "epoch": 6.74, "grad_norm": 6.985311031341553, "learning_rate": 1.6485348585500513e-06, "loss": 0.0223, "step": 685325 }, { "epoch": 6.74, "grad_norm": 4.346076965332031, "learning_rate": 1.648410736095803e-06, "loss": 0.0629, "step": 685350 }, { "epoch": 6.74, "grad_norm": 15.47170639038086, "learning_rate": 1.6482866136415544e-06, "loss": 0.0244, "step": 685375 }, { "epoch": 6.74, "grad_norm": 2.763808250427246, "learning_rate": 1.648162491187306e-06, "loss": 0.0628, "step": 685400 }, { "epoch": 6.74, "grad_norm": 11.181102752685547, "learning_rate": 1.6480383687330572e-06, "loss": 0.0184, "step": 685425 }, { "epoch": 6.74, "grad_norm": 3.0931789875030518, "learning_rate": 1.6479142462788089e-06, "loss": 0.0972, "step": 685450 }, { "epoch": 6.74, "grad_norm": 6.810793399810791, "learning_rate": 1.6477901238245605e-06, "loss": 0.0346, "step": 685475 }, { "epoch": 6.74, "grad_norm": 0.043067313730716705, "learning_rate": 1.647666001370312e-06, "loss": 0.0546, "step": 685500 }, { "epoch": 6.74, "grad_norm": 1.4370450973510742, "learning_rate": 1.6475418789160636e-06, "loss": 0.0232, "step": 685525 }, { "epoch": 6.74, "grad_norm": 2.116342306137085, "learning_rate": 1.6474177564618152e-06, "loss": 0.0901, "step": 685550 }, { "epoch": 6.74, "grad_norm": 2.891669273376465, "learning_rate": 1.6472936340075666e-06, "loss": 0.0279, "step": 685575 }, { "epoch": 6.74, "grad_norm": 3.1654396057128906, "learning_rate": 1.6471695115533183e-06, "loss": 0.0858, "step": 685600 }, { "epoch": 6.74, "grad_norm": 8.709150314331055, "learning_rate": 1.64704538909907e-06, "loss": 0.0234, "step": 685625 }, { "epoch": 6.74, "grad_norm": 1.800400972366333, "learning_rate": 1.6469212666448211e-06, "loss": 0.0875, "step": 685650 }, { "epoch": 6.74, "grad_norm": 12.533344268798828, "learning_rate": 1.6467971441905727e-06, "loss": 0.0314, "step": 685675 }, { "epoch": 6.74, "grad_norm": 2.161979913711548, "learning_rate": 1.6466730217363244e-06, "loss": 0.0701, "step": 685700 }, { "epoch": 6.74, "grad_norm": 10.431004524230957, "learning_rate": 1.6465488992820758e-06, "loss": 0.0254, "step": 685725 }, { "epoch": 6.74, "grad_norm": 0.3627760112285614, "learning_rate": 1.6464247768278274e-06, "loss": 0.0683, "step": 685750 }, { "epoch": 6.74, "grad_norm": 0.18685053288936615, "learning_rate": 1.646300654373579e-06, "loss": 0.018, "step": 685775 }, { "epoch": 6.74, "grad_norm": 2.662152051925659, "learning_rate": 1.6461765319193305e-06, "loss": 0.0911, "step": 685800 }, { "epoch": 6.74, "grad_norm": 1.2318997383117676, "learning_rate": 1.6460524094650821e-06, "loss": 0.0129, "step": 685825 }, { "epoch": 6.74, "grad_norm": 3.8024344444274902, "learning_rate": 1.6459282870108338e-06, "loss": 0.0814, "step": 685850 }, { "epoch": 6.74, "grad_norm": 14.330939292907715, "learning_rate": 1.645804164556585e-06, "loss": 0.0172, "step": 685875 }, { "epoch": 6.74, "grad_norm": 1.6153244972229004, "learning_rate": 1.6456800421023366e-06, "loss": 0.0733, "step": 685900 }, { "epoch": 6.74, "grad_norm": 2.2455193996429443, "learning_rate": 1.645555919648088e-06, "loss": 0.0176, "step": 685925 }, { "epoch": 6.74, "grad_norm": 2.1219892501831055, "learning_rate": 1.6454317971938397e-06, "loss": 0.062, "step": 685950 }, { "epoch": 6.74, "grad_norm": 14.389238357543945, "learning_rate": 1.6453076747395913e-06, "loss": 0.0204, "step": 685975 }, { "epoch": 6.74, "grad_norm": 7.9507036209106445, "learning_rate": 1.6451835522853427e-06, "loss": 0.0917, "step": 686000 }, { "epoch": 6.75, "grad_norm": 0.31563812494277954, "learning_rate": 1.6450594298310944e-06, "loss": 0.0386, "step": 686025 }, { "epoch": 6.75, "grad_norm": 2.253833055496216, "learning_rate": 1.644935307376846e-06, "loss": 0.0871, "step": 686050 }, { "epoch": 6.75, "grad_norm": 11.951864242553711, "learning_rate": 1.6448111849225972e-06, "loss": 0.0283, "step": 686075 }, { "epoch": 6.75, "grad_norm": 1.1353015899658203, "learning_rate": 1.6446870624683488e-06, "loss": 0.0966, "step": 686100 }, { "epoch": 6.75, "grad_norm": 7.405513763427734, "learning_rate": 1.6445629400141005e-06, "loss": 0.0309, "step": 686125 }, { "epoch": 6.75, "grad_norm": 3.2533633708953857, "learning_rate": 1.644438817559852e-06, "loss": 0.0843, "step": 686150 }, { "epoch": 6.75, "grad_norm": 1.9616066217422485, "learning_rate": 1.6443146951056035e-06, "loss": 0.0302, "step": 686175 }, { "epoch": 6.75, "grad_norm": 1.6599657535552979, "learning_rate": 1.6441905726513552e-06, "loss": 0.0691, "step": 686200 }, { "epoch": 6.75, "grad_norm": 16.77489471435547, "learning_rate": 1.6440664501971066e-06, "loss": 0.0249, "step": 686225 }, { "epoch": 6.75, "grad_norm": 4.819264888763428, "learning_rate": 1.6439423277428582e-06, "loss": 0.0796, "step": 686250 }, { "epoch": 6.75, "grad_norm": 11.211082458496094, "learning_rate": 1.6438182052886099e-06, "loss": 0.0291, "step": 686275 }, { "epoch": 6.75, "grad_norm": 8.717096328735352, "learning_rate": 1.643694082834361e-06, "loss": 0.0739, "step": 686300 }, { "epoch": 6.75, "grad_norm": 8.676280975341797, "learning_rate": 1.6435699603801127e-06, "loss": 0.0304, "step": 686325 }, { "epoch": 6.75, "grad_norm": 0.010733052156865597, "learning_rate": 1.6434458379258641e-06, "loss": 0.0711, "step": 686350 }, { "epoch": 6.75, "grad_norm": 5.936467170715332, "learning_rate": 1.6433217154716158e-06, "loss": 0.0197, "step": 686375 }, { "epoch": 6.75, "grad_norm": 3.2406704425811768, "learning_rate": 1.6431975930173674e-06, "loss": 0.0654, "step": 686400 }, { "epoch": 6.75, "grad_norm": 19.31289291381836, "learning_rate": 1.6430734705631188e-06, "loss": 0.0184, "step": 686425 }, { "epoch": 6.75, "grad_norm": 1.2749232053756714, "learning_rate": 1.6429493481088705e-06, "loss": 0.101, "step": 686450 }, { "epoch": 6.75, "grad_norm": 2.434375286102295, "learning_rate": 1.642825225654622e-06, "loss": 0.0155, "step": 686475 }, { "epoch": 6.75, "grad_norm": 1.2891629934310913, "learning_rate": 1.6427011032003733e-06, "loss": 0.0892, "step": 686500 }, { "epoch": 6.75, "grad_norm": 10.136670112609863, "learning_rate": 1.642576980746125e-06, "loss": 0.0225, "step": 686525 }, { "epoch": 6.75, "grad_norm": 11.939311027526855, "learning_rate": 1.6424578231900464e-06, "loss": 0.085, "step": 686550 }, { "epoch": 6.75, "grad_norm": 5.641979694366455, "learning_rate": 1.642333700735798e-06, "loss": 0.019, "step": 686575 }, { "epoch": 6.75, "grad_norm": 0.5769978761672974, "learning_rate": 1.6422095782815495e-06, "loss": 0.1009, "step": 686600 }, { "epoch": 6.75, "grad_norm": 8.890376091003418, "learning_rate": 1.642085455827301e-06, "loss": 0.0354, "step": 686625 }, { "epoch": 6.75, "grad_norm": 5.6428399085998535, "learning_rate": 1.6419613333730527e-06, "loss": 0.0734, "step": 686650 }, { "epoch": 6.75, "grad_norm": 1.3643847703933716, "learning_rate": 1.6418372109188041e-06, "loss": 0.022, "step": 686675 }, { "epoch": 6.75, "grad_norm": 3.506326198577881, "learning_rate": 1.6417130884645558e-06, "loss": 0.0614, "step": 686700 }, { "epoch": 6.75, "grad_norm": 11.089332580566406, "learning_rate": 1.6415889660103074e-06, "loss": 0.0178, "step": 686725 }, { "epoch": 6.75, "grad_norm": 3.151294708251953, "learning_rate": 1.6414648435560586e-06, "loss": 0.093, "step": 686750 }, { "epoch": 6.75, "grad_norm": 6.2032084465026855, "learning_rate": 1.6413407211018103e-06, "loss": 0.0259, "step": 686775 }, { "epoch": 6.75, "grad_norm": 5.007933139801025, "learning_rate": 1.641216598647562e-06, "loss": 0.0676, "step": 686800 }, { "epoch": 6.75, "grad_norm": 2.7336528301239014, "learning_rate": 1.6410924761933133e-06, "loss": 0.0285, "step": 686825 }, { "epoch": 6.75, "grad_norm": 1.9148228168487549, "learning_rate": 1.640968353739065e-06, "loss": 0.0619, "step": 686850 }, { "epoch": 6.75, "grad_norm": 4.441849231719971, "learning_rate": 1.6408442312848164e-06, "loss": 0.0151, "step": 686875 }, { "epoch": 6.75, "grad_norm": 0.2507716119289398, "learning_rate": 1.640720108830568e-06, "loss": 0.0719, "step": 686900 }, { "epoch": 6.75, "grad_norm": 8.140451431274414, "learning_rate": 1.6405959863763196e-06, "loss": 0.0279, "step": 686925 }, { "epoch": 6.75, "grad_norm": 2.686521530151367, "learning_rate": 1.640471863922071e-06, "loss": 0.0747, "step": 686950 }, { "epoch": 6.75, "grad_norm": 10.929335594177246, "learning_rate": 1.6403477414678227e-06, "loss": 0.03, "step": 686975 }, { "epoch": 6.75, "grad_norm": 4.607414245605469, "learning_rate": 1.6402236190135743e-06, "loss": 0.057, "step": 687000 }, { "epoch": 6.75, "grad_norm": 5.958776473999023, "learning_rate": 1.6400994965593256e-06, "loss": 0.0205, "step": 687025 }, { "epoch": 6.76, "grad_norm": 0.6575372815132141, "learning_rate": 1.6399753741050772e-06, "loss": 0.0659, "step": 687050 }, { "epoch": 6.76, "grad_norm": 1.8180526494979858, "learning_rate": 1.6398512516508288e-06, "loss": 0.0246, "step": 687075 }, { "epoch": 6.76, "grad_norm": 0.031575821340084076, "learning_rate": 1.6397271291965802e-06, "loss": 0.1012, "step": 687100 }, { "epoch": 6.76, "grad_norm": 5.2665114402771, "learning_rate": 1.6396030067423319e-06, "loss": 0.0256, "step": 687125 }, { "epoch": 6.76, "grad_norm": 3.834953546524048, "learning_rate": 1.6394788842880835e-06, "loss": 0.082, "step": 687150 }, { "epoch": 6.76, "grad_norm": 5.269959449768066, "learning_rate": 1.639354761833835e-06, "loss": 0.0125, "step": 687175 }, { "epoch": 6.76, "grad_norm": 1.5402095317840576, "learning_rate": 1.6392306393795866e-06, "loss": 0.0671, "step": 687200 }, { "epoch": 6.76, "grad_norm": 0.27165335416793823, "learning_rate": 1.6391065169253382e-06, "loss": 0.0189, "step": 687225 }, { "epoch": 6.76, "grad_norm": 0.8377663493156433, "learning_rate": 1.6389823944710894e-06, "loss": 0.0685, "step": 687250 }, { "epoch": 6.76, "grad_norm": 4.168988227844238, "learning_rate": 1.638858272016841e-06, "loss": 0.0225, "step": 687275 }, { "epoch": 6.76, "grad_norm": 6.788985729217529, "learning_rate": 1.6387341495625925e-06, "loss": 0.0816, "step": 687300 }, { "epoch": 6.76, "grad_norm": 8.969337463378906, "learning_rate": 1.6386100271083441e-06, "loss": 0.0218, "step": 687325 }, { "epoch": 6.76, "grad_norm": 2.1194803714752197, "learning_rate": 1.6384859046540958e-06, "loss": 0.0755, "step": 687350 }, { "epoch": 6.76, "grad_norm": 10.388110160827637, "learning_rate": 1.6383617821998472e-06, "loss": 0.0234, "step": 687375 }, { "epoch": 6.76, "grad_norm": 2.203376054763794, "learning_rate": 1.6382376597455988e-06, "loss": 0.0922, "step": 687400 }, { "epoch": 6.76, "grad_norm": 2.80348539352417, "learning_rate": 1.6381135372913504e-06, "loss": 0.023, "step": 687425 }, { "epoch": 6.76, "grad_norm": 0.316133588552475, "learning_rate": 1.6379894148371017e-06, "loss": 0.0618, "step": 687450 }, { "epoch": 6.76, "grad_norm": 14.750738143920898, "learning_rate": 1.6378652923828533e-06, "loss": 0.0331, "step": 687475 }, { "epoch": 6.76, "grad_norm": 0.5480672717094421, "learning_rate": 1.637741169928605e-06, "loss": 0.1111, "step": 687500 }, { "epoch": 6.76, "grad_norm": 5.808746337890625, "learning_rate": 1.6376170474743563e-06, "loss": 0.0382, "step": 687525 }, { "epoch": 6.76, "grad_norm": 0.7018790245056152, "learning_rate": 1.637492925020108e-06, "loss": 0.0671, "step": 687550 }, { "epoch": 6.76, "grad_norm": 7.784654140472412, "learning_rate": 1.6373688025658596e-06, "loss": 0.0162, "step": 687575 }, { "epoch": 6.76, "grad_norm": 0.5385494828224182, "learning_rate": 1.637244680111611e-06, "loss": 0.0754, "step": 687600 }, { "epoch": 6.76, "grad_norm": 1.24319589138031, "learning_rate": 1.6371205576573627e-06, "loss": 0.0212, "step": 687625 }, { "epoch": 6.76, "grad_norm": 2.2369582653045654, "learning_rate": 1.6369964352031143e-06, "loss": 0.0606, "step": 687650 }, { "epoch": 6.76, "grad_norm": 11.580633163452148, "learning_rate": 1.6368723127488655e-06, "loss": 0.0323, "step": 687675 }, { "epoch": 6.76, "grad_norm": 0.6683939695358276, "learning_rate": 1.6367481902946172e-06, "loss": 0.0786, "step": 687700 }, { "epoch": 6.76, "grad_norm": 3.311739921569824, "learning_rate": 1.6366240678403686e-06, "loss": 0.018, "step": 687725 }, { "epoch": 6.76, "grad_norm": 5.119847774505615, "learning_rate": 1.6364999453861202e-06, "loss": 0.0867, "step": 687750 }, { "epoch": 6.76, "grad_norm": 5.79502010345459, "learning_rate": 1.6363758229318719e-06, "loss": 0.0157, "step": 687775 }, { "epoch": 6.76, "grad_norm": 1.514922022819519, "learning_rate": 1.6362517004776233e-06, "loss": 0.0903, "step": 687800 }, { "epoch": 6.76, "grad_norm": 1.2921247482299805, "learning_rate": 1.636127578023375e-06, "loss": 0.0276, "step": 687825 }, { "epoch": 6.76, "grad_norm": 2.8987975120544434, "learning_rate": 1.6360034555691265e-06, "loss": 0.0726, "step": 687850 }, { "epoch": 6.76, "grad_norm": 7.886936187744141, "learning_rate": 1.6358793331148778e-06, "loss": 0.0243, "step": 687875 }, { "epoch": 6.76, "grad_norm": 9.404348373413086, "learning_rate": 1.6357552106606294e-06, "loss": 0.114, "step": 687900 }, { "epoch": 6.76, "grad_norm": 0.9560495018959045, "learning_rate": 1.635631088206381e-06, "loss": 0.0215, "step": 687925 }, { "epoch": 6.76, "grad_norm": 0.11750469356775284, "learning_rate": 1.6355069657521324e-06, "loss": 0.0731, "step": 687950 }, { "epoch": 6.76, "grad_norm": 4.860391139984131, "learning_rate": 1.635382843297884e-06, "loss": 0.0262, "step": 687975 }, { "epoch": 6.76, "grad_norm": 1.101871132850647, "learning_rate": 1.6352587208436357e-06, "loss": 0.0809, "step": 688000 }, { "epoch": 6.76, "grad_norm": 11.055747985839844, "learning_rate": 1.6351345983893871e-06, "loss": 0.032, "step": 688025 }, { "epoch": 6.77, "grad_norm": 0.30603280663490295, "learning_rate": 1.6350104759351388e-06, "loss": 0.0755, "step": 688050 }, { "epoch": 6.77, "grad_norm": 6.811429977416992, "learning_rate": 1.6348863534808904e-06, "loss": 0.0389, "step": 688075 }, { "epoch": 6.77, "grad_norm": 2.0771191120147705, "learning_rate": 1.6347622310266416e-06, "loss": 0.0852, "step": 688100 }, { "epoch": 6.77, "grad_norm": 17.060338973999023, "learning_rate": 1.6346381085723933e-06, "loss": 0.0192, "step": 688125 }, { "epoch": 6.77, "grad_norm": 3.158137798309326, "learning_rate": 1.6345139861181447e-06, "loss": 0.0946, "step": 688150 }, { "epoch": 6.77, "grad_norm": 10.520021438598633, "learning_rate": 1.6343898636638963e-06, "loss": 0.0229, "step": 688175 }, { "epoch": 6.77, "grad_norm": 2.224273681640625, "learning_rate": 1.634265741209648e-06, "loss": 0.0774, "step": 688200 }, { "epoch": 6.77, "grad_norm": 4.934027194976807, "learning_rate": 1.6341416187553994e-06, "loss": 0.0207, "step": 688225 }, { "epoch": 6.77, "grad_norm": 10.076253890991211, "learning_rate": 1.634017496301151e-06, "loss": 0.0918, "step": 688250 }, { "epoch": 6.77, "grad_norm": 0.3452954888343811, "learning_rate": 1.6338933738469026e-06, "loss": 0.0234, "step": 688275 }, { "epoch": 6.77, "grad_norm": 0.27285367250442505, "learning_rate": 1.633769251392654e-06, "loss": 0.0626, "step": 688300 }, { "epoch": 6.77, "grad_norm": 6.5327229499816895, "learning_rate": 1.6336451289384057e-06, "loss": 0.0255, "step": 688325 }, { "epoch": 6.77, "grad_norm": 1.1064953804016113, "learning_rate": 1.6335210064841573e-06, "loss": 0.1017, "step": 688350 }, { "epoch": 6.77, "grad_norm": 5.011139392852783, "learning_rate": 1.6333968840299085e-06, "loss": 0.0161, "step": 688375 }, { "epoch": 6.77, "grad_norm": 9.002628326416016, "learning_rate": 1.6332727615756602e-06, "loss": 0.0677, "step": 688400 }, { "epoch": 6.77, "grad_norm": 11.752777099609375, "learning_rate": 1.6331486391214118e-06, "loss": 0.0171, "step": 688425 }, { "epoch": 6.77, "grad_norm": 1.4324299097061157, "learning_rate": 1.6330245166671632e-06, "loss": 0.0833, "step": 688450 }, { "epoch": 6.77, "grad_norm": 1.0217241048812866, "learning_rate": 1.6329003942129149e-06, "loss": 0.0256, "step": 688475 }, { "epoch": 6.77, "grad_norm": 9.026888847351074, "learning_rate": 1.6327762717586665e-06, "loss": 0.0781, "step": 688500 }, { "epoch": 6.77, "grad_norm": 6.831273078918457, "learning_rate": 1.632652149304418e-06, "loss": 0.0219, "step": 688525 }, { "epoch": 6.77, "grad_norm": 2.4887821674346924, "learning_rate": 1.6325280268501696e-06, "loss": 0.0752, "step": 688550 }, { "epoch": 6.77, "grad_norm": 3.03666353225708, "learning_rate": 1.6324039043959208e-06, "loss": 0.0331, "step": 688575 }, { "epoch": 6.77, "grad_norm": 1.4353445768356323, "learning_rate": 1.6322797819416724e-06, "loss": 0.0688, "step": 688600 }, { "epoch": 6.77, "grad_norm": 9.810355186462402, "learning_rate": 1.632155659487424e-06, "loss": 0.0248, "step": 688625 }, { "epoch": 6.77, "grad_norm": 0.2180452197790146, "learning_rate": 1.6320315370331755e-06, "loss": 0.0829, "step": 688650 }, { "epoch": 6.77, "grad_norm": 4.6220903396606445, "learning_rate": 1.6319074145789271e-06, "loss": 0.0224, "step": 688675 }, { "epoch": 6.77, "grad_norm": 1.6632201671600342, "learning_rate": 1.6317832921246787e-06, "loss": 0.0857, "step": 688700 }, { "epoch": 6.77, "grad_norm": 2.2088687419891357, "learning_rate": 1.6316591696704302e-06, "loss": 0.0188, "step": 688725 }, { "epoch": 6.77, "grad_norm": 2.5609240531921387, "learning_rate": 1.6315350472161818e-06, "loss": 0.0699, "step": 688750 }, { "epoch": 6.77, "grad_norm": 1.9968652725219727, "learning_rate": 1.6314109247619334e-06, "loss": 0.0226, "step": 688775 }, { "epoch": 6.77, "grad_norm": 2.4912643432617188, "learning_rate": 1.6312868023076846e-06, "loss": 0.0884, "step": 688800 }, { "epoch": 6.77, "grad_norm": 11.316240310668945, "learning_rate": 1.6311626798534363e-06, "loss": 0.0272, "step": 688825 }, { "epoch": 6.77, "grad_norm": 3.9545161724090576, "learning_rate": 1.631038557399188e-06, "loss": 0.0566, "step": 688850 }, { "epoch": 6.77, "grad_norm": 4.383011341094971, "learning_rate": 1.6309144349449393e-06, "loss": 0.0342, "step": 688875 }, { "epoch": 6.77, "grad_norm": 1.2604514360427856, "learning_rate": 1.630790312490691e-06, "loss": 0.0851, "step": 688900 }, { "epoch": 6.77, "grad_norm": 9.308673858642578, "learning_rate": 1.6306661900364426e-06, "loss": 0.034, "step": 688925 }, { "epoch": 6.77, "grad_norm": 0.09676840901374817, "learning_rate": 1.630542067582194e-06, "loss": 0.0644, "step": 688950 }, { "epoch": 6.77, "grad_norm": 12.896679878234863, "learning_rate": 1.6304179451279457e-06, "loss": 0.025, "step": 688975 }, { "epoch": 6.77, "grad_norm": 5.372415065765381, "learning_rate": 1.6302938226736969e-06, "loss": 0.0596, "step": 689000 }, { "epoch": 6.77, "grad_norm": 8.969083786010742, "learning_rate": 1.6301697002194485e-06, "loss": 0.0199, "step": 689025 }, { "epoch": 6.77, "grad_norm": 0.4242524206638336, "learning_rate": 1.6300455777652002e-06, "loss": 0.0948, "step": 689050 }, { "epoch": 6.78, "grad_norm": 11.146909713745117, "learning_rate": 1.6299214553109516e-06, "loss": 0.0163, "step": 689075 }, { "epoch": 6.78, "grad_norm": 2.6837620735168457, "learning_rate": 1.6297973328567032e-06, "loss": 0.0566, "step": 689100 }, { "epoch": 6.78, "grad_norm": 3.9042434692382812, "learning_rate": 1.6296732104024548e-06, "loss": 0.0167, "step": 689125 }, { "epoch": 6.78, "grad_norm": 4.334201335906982, "learning_rate": 1.6295490879482063e-06, "loss": 0.0696, "step": 689150 }, { "epoch": 6.78, "grad_norm": 4.248793125152588, "learning_rate": 1.629424965493958e-06, "loss": 0.0203, "step": 689175 }, { "epoch": 6.78, "grad_norm": 2.1825740337371826, "learning_rate": 1.6293008430397095e-06, "loss": 0.1064, "step": 689200 }, { "epoch": 6.78, "grad_norm": 2.6844727993011475, "learning_rate": 1.6291767205854608e-06, "loss": 0.0229, "step": 689225 }, { "epoch": 6.78, "grad_norm": 2.9436657428741455, "learning_rate": 1.6290525981312124e-06, "loss": 0.0723, "step": 689250 }, { "epoch": 6.78, "grad_norm": 2.8477132320404053, "learning_rate": 1.628928475676964e-06, "loss": 0.0206, "step": 689275 }, { "epoch": 6.78, "grad_norm": 1.0878088474273682, "learning_rate": 1.6288043532227154e-06, "loss": 0.0637, "step": 689300 }, { "epoch": 6.78, "grad_norm": 3.0468227863311768, "learning_rate": 1.628680230768467e-06, "loss": 0.0236, "step": 689325 }, { "epoch": 6.78, "grad_norm": 2.219043016433716, "learning_rate": 1.6285561083142187e-06, "loss": 0.0895, "step": 689350 }, { "epoch": 6.78, "grad_norm": 9.689054489135742, "learning_rate": 1.6284319858599701e-06, "loss": 0.0298, "step": 689375 }, { "epoch": 6.78, "grad_norm": 0.15304134786128998, "learning_rate": 1.6283078634057218e-06, "loss": 0.0747, "step": 689400 }, { "epoch": 6.78, "grad_norm": 11.764394760131836, "learning_rate": 1.6281837409514734e-06, "loss": 0.0264, "step": 689425 }, { "epoch": 6.78, "grad_norm": 1.5921353101730347, "learning_rate": 1.6280645833953949e-06, "loss": 0.0858, "step": 689450 }, { "epoch": 6.78, "grad_norm": 5.8333635330200195, "learning_rate": 1.627940460941146e-06, "loss": 0.0089, "step": 689475 }, { "epoch": 6.78, "grad_norm": 0.6001873016357422, "learning_rate": 1.6278163384868977e-06, "loss": 0.0698, "step": 689500 }, { "epoch": 6.78, "grad_norm": 2.2647361755371094, "learning_rate": 1.6276922160326493e-06, "loss": 0.0339, "step": 689525 }, { "epoch": 6.78, "grad_norm": 2.3350870609283447, "learning_rate": 1.6275680935784008e-06, "loss": 0.0626, "step": 689550 }, { "epoch": 6.78, "grad_norm": 0.34285596013069153, "learning_rate": 1.6274439711241524e-06, "loss": 0.0157, "step": 689575 }, { "epoch": 6.78, "grad_norm": 10.385191917419434, "learning_rate": 1.6273198486699038e-06, "loss": 0.0772, "step": 689600 }, { "epoch": 6.78, "grad_norm": 4.294051647186279, "learning_rate": 1.6271957262156555e-06, "loss": 0.0171, "step": 689625 }, { "epoch": 6.78, "grad_norm": 1.9533110857009888, "learning_rate": 1.627071603761407e-06, "loss": 0.0819, "step": 689650 }, { "epoch": 6.78, "grad_norm": 10.28952693939209, "learning_rate": 1.6269474813071583e-06, "loss": 0.0302, "step": 689675 }, { "epoch": 6.78, "grad_norm": 0.16611137986183167, "learning_rate": 1.62682335885291e-06, "loss": 0.0909, "step": 689700 }, { "epoch": 6.78, "grad_norm": 6.527792453765869, "learning_rate": 1.6266992363986616e-06, "loss": 0.0259, "step": 689725 }, { "epoch": 6.78, "grad_norm": 3.945478677749634, "learning_rate": 1.626575113944413e-06, "loss": 0.0656, "step": 689750 }, { "epoch": 6.78, "grad_norm": 0.7139987349510193, "learning_rate": 1.6264509914901646e-06, "loss": 0.0079, "step": 689775 }, { "epoch": 6.78, "grad_norm": 2.547624349594116, "learning_rate": 1.6263268690359163e-06, "loss": 0.078, "step": 689800 }, { "epoch": 6.78, "grad_norm": 4.270263671875, "learning_rate": 1.6262027465816677e-06, "loss": 0.0241, "step": 689825 }, { "epoch": 6.78, "grad_norm": 2.6092915534973145, "learning_rate": 1.6260786241274193e-06, "loss": 0.0872, "step": 689850 }, { "epoch": 6.78, "grad_norm": 7.332315444946289, "learning_rate": 1.625954501673171e-06, "loss": 0.0204, "step": 689875 }, { "epoch": 6.78, "grad_norm": 3.8806700706481934, "learning_rate": 1.6258303792189224e-06, "loss": 0.0869, "step": 689900 }, { "epoch": 6.78, "grad_norm": 8.093835830688477, "learning_rate": 1.625706256764674e-06, "loss": 0.0237, "step": 689925 }, { "epoch": 6.78, "grad_norm": 2.4642891883850098, "learning_rate": 1.6255821343104257e-06, "loss": 0.0809, "step": 689950 }, { "epoch": 6.78, "grad_norm": 7.134746551513672, "learning_rate": 1.6254580118561769e-06, "loss": 0.0328, "step": 689975 }, { "epoch": 6.78, "grad_norm": 1.104357361793518, "learning_rate": 1.6253338894019285e-06, "loss": 0.074, "step": 690000 }, { "epoch": 6.78, "grad_norm": 7.070130825042725, "learning_rate": 1.62520976694768e-06, "loss": 0.0205, "step": 690025 }, { "epoch": 6.78, "grad_norm": 5.0423665046691895, "learning_rate": 1.6250856444934316e-06, "loss": 0.0771, "step": 690050 }, { "epoch": 6.78, "grad_norm": 15.05020523071289, "learning_rate": 1.6249615220391832e-06, "loss": 0.0168, "step": 690075 }, { "epoch": 6.79, "grad_norm": 5.244797706604004, "learning_rate": 1.6248373995849346e-06, "loss": 0.0626, "step": 690100 }, { "epoch": 6.79, "grad_norm": 3.302184581756592, "learning_rate": 1.6247132771306862e-06, "loss": 0.0262, "step": 690125 }, { "epoch": 6.79, "grad_norm": 0.923573911190033, "learning_rate": 1.6245891546764379e-06, "loss": 0.0772, "step": 690150 }, { "epoch": 6.79, "grad_norm": 7.582603454589844, "learning_rate": 1.624465032222189e-06, "loss": 0.0367, "step": 690175 }, { "epoch": 6.79, "grad_norm": 5.776298522949219, "learning_rate": 1.6243409097679407e-06, "loss": 0.0718, "step": 690200 }, { "epoch": 6.79, "grad_norm": 1.8007104396820068, "learning_rate": 1.6242167873136924e-06, "loss": 0.0187, "step": 690225 }, { "epoch": 6.79, "grad_norm": 3.057049512863159, "learning_rate": 1.6240926648594438e-06, "loss": 0.1239, "step": 690250 }, { "epoch": 6.79, "grad_norm": 8.792790412902832, "learning_rate": 1.6239685424051954e-06, "loss": 0.03, "step": 690275 }, { "epoch": 6.79, "grad_norm": 1.290653109550476, "learning_rate": 1.623844419950947e-06, "loss": 0.0827, "step": 690300 }, { "epoch": 6.79, "grad_norm": 10.827369689941406, "learning_rate": 1.6237202974966985e-06, "loss": 0.0241, "step": 690325 }, { "epoch": 6.79, "grad_norm": 1.093374490737915, "learning_rate": 1.6235961750424501e-06, "loss": 0.0589, "step": 690350 }, { "epoch": 6.79, "grad_norm": 2.276918649673462, "learning_rate": 1.6234720525882018e-06, "loss": 0.0237, "step": 690375 }, { "epoch": 6.79, "grad_norm": 5.368831634521484, "learning_rate": 1.623347930133953e-06, "loss": 0.0853, "step": 690400 }, { "epoch": 6.79, "grad_norm": 8.45124626159668, "learning_rate": 1.6232238076797046e-06, "loss": 0.0449, "step": 690425 }, { "epoch": 6.79, "grad_norm": 12.67111587524414, "learning_rate": 1.623099685225456e-06, "loss": 0.068, "step": 690450 }, { "epoch": 6.79, "grad_norm": 0.35973355174064636, "learning_rate": 1.6229755627712077e-06, "loss": 0.0153, "step": 690475 }, { "epoch": 6.79, "grad_norm": 2.239191770553589, "learning_rate": 1.6228514403169593e-06, "loss": 0.0646, "step": 690500 }, { "epoch": 6.79, "grad_norm": 11.811753273010254, "learning_rate": 1.6227273178627107e-06, "loss": 0.0393, "step": 690525 }, { "epoch": 6.79, "grad_norm": 5.951990604400635, "learning_rate": 1.6226031954084623e-06, "loss": 0.0642, "step": 690550 }, { "epoch": 6.79, "grad_norm": 10.713234901428223, "learning_rate": 1.622479072954214e-06, "loss": 0.0271, "step": 690575 }, { "epoch": 6.79, "grad_norm": 3.3544905185699463, "learning_rate": 1.6223549504999652e-06, "loss": 0.0886, "step": 690600 }, { "epoch": 6.79, "grad_norm": 5.750500679016113, "learning_rate": 1.6222308280457168e-06, "loss": 0.0358, "step": 690625 }, { "epoch": 6.79, "grad_norm": 0.6221904754638672, "learning_rate": 1.6221067055914685e-06, "loss": 0.0834, "step": 690650 }, { "epoch": 6.79, "grad_norm": 5.079441070556641, "learning_rate": 1.6219825831372199e-06, "loss": 0.0192, "step": 690675 }, { "epoch": 6.79, "grad_norm": 1.214831829071045, "learning_rate": 1.6218584606829715e-06, "loss": 0.0614, "step": 690700 }, { "epoch": 6.79, "grad_norm": 7.918843746185303, "learning_rate": 1.6217343382287232e-06, "loss": 0.0221, "step": 690725 }, { "epoch": 6.79, "grad_norm": 7.776620388031006, "learning_rate": 1.6216102157744746e-06, "loss": 0.0907, "step": 690750 }, { "epoch": 6.79, "grad_norm": 7.7975358963012695, "learning_rate": 1.6214860933202262e-06, "loss": 0.0239, "step": 690775 }, { "epoch": 6.79, "grad_norm": 1.6472313404083252, "learning_rate": 1.6213619708659779e-06, "loss": 0.0779, "step": 690800 }, { "epoch": 6.79, "grad_norm": 3.588157892227173, "learning_rate": 1.621237848411729e-06, "loss": 0.0184, "step": 690825 }, { "epoch": 6.79, "grad_norm": 0.3265248239040375, "learning_rate": 1.6211137259574807e-06, "loss": 0.0639, "step": 690850 }, { "epoch": 6.79, "grad_norm": 4.216136455535889, "learning_rate": 1.6209896035032321e-06, "loss": 0.0265, "step": 690875 }, { "epoch": 6.79, "grad_norm": 5.517425060272217, "learning_rate": 1.6208654810489838e-06, "loss": 0.0936, "step": 690900 }, { "epoch": 6.79, "grad_norm": 5.0987091064453125, "learning_rate": 1.6207413585947354e-06, "loss": 0.0254, "step": 690925 }, { "epoch": 6.79, "grad_norm": 1.0773743391036987, "learning_rate": 1.6206172361404868e-06, "loss": 0.0896, "step": 690950 }, { "epoch": 6.79, "grad_norm": 16.67469596862793, "learning_rate": 1.6204931136862384e-06, "loss": 0.0198, "step": 690975 }, { "epoch": 6.79, "grad_norm": 6.653358459472656, "learning_rate": 1.62036899123199e-06, "loss": 0.0716, "step": 691000 }, { "epoch": 6.79, "grad_norm": 0.7415606379508972, "learning_rate": 1.6202448687777413e-06, "loss": 0.0173, "step": 691025 }, { "epoch": 6.79, "grad_norm": 6.857900142669678, "learning_rate": 1.620120746323493e-06, "loss": 0.1006, "step": 691050 }, { "epoch": 6.79, "grad_norm": 0.32644423842430115, "learning_rate": 1.6199966238692446e-06, "loss": 0.0127, "step": 691075 }, { "epoch": 6.8, "grad_norm": 5.22819185256958, "learning_rate": 1.619872501414996e-06, "loss": 0.0661, "step": 691100 }, { "epoch": 6.8, "grad_norm": 10.427464485168457, "learning_rate": 1.6197483789607476e-06, "loss": 0.0188, "step": 691125 }, { "epoch": 6.8, "grad_norm": 3.8571972846984863, "learning_rate": 1.6196242565064993e-06, "loss": 0.0725, "step": 691150 }, { "epoch": 6.8, "grad_norm": 5.873598575592041, "learning_rate": 1.6195001340522507e-06, "loss": 0.0166, "step": 691175 }, { "epoch": 6.8, "grad_norm": 5.254241943359375, "learning_rate": 1.6193760115980023e-06, "loss": 0.0661, "step": 691200 }, { "epoch": 6.8, "grad_norm": 3.985840320587158, "learning_rate": 1.619251889143754e-06, "loss": 0.03, "step": 691225 }, { "epoch": 6.8, "grad_norm": 3.0534908771514893, "learning_rate": 1.6191277666895054e-06, "loss": 0.0749, "step": 691250 }, { "epoch": 6.8, "grad_norm": 13.966054916381836, "learning_rate": 1.619003644235257e-06, "loss": 0.0389, "step": 691275 }, { "epoch": 6.8, "grad_norm": 1.007245421409607, "learning_rate": 1.6188795217810082e-06, "loss": 0.0697, "step": 691300 }, { "epoch": 6.8, "grad_norm": 7.2691426277160645, "learning_rate": 1.6187553993267599e-06, "loss": 0.0301, "step": 691325 }, { "epoch": 6.8, "grad_norm": 4.608279228210449, "learning_rate": 1.6186312768725115e-06, "loss": 0.0622, "step": 691350 }, { "epoch": 6.8, "grad_norm": 0.3264829218387604, "learning_rate": 1.618507154418263e-06, "loss": 0.0259, "step": 691375 }, { "epoch": 6.8, "grad_norm": 4.307607650756836, "learning_rate": 1.6183830319640146e-06, "loss": 0.0797, "step": 691400 }, { "epoch": 6.8, "grad_norm": 2.896238327026367, "learning_rate": 1.6182589095097662e-06, "loss": 0.0123, "step": 691425 }, { "epoch": 6.8, "grad_norm": 0.1620664894580841, "learning_rate": 1.6181347870555176e-06, "loss": 0.0843, "step": 691450 }, { "epoch": 6.8, "grad_norm": 8.963738441467285, "learning_rate": 1.6180106646012692e-06, "loss": 0.0367, "step": 691475 }, { "epoch": 6.8, "grad_norm": 0.741312563419342, "learning_rate": 1.6178865421470209e-06, "loss": 0.0619, "step": 691500 }, { "epoch": 6.8, "grad_norm": 5.294816970825195, "learning_rate": 1.617762419692772e-06, "loss": 0.025, "step": 691525 }, { "epoch": 6.8, "grad_norm": 3.3495635986328125, "learning_rate": 1.6176382972385237e-06, "loss": 0.0491, "step": 691550 }, { "epoch": 6.8, "grad_norm": 8.45802116394043, "learning_rate": 1.6175141747842754e-06, "loss": 0.0302, "step": 691575 }, { "epoch": 6.8, "grad_norm": 2.7254393100738525, "learning_rate": 1.6173950172281968e-06, "loss": 0.0771, "step": 691600 }, { "epoch": 6.8, "grad_norm": 10.302122116088867, "learning_rate": 1.6172708947739482e-06, "loss": 0.0424, "step": 691625 }, { "epoch": 6.8, "grad_norm": 0.3357149660587311, "learning_rate": 1.6171467723196999e-06, "loss": 0.0902, "step": 691650 }, { "epoch": 6.8, "grad_norm": 6.476406574249268, "learning_rate": 1.6170226498654515e-06, "loss": 0.0217, "step": 691675 }, { "epoch": 6.8, "grad_norm": 0.4183609187602997, "learning_rate": 1.616898527411203e-06, "loss": 0.063, "step": 691700 }, { "epoch": 6.8, "grad_norm": 9.287581443786621, "learning_rate": 1.6167744049569546e-06, "loss": 0.0218, "step": 691725 }, { "epoch": 6.8, "grad_norm": 0.8734222054481506, "learning_rate": 1.6166502825027062e-06, "loss": 0.0729, "step": 691750 }, { "epoch": 6.8, "grad_norm": 11.966442108154297, "learning_rate": 1.6165261600484574e-06, "loss": 0.0241, "step": 691775 }, { "epoch": 6.8, "grad_norm": 6.602664470672607, "learning_rate": 1.616402037594209e-06, "loss": 0.0812, "step": 691800 }, { "epoch": 6.8, "grad_norm": 10.698081970214844, "learning_rate": 1.6162779151399607e-06, "loss": 0.0247, "step": 691825 }, { "epoch": 6.8, "grad_norm": 4.190683841705322, "learning_rate": 1.616153792685712e-06, "loss": 0.076, "step": 691850 }, { "epoch": 6.8, "grad_norm": 6.121824741363525, "learning_rate": 1.6160296702314637e-06, "loss": 0.0227, "step": 691875 }, { "epoch": 6.8, "grad_norm": 1.9338440895080566, "learning_rate": 1.6159055477772152e-06, "loss": 0.0789, "step": 691900 }, { "epoch": 6.8, "grad_norm": 6.696660041809082, "learning_rate": 1.6157814253229668e-06, "loss": 0.0177, "step": 691925 }, { "epoch": 6.8, "grad_norm": 1.5194971561431885, "learning_rate": 1.6156573028687184e-06, "loss": 0.0613, "step": 691950 }, { "epoch": 6.8, "grad_norm": 6.719189643859863, "learning_rate": 1.6155331804144696e-06, "loss": 0.0223, "step": 691975 }, { "epoch": 6.8, "grad_norm": 0.9498947858810425, "learning_rate": 1.6154090579602213e-06, "loss": 0.0903, "step": 692000 }, { "epoch": 6.8, "grad_norm": 4.227344036102295, "learning_rate": 1.615284935505973e-06, "loss": 0.0199, "step": 692025 }, { "epoch": 6.8, "grad_norm": 5.047887802124023, "learning_rate": 1.6151608130517243e-06, "loss": 0.0821, "step": 692050 }, { "epoch": 6.8, "grad_norm": 2.2113518714904785, "learning_rate": 1.615036690597476e-06, "loss": 0.0213, "step": 692075 }, { "epoch": 6.8, "grad_norm": 4.328213214874268, "learning_rate": 1.6149125681432276e-06, "loss": 0.1001, "step": 692100 }, { "epoch": 6.81, "grad_norm": 7.070687770843506, "learning_rate": 1.614788445688979e-06, "loss": 0.0247, "step": 692125 }, { "epoch": 6.81, "grad_norm": 1.0614013671875, "learning_rate": 1.6146643232347307e-06, "loss": 0.0702, "step": 692150 }, { "epoch": 6.81, "grad_norm": 9.934976577758789, "learning_rate": 1.6145402007804823e-06, "loss": 0.031, "step": 692175 }, { "epoch": 6.81, "grad_norm": 0.05571422353386879, "learning_rate": 1.6144160783262335e-06, "loss": 0.0663, "step": 692200 }, { "epoch": 6.81, "grad_norm": 9.953255653381348, "learning_rate": 1.6142919558719851e-06, "loss": 0.0266, "step": 692225 }, { "epoch": 6.81, "grad_norm": 2.2368454933166504, "learning_rate": 1.6141678334177368e-06, "loss": 0.0921, "step": 692250 }, { "epoch": 6.81, "grad_norm": 2.844325065612793, "learning_rate": 1.6140437109634882e-06, "loss": 0.0215, "step": 692275 }, { "epoch": 6.81, "grad_norm": 1.441999077796936, "learning_rate": 1.6139195885092398e-06, "loss": 0.0745, "step": 692300 }, { "epoch": 6.81, "grad_norm": 0.40413859486579895, "learning_rate": 1.6137954660549913e-06, "loss": 0.021, "step": 692325 }, { "epoch": 6.81, "grad_norm": 0.9790470004081726, "learning_rate": 1.613671343600743e-06, "loss": 0.0739, "step": 692350 }, { "epoch": 6.81, "grad_norm": 10.131417274475098, "learning_rate": 1.6135472211464945e-06, "loss": 0.0368, "step": 692375 }, { "epoch": 6.81, "grad_norm": 1.257582187652588, "learning_rate": 1.6134230986922457e-06, "loss": 0.0929, "step": 692400 }, { "epoch": 6.81, "grad_norm": 6.83688497543335, "learning_rate": 1.6132989762379974e-06, "loss": 0.026, "step": 692425 }, { "epoch": 6.81, "grad_norm": 1.224721074104309, "learning_rate": 1.613174853783749e-06, "loss": 0.0614, "step": 692450 }, { "epoch": 6.81, "grad_norm": 7.602184295654297, "learning_rate": 1.6130507313295004e-06, "loss": 0.0265, "step": 692475 }, { "epoch": 6.81, "grad_norm": 0.5930587649345398, "learning_rate": 1.612926608875252e-06, "loss": 0.0709, "step": 692500 }, { "epoch": 6.81, "grad_norm": 8.633345603942871, "learning_rate": 1.6128024864210037e-06, "loss": 0.0354, "step": 692525 }, { "epoch": 6.81, "grad_norm": 4.254915237426758, "learning_rate": 1.6126783639667551e-06, "loss": 0.0816, "step": 692550 }, { "epoch": 6.81, "grad_norm": 11.867961883544922, "learning_rate": 1.6125542415125068e-06, "loss": 0.0258, "step": 692575 }, { "epoch": 6.81, "grad_norm": 3.116602897644043, "learning_rate": 1.6124301190582584e-06, "loss": 0.0931, "step": 692600 }, { "epoch": 6.81, "grad_norm": 3.837505578994751, "learning_rate": 1.6123059966040098e-06, "loss": 0.0207, "step": 692625 }, { "epoch": 6.81, "grad_norm": 7.472301483154297, "learning_rate": 1.6121818741497612e-06, "loss": 0.0824, "step": 692650 }, { "epoch": 6.81, "grad_norm": 2.257737874984741, "learning_rate": 1.6120577516955129e-06, "loss": 0.0135, "step": 692675 }, { "epoch": 6.81, "grad_norm": 1.058500051498413, "learning_rate": 1.6119336292412643e-06, "loss": 0.0657, "step": 692700 }, { "epoch": 6.81, "grad_norm": 7.6887969970703125, "learning_rate": 1.611809506787016e-06, "loss": 0.0214, "step": 692725 }, { "epoch": 6.81, "grad_norm": 0.4411424696445465, "learning_rate": 1.6116853843327674e-06, "loss": 0.0877, "step": 692750 }, { "epoch": 6.81, "grad_norm": 3.224202871322632, "learning_rate": 1.611561261878519e-06, "loss": 0.0227, "step": 692775 }, { "epoch": 6.81, "grad_norm": 4.44976282119751, "learning_rate": 1.6114371394242706e-06, "loss": 0.0568, "step": 692800 }, { "epoch": 6.81, "grad_norm": 18.635414123535156, "learning_rate": 1.611313016970022e-06, "loss": 0.0334, "step": 692825 }, { "epoch": 6.81, "grad_norm": 3.081387758255005, "learning_rate": 1.6111888945157737e-06, "loss": 0.0907, "step": 692850 }, { "epoch": 6.81, "grad_norm": 16.827789306640625, "learning_rate": 1.6110647720615253e-06, "loss": 0.0184, "step": 692875 }, { "epoch": 6.81, "grad_norm": 2.2002763748168945, "learning_rate": 1.6109406496072765e-06, "loss": 0.0884, "step": 692900 }, { "epoch": 6.81, "grad_norm": 2.5786283016204834, "learning_rate": 1.6108165271530282e-06, "loss": 0.025, "step": 692925 }, { "epoch": 6.81, "grad_norm": 3.2147676944732666, "learning_rate": 1.6106924046987798e-06, "loss": 0.0779, "step": 692950 }, { "epoch": 6.81, "grad_norm": 4.815947532653809, "learning_rate": 1.6105682822445312e-06, "loss": 0.0227, "step": 692975 }, { "epoch": 6.81, "grad_norm": 5.9258551597595215, "learning_rate": 1.6104441597902829e-06, "loss": 0.0675, "step": 693000 }, { "epoch": 6.81, "grad_norm": 6.9296064376831055, "learning_rate": 1.6103200373360345e-06, "loss": 0.0176, "step": 693025 }, { "epoch": 6.81, "grad_norm": 0.9018017649650574, "learning_rate": 1.610195914881786e-06, "loss": 0.0854, "step": 693050 }, { "epoch": 6.81, "grad_norm": 9.628878593444824, "learning_rate": 1.6100717924275376e-06, "loss": 0.0192, "step": 693075 }, { "epoch": 6.81, "grad_norm": 2.976559638977051, "learning_rate": 1.6099476699732892e-06, "loss": 0.0782, "step": 693100 }, { "epoch": 6.81, "grad_norm": 3.5136067867279053, "learning_rate": 1.6098235475190404e-06, "loss": 0.0085, "step": 693125 }, { "epoch": 6.82, "grad_norm": 13.242304801940918, "learning_rate": 1.609699425064792e-06, "loss": 0.0777, "step": 693150 }, { "epoch": 6.82, "grad_norm": 7.082400798797607, "learning_rate": 1.6095753026105435e-06, "loss": 0.0199, "step": 693175 }, { "epoch": 6.82, "grad_norm": 5.434147834777832, "learning_rate": 1.609451180156295e-06, "loss": 0.0672, "step": 693200 }, { "epoch": 6.82, "grad_norm": 13.69467544555664, "learning_rate": 1.6093270577020467e-06, "loss": 0.0311, "step": 693225 }, { "epoch": 6.82, "grad_norm": 1.33611261844635, "learning_rate": 1.6092029352477982e-06, "loss": 0.0654, "step": 693250 }, { "epoch": 6.82, "grad_norm": 5.819141387939453, "learning_rate": 1.6090788127935498e-06, "loss": 0.018, "step": 693275 }, { "epoch": 6.82, "grad_norm": 2.354623556137085, "learning_rate": 1.6089546903393014e-06, "loss": 0.0733, "step": 693300 }, { "epoch": 6.82, "grad_norm": 10.46753978729248, "learning_rate": 1.6088305678850526e-06, "loss": 0.0262, "step": 693325 }, { "epoch": 6.82, "grad_norm": 2.6045467853546143, "learning_rate": 1.6087064454308043e-06, "loss": 0.0683, "step": 693350 }, { "epoch": 6.82, "grad_norm": 12.394268035888672, "learning_rate": 1.608582322976556e-06, "loss": 0.0276, "step": 693375 }, { "epoch": 6.82, "grad_norm": 1.295881748199463, "learning_rate": 1.6084582005223073e-06, "loss": 0.0689, "step": 693400 }, { "epoch": 6.82, "grad_norm": 7.261262893676758, "learning_rate": 1.608334078068059e-06, "loss": 0.0383, "step": 693425 }, { "epoch": 6.82, "grad_norm": 0.9942946434020996, "learning_rate": 1.6082099556138106e-06, "loss": 0.085, "step": 693450 }, { "epoch": 6.82, "grad_norm": 5.576498508453369, "learning_rate": 1.608085833159562e-06, "loss": 0.0189, "step": 693475 }, { "epoch": 6.82, "grad_norm": 2.5103631019592285, "learning_rate": 1.6079617107053137e-06, "loss": 0.081, "step": 693500 }, { "epoch": 6.82, "grad_norm": 2.334932804107666, "learning_rate": 1.6078375882510653e-06, "loss": 0.0222, "step": 693525 }, { "epoch": 6.82, "grad_norm": 6.700027942657471, "learning_rate": 1.6077134657968165e-06, "loss": 0.0685, "step": 693550 }, { "epoch": 6.82, "grad_norm": 6.868645191192627, "learning_rate": 1.6075893433425681e-06, "loss": 0.0331, "step": 693575 }, { "epoch": 6.82, "grad_norm": 0.21734575927257538, "learning_rate": 1.6074652208883196e-06, "loss": 0.0644, "step": 693600 }, { "epoch": 6.82, "grad_norm": 4.358695030212402, "learning_rate": 1.6073410984340712e-06, "loss": 0.0266, "step": 693625 }, { "epoch": 6.82, "grad_norm": 5.913837432861328, "learning_rate": 1.6072169759798228e-06, "loss": 0.0716, "step": 693650 }, { "epoch": 6.82, "grad_norm": 20.70663833618164, "learning_rate": 1.6070928535255743e-06, "loss": 0.0405, "step": 693675 }, { "epoch": 6.82, "grad_norm": 4.4886932373046875, "learning_rate": 1.6069687310713259e-06, "loss": 0.0812, "step": 693700 }, { "epoch": 6.82, "grad_norm": 15.208491325378418, "learning_rate": 1.6068446086170775e-06, "loss": 0.0277, "step": 693725 }, { "epoch": 6.82, "grad_norm": 1.252619743347168, "learning_rate": 1.606725451060999e-06, "loss": 0.0707, "step": 693750 }, { "epoch": 6.82, "grad_norm": 16.7625732421875, "learning_rate": 1.6066013286067502e-06, "loss": 0.0266, "step": 693775 }, { "epoch": 6.82, "grad_norm": 1.5039106607437134, "learning_rate": 1.6064772061525018e-06, "loss": 0.062, "step": 693800 }, { "epoch": 6.82, "grad_norm": 3.492084503173828, "learning_rate": 1.6063530836982535e-06, "loss": 0.0235, "step": 693825 }, { "epoch": 6.82, "grad_norm": 2.286924123764038, "learning_rate": 1.6062289612440049e-06, "loss": 0.0798, "step": 693850 }, { "epoch": 6.82, "grad_norm": 9.347946166992188, "learning_rate": 1.6061048387897565e-06, "loss": 0.0272, "step": 693875 }, { "epoch": 6.82, "grad_norm": 0.9537120461463928, "learning_rate": 1.6059807163355082e-06, "loss": 0.08, "step": 693900 }, { "epoch": 6.82, "grad_norm": 14.389714241027832, "learning_rate": 1.6058565938812596e-06, "loss": 0.0287, "step": 693925 }, { "epoch": 6.82, "grad_norm": 3.5768678188323975, "learning_rate": 1.6057324714270112e-06, "loss": 0.0596, "step": 693950 }, { "epoch": 6.82, "grad_norm": 1.6157230138778687, "learning_rate": 1.6056083489727628e-06, "loss": 0.0182, "step": 693975 }, { "epoch": 6.82, "grad_norm": 4.7360992431640625, "learning_rate": 1.605484226518514e-06, "loss": 0.0799, "step": 694000 }, { "epoch": 6.82, "grad_norm": 1.5301882028579712, "learning_rate": 1.6053601040642657e-06, "loss": 0.0274, "step": 694025 }, { "epoch": 6.82, "grad_norm": 1.5277297496795654, "learning_rate": 1.6052359816100173e-06, "loss": 0.0807, "step": 694050 }, { "epoch": 6.82, "grad_norm": 8.261780738830566, "learning_rate": 1.6051118591557687e-06, "loss": 0.024, "step": 694075 }, { "epoch": 6.82, "grad_norm": 1.1751487255096436, "learning_rate": 1.6049877367015204e-06, "loss": 0.0901, "step": 694100 }, { "epoch": 6.82, "grad_norm": 8.96566390991211, "learning_rate": 1.604863614247272e-06, "loss": 0.02, "step": 694125 }, { "epoch": 6.82, "grad_norm": 2.398735284805298, "learning_rate": 1.6047394917930234e-06, "loss": 0.0817, "step": 694150 }, { "epoch": 6.83, "grad_norm": 7.406484603881836, "learning_rate": 1.604615369338775e-06, "loss": 0.0257, "step": 694175 }, { "epoch": 6.83, "grad_norm": 5.907413005828857, "learning_rate": 1.6044912468845265e-06, "loss": 0.0739, "step": 694200 }, { "epoch": 6.83, "grad_norm": 3.2703161239624023, "learning_rate": 1.6043671244302781e-06, "loss": 0.0208, "step": 694225 }, { "epoch": 6.83, "grad_norm": 1.3500045537948608, "learning_rate": 1.6042430019760298e-06, "loss": 0.0862, "step": 694250 }, { "epoch": 6.83, "grad_norm": 0.16314789652824402, "learning_rate": 1.604118879521781e-06, "loss": 0.0154, "step": 694275 }, { "epoch": 6.83, "grad_norm": 5.002839088439941, "learning_rate": 1.6039947570675326e-06, "loss": 0.0831, "step": 694300 }, { "epoch": 6.83, "grad_norm": 1.6906241178512573, "learning_rate": 1.6038706346132843e-06, "loss": 0.0312, "step": 694325 }, { "epoch": 6.83, "grad_norm": 6.498091697692871, "learning_rate": 1.6037465121590357e-06, "loss": 0.0764, "step": 694350 }, { "epoch": 6.83, "grad_norm": 14.642497062683105, "learning_rate": 1.6036223897047873e-06, "loss": 0.0269, "step": 694375 }, { "epoch": 6.83, "grad_norm": 7.202589988708496, "learning_rate": 1.603498267250539e-06, "loss": 0.0791, "step": 694400 }, { "epoch": 6.83, "grad_norm": 9.130158424377441, "learning_rate": 1.6033741447962904e-06, "loss": 0.0221, "step": 694425 }, { "epoch": 6.83, "grad_norm": 1.8994697332382202, "learning_rate": 1.603250022342042e-06, "loss": 0.0876, "step": 694450 }, { "epoch": 6.83, "grad_norm": 20.423789978027344, "learning_rate": 1.6031258998877936e-06, "loss": 0.038, "step": 694475 }, { "epoch": 6.83, "grad_norm": 0.9859779477119446, "learning_rate": 1.6030017774335448e-06, "loss": 0.0745, "step": 694500 }, { "epoch": 6.83, "grad_norm": 4.649537563323975, "learning_rate": 1.6028776549792965e-06, "loss": 0.0168, "step": 694525 }, { "epoch": 6.83, "grad_norm": 1.1650800704956055, "learning_rate": 1.6027535325250481e-06, "loss": 0.0865, "step": 694550 }, { "epoch": 6.83, "grad_norm": 18.997684478759766, "learning_rate": 1.6026294100707995e-06, "loss": 0.0227, "step": 694575 }, { "epoch": 6.83, "grad_norm": 7.388675689697266, "learning_rate": 1.6025052876165512e-06, "loss": 0.0724, "step": 694600 }, { "epoch": 6.83, "grad_norm": 6.327239036560059, "learning_rate": 1.6023811651623026e-06, "loss": 0.0263, "step": 694625 }, { "epoch": 6.83, "grad_norm": 2.5324647426605225, "learning_rate": 1.6022570427080542e-06, "loss": 0.1022, "step": 694650 }, { "epoch": 6.83, "grad_norm": 6.924366474151611, "learning_rate": 1.6021329202538059e-06, "loss": 0.0267, "step": 694675 }, { "epoch": 6.83, "grad_norm": 3.004310131072998, "learning_rate": 1.602008797799557e-06, "loss": 0.1014, "step": 694700 }, { "epoch": 6.83, "grad_norm": 6.48158073425293, "learning_rate": 1.6018846753453087e-06, "loss": 0.0316, "step": 694725 }, { "epoch": 6.83, "grad_norm": 1.5217210054397583, "learning_rate": 1.6017605528910604e-06, "loss": 0.0616, "step": 694750 }, { "epoch": 6.83, "grad_norm": 11.049263000488281, "learning_rate": 1.6016364304368118e-06, "loss": 0.0219, "step": 694775 }, { "epoch": 6.83, "grad_norm": 0.6176344156265259, "learning_rate": 1.6015123079825634e-06, "loss": 0.0917, "step": 694800 }, { "epoch": 6.83, "grad_norm": 8.998318672180176, "learning_rate": 1.601388185528315e-06, "loss": 0.0398, "step": 694825 }, { "epoch": 6.83, "grad_norm": 3.1098673343658447, "learning_rate": 1.6012640630740665e-06, "loss": 0.0948, "step": 694850 }, { "epoch": 6.83, "grad_norm": 7.467660903930664, "learning_rate": 1.601139940619818e-06, "loss": 0.0363, "step": 694875 }, { "epoch": 6.83, "grad_norm": 2.0832083225250244, "learning_rate": 1.6010158181655697e-06, "loss": 0.0651, "step": 694900 }, { "epoch": 6.83, "grad_norm": 7.8579792976379395, "learning_rate": 1.600891695711321e-06, "loss": 0.0291, "step": 694925 }, { "epoch": 6.83, "grad_norm": 0.45729953050613403, "learning_rate": 1.6007675732570726e-06, "loss": 0.0772, "step": 694950 }, { "epoch": 6.83, "grad_norm": 0.33015379309654236, "learning_rate": 1.6006434508028242e-06, "loss": 0.0193, "step": 694975 }, { "epoch": 6.83, "grad_norm": 2.2760252952575684, "learning_rate": 1.6005193283485756e-06, "loss": 0.0759, "step": 695000 }, { "epoch": 6.83, "grad_norm": 6.999947547912598, "learning_rate": 1.6003952058943273e-06, "loss": 0.0187, "step": 695025 }, { "epoch": 6.83, "grad_norm": 3.230487108230591, "learning_rate": 1.6002710834400787e-06, "loss": 0.0905, "step": 695050 }, { "epoch": 6.83, "grad_norm": 2.3889336585998535, "learning_rate": 1.6001469609858303e-06, "loss": 0.0245, "step": 695075 }, { "epoch": 6.83, "grad_norm": 1.6154686212539673, "learning_rate": 1.600022838531582e-06, "loss": 0.0839, "step": 695100 }, { "epoch": 6.83, "grad_norm": 2.255894660949707, "learning_rate": 1.5998987160773332e-06, "loss": 0.026, "step": 695125 }, { "epoch": 6.83, "grad_norm": 3.449191093444824, "learning_rate": 1.5997745936230848e-06, "loss": 0.0692, "step": 695150 }, { "epoch": 6.84, "grad_norm": 2.4150822162628174, "learning_rate": 1.5996504711688365e-06, "loss": 0.0263, "step": 695175 }, { "epoch": 6.84, "grad_norm": 1.7951366901397705, "learning_rate": 1.5995263487145879e-06, "loss": 0.0776, "step": 695200 }, { "epoch": 6.84, "grad_norm": 6.310841083526611, "learning_rate": 1.5994022262603395e-06, "loss": 0.0213, "step": 695225 }, { "epoch": 6.84, "grad_norm": 2.7537286281585693, "learning_rate": 1.5992781038060911e-06, "loss": 0.0576, "step": 695250 }, { "epoch": 6.84, "grad_norm": 2.7327721118927, "learning_rate": 1.5991539813518426e-06, "loss": 0.0305, "step": 695275 }, { "epoch": 6.84, "grad_norm": 1.7755849361419678, "learning_rate": 1.5990298588975942e-06, "loss": 0.0681, "step": 695300 }, { "epoch": 6.84, "grad_norm": 4.533075332641602, "learning_rate": 1.5989057364433458e-06, "loss": 0.0205, "step": 695325 }, { "epoch": 6.84, "grad_norm": 1.3825693130493164, "learning_rate": 1.598781613989097e-06, "loss": 0.0731, "step": 695350 }, { "epoch": 6.84, "grad_norm": 2.133716344833374, "learning_rate": 1.5986574915348487e-06, "loss": 0.0264, "step": 695375 }, { "epoch": 6.84, "grad_norm": 1.1398024559020996, "learning_rate": 1.5985333690806003e-06, "loss": 0.0718, "step": 695400 }, { "epoch": 6.84, "grad_norm": 4.810462474822998, "learning_rate": 1.5984092466263517e-06, "loss": 0.017, "step": 695425 }, { "epoch": 6.84, "grad_norm": 3.1389927864074707, "learning_rate": 1.5982851241721034e-06, "loss": 0.0664, "step": 695450 }, { "epoch": 6.84, "grad_norm": 13.515195846557617, "learning_rate": 1.5981610017178548e-06, "loss": 0.0274, "step": 695475 }, { "epoch": 6.84, "grad_norm": 0.05518261343240738, "learning_rate": 1.5980368792636064e-06, "loss": 0.095, "step": 695500 }, { "epoch": 6.84, "grad_norm": 12.068061828613281, "learning_rate": 1.597912756809358e-06, "loss": 0.0221, "step": 695525 }, { "epoch": 6.84, "grad_norm": 4.711225986480713, "learning_rate": 1.5977886343551095e-06, "loss": 0.0767, "step": 695550 }, { "epoch": 6.84, "grad_norm": 3.262218952178955, "learning_rate": 1.5976645119008611e-06, "loss": 0.0217, "step": 695575 }, { "epoch": 6.84, "grad_norm": 11.097817420959473, "learning_rate": 1.5975403894466128e-06, "loss": 0.0984, "step": 695600 }, { "epoch": 6.84, "grad_norm": 11.152510643005371, "learning_rate": 1.597416266992364e-06, "loss": 0.0302, "step": 695625 }, { "epoch": 6.84, "grad_norm": 1.5522795915603638, "learning_rate": 1.5972921445381156e-06, "loss": 0.0796, "step": 695650 }, { "epoch": 6.84, "grad_norm": 6.245821952819824, "learning_rate": 1.5971680220838672e-06, "loss": 0.0223, "step": 695675 }, { "epoch": 6.84, "grad_norm": 6.6748762130737305, "learning_rate": 1.5970438996296187e-06, "loss": 0.0837, "step": 695700 }, { "epoch": 6.84, "grad_norm": 0.5019834637641907, "learning_rate": 1.5969197771753703e-06, "loss": 0.0286, "step": 695725 }, { "epoch": 6.84, "grad_norm": 0.22363676130771637, "learning_rate": 1.596795654721122e-06, "loss": 0.0823, "step": 695750 }, { "epoch": 6.84, "grad_norm": 4.857510089874268, "learning_rate": 1.5966715322668734e-06, "loss": 0.0257, "step": 695775 }, { "epoch": 6.84, "grad_norm": 0.569378674030304, "learning_rate": 1.596547409812625e-06, "loss": 0.101, "step": 695800 }, { "epoch": 6.84, "grad_norm": 5.037980556488037, "learning_rate": 1.5964232873583766e-06, "loss": 0.0161, "step": 695825 }, { "epoch": 6.84, "grad_norm": 0.28936299681663513, "learning_rate": 1.5962991649041278e-06, "loss": 0.0618, "step": 695850 }, { "epoch": 6.84, "grad_norm": 12.833634376525879, "learning_rate": 1.5961750424498795e-06, "loss": 0.0413, "step": 695875 }, { "epoch": 6.84, "grad_norm": 4.456790447235107, "learning_rate": 1.596050919995631e-06, "loss": 0.0743, "step": 695900 }, { "epoch": 6.84, "grad_norm": 0.5108054280281067, "learning_rate": 1.5959267975413825e-06, "loss": 0.0125, "step": 695925 }, { "epoch": 6.84, "grad_norm": 1.883933663368225, "learning_rate": 1.5958026750871342e-06, "loss": 0.0823, "step": 695950 }, { "epoch": 6.84, "grad_norm": 8.954031944274902, "learning_rate": 1.5956785526328856e-06, "loss": 0.0302, "step": 695975 }, { "epoch": 6.84, "grad_norm": 3.2734529972076416, "learning_rate": 1.5955544301786372e-06, "loss": 0.0814, "step": 696000 }, { "epoch": 6.84, "grad_norm": 5.579872131347656, "learning_rate": 1.5954303077243889e-06, "loss": 0.0227, "step": 696025 }, { "epoch": 6.84, "grad_norm": 2.6773502826690674, "learning_rate": 1.5953111501683103e-06, "loss": 0.0985, "step": 696050 }, { "epoch": 6.84, "grad_norm": 2.5021135807037354, "learning_rate": 1.5951870277140615e-06, "loss": 0.0134, "step": 696075 }, { "epoch": 6.84, "grad_norm": 2.717581272125244, "learning_rate": 1.5950629052598132e-06, "loss": 0.0699, "step": 696100 }, { "epoch": 6.84, "grad_norm": 8.512435913085938, "learning_rate": 1.5949387828055648e-06, "loss": 0.0211, "step": 696125 }, { "epoch": 6.84, "grad_norm": 0.3777749240398407, "learning_rate": 1.5948146603513162e-06, "loss": 0.0958, "step": 696150 }, { "epoch": 6.84, "grad_norm": 9.425914764404297, "learning_rate": 1.5946905378970679e-06, "loss": 0.0413, "step": 696175 }, { "epoch": 6.85, "grad_norm": 1.302850604057312, "learning_rate": 1.5945664154428195e-06, "loss": 0.0701, "step": 696200 }, { "epoch": 6.85, "grad_norm": 8.103588104248047, "learning_rate": 1.594442292988571e-06, "loss": 0.0317, "step": 696225 }, { "epoch": 6.85, "grad_norm": 5.345479488372803, "learning_rate": 1.5943181705343225e-06, "loss": 0.0906, "step": 696250 }, { "epoch": 6.85, "grad_norm": 6.030215740203857, "learning_rate": 1.5941940480800742e-06, "loss": 0.0223, "step": 696275 }, { "epoch": 6.85, "grad_norm": 5.619185447692871, "learning_rate": 1.5940699256258254e-06, "loss": 0.0598, "step": 696300 }, { "epoch": 6.85, "grad_norm": 9.993063926696777, "learning_rate": 1.593945803171577e-06, "loss": 0.0219, "step": 696325 }, { "epoch": 6.85, "grad_norm": 1.8943392038345337, "learning_rate": 1.5938216807173287e-06, "loss": 0.0831, "step": 696350 }, { "epoch": 6.85, "grad_norm": 7.804492950439453, "learning_rate": 1.59369755826308e-06, "loss": 0.0229, "step": 696375 }, { "epoch": 6.85, "grad_norm": 4.562102317810059, "learning_rate": 1.5935734358088317e-06, "loss": 0.083, "step": 696400 }, { "epoch": 6.85, "grad_norm": 0.8474377393722534, "learning_rate": 1.5934493133545834e-06, "loss": 0.0202, "step": 696425 }, { "epoch": 6.85, "grad_norm": 0.5000454783439636, "learning_rate": 1.5933251909003348e-06, "loss": 0.0848, "step": 696450 }, { "epoch": 6.85, "grad_norm": 2.7421352863311768, "learning_rate": 1.5932010684460864e-06, "loss": 0.0199, "step": 696475 }, { "epoch": 6.85, "grad_norm": 0.6021493077278137, "learning_rate": 1.5930769459918376e-06, "loss": 0.0817, "step": 696500 }, { "epoch": 6.85, "grad_norm": 8.515087127685547, "learning_rate": 1.5929528235375893e-06, "loss": 0.0216, "step": 696525 }, { "epoch": 6.85, "grad_norm": 8.959542274475098, "learning_rate": 1.592828701083341e-06, "loss": 0.0476, "step": 696550 }, { "epoch": 6.85, "grad_norm": 8.051423072814941, "learning_rate": 1.5927045786290923e-06, "loss": 0.0346, "step": 696575 }, { "epoch": 6.85, "grad_norm": 1.6210894584655762, "learning_rate": 1.592580456174844e-06, "loss": 0.0762, "step": 696600 }, { "epoch": 6.85, "grad_norm": 12.058837890625, "learning_rate": 1.5924563337205956e-06, "loss": 0.0388, "step": 696625 }, { "epoch": 6.85, "grad_norm": 3.325705051422119, "learning_rate": 1.592332211266347e-06, "loss": 0.0609, "step": 696650 }, { "epoch": 6.85, "grad_norm": 9.936037063598633, "learning_rate": 1.5922080888120986e-06, "loss": 0.0313, "step": 696675 }, { "epoch": 6.85, "grad_norm": 3.920717239379883, "learning_rate": 1.5920839663578503e-06, "loss": 0.091, "step": 696700 }, { "epoch": 6.85, "grad_norm": 7.037431240081787, "learning_rate": 1.5919598439036015e-06, "loss": 0.0225, "step": 696725 }, { "epoch": 6.85, "grad_norm": 1.9680345058441162, "learning_rate": 1.5918357214493531e-06, "loss": 0.0684, "step": 696750 }, { "epoch": 6.85, "grad_norm": 6.884298324584961, "learning_rate": 1.5917115989951048e-06, "loss": 0.0181, "step": 696775 }, { "epoch": 6.85, "grad_norm": 1.9768697023391724, "learning_rate": 1.5915874765408562e-06, "loss": 0.0596, "step": 696800 }, { "epoch": 6.85, "grad_norm": 7.097586154937744, "learning_rate": 1.5914633540866078e-06, "loss": 0.0167, "step": 696825 }, { "epoch": 6.85, "grad_norm": 0.4580172598361969, "learning_rate": 1.5913392316323595e-06, "loss": 0.0939, "step": 696850 }, { "epoch": 6.85, "grad_norm": 1.0146043300628662, "learning_rate": 1.5912151091781109e-06, "loss": 0.0309, "step": 696875 }, { "epoch": 6.85, "grad_norm": 2.8138608932495117, "learning_rate": 1.5910909867238625e-06, "loss": 0.0619, "step": 696900 }, { "epoch": 6.85, "grad_norm": 9.047235488891602, "learning_rate": 1.5909668642696137e-06, "loss": 0.0345, "step": 696925 }, { "epoch": 6.85, "grad_norm": 10.757782936096191, "learning_rate": 1.5908427418153654e-06, "loss": 0.0966, "step": 696950 }, { "epoch": 6.85, "grad_norm": 8.611042976379395, "learning_rate": 1.590718619361117e-06, "loss": 0.048, "step": 696975 }, { "epoch": 6.85, "grad_norm": 1.3093550205230713, "learning_rate": 1.5905944969068684e-06, "loss": 0.0686, "step": 697000 }, { "epoch": 6.85, "grad_norm": 6.204124450683594, "learning_rate": 1.59047037445262e-06, "loss": 0.0203, "step": 697025 }, { "epoch": 6.85, "grad_norm": 4.946114540100098, "learning_rate": 1.5903462519983717e-06, "loss": 0.0696, "step": 697050 }, { "epoch": 6.85, "grad_norm": 6.2543044090271, "learning_rate": 1.5902221295441231e-06, "loss": 0.0194, "step": 697075 }, { "epoch": 6.85, "grad_norm": 9.966398239135742, "learning_rate": 1.5900980070898747e-06, "loss": 0.0602, "step": 697100 }, { "epoch": 6.85, "grad_norm": 8.619824409484863, "learning_rate": 1.5899738846356264e-06, "loss": 0.0193, "step": 697125 }, { "epoch": 6.85, "grad_norm": 0.568973183631897, "learning_rate": 1.5898497621813778e-06, "loss": 0.0774, "step": 697150 }, { "epoch": 6.85, "grad_norm": 2.8597772121429443, "learning_rate": 1.5897256397271294e-06, "loss": 0.0246, "step": 697175 }, { "epoch": 6.85, "grad_norm": 0.6922410726547241, "learning_rate": 1.589601517272881e-06, "loss": 0.0593, "step": 697200 }, { "epoch": 6.86, "grad_norm": 4.481533050537109, "learning_rate": 1.5894773948186323e-06, "loss": 0.0283, "step": 697225 }, { "epoch": 6.86, "grad_norm": 1.9312525987625122, "learning_rate": 1.589353272364384e-06, "loss": 0.0725, "step": 697250 }, { "epoch": 6.86, "grad_norm": 14.242471694946289, "learning_rate": 1.5892291499101356e-06, "loss": 0.0164, "step": 697275 }, { "epoch": 6.86, "grad_norm": 0.7805362343788147, "learning_rate": 1.589105027455887e-06, "loss": 0.075, "step": 697300 }, { "epoch": 6.86, "grad_norm": 4.161980628967285, "learning_rate": 1.5889809050016386e-06, "loss": 0.0268, "step": 697325 }, { "epoch": 6.86, "grad_norm": 5.003355979919434, "learning_rate": 1.58885678254739e-06, "loss": 0.0579, "step": 697350 }, { "epoch": 6.86, "grad_norm": 23.233858108520508, "learning_rate": 1.5887326600931417e-06, "loss": 0.0265, "step": 697375 }, { "epoch": 6.86, "grad_norm": 5.326773643493652, "learning_rate": 1.5886085376388933e-06, "loss": 0.0646, "step": 697400 }, { "epoch": 6.86, "grad_norm": 6.643249988555908, "learning_rate": 1.5884844151846445e-06, "loss": 0.0175, "step": 697425 }, { "epoch": 6.86, "grad_norm": 5.00677490234375, "learning_rate": 1.5883602927303962e-06, "loss": 0.0744, "step": 697450 }, { "epoch": 6.86, "grad_norm": 3.0707576274871826, "learning_rate": 1.5882361702761478e-06, "loss": 0.0254, "step": 697475 }, { "epoch": 6.86, "grad_norm": 2.512345552444458, "learning_rate": 1.5881120478218992e-06, "loss": 0.0682, "step": 697500 }, { "epoch": 6.86, "grad_norm": 6.778923511505127, "learning_rate": 1.5879879253676509e-06, "loss": 0.0188, "step": 697525 }, { "epoch": 6.86, "grad_norm": 3.0262017250061035, "learning_rate": 1.5878638029134025e-06, "loss": 0.0835, "step": 697550 }, { "epoch": 6.86, "grad_norm": 2.8023335933685303, "learning_rate": 1.587739680459154e-06, "loss": 0.0146, "step": 697575 }, { "epoch": 6.86, "grad_norm": 0.61484694480896, "learning_rate": 1.5876155580049055e-06, "loss": 0.0785, "step": 697600 }, { "epoch": 6.86, "grad_norm": 8.95858097076416, "learning_rate": 1.5874914355506572e-06, "loss": 0.026, "step": 697625 }, { "epoch": 6.86, "grad_norm": 1.3921375274658203, "learning_rate": 1.5873673130964084e-06, "loss": 0.0818, "step": 697650 }, { "epoch": 6.86, "grad_norm": 28.111024856567383, "learning_rate": 1.58724319064216e-06, "loss": 0.0304, "step": 697675 }, { "epoch": 6.86, "grad_norm": 0.597740888595581, "learning_rate": 1.5871190681879117e-06, "loss": 0.0681, "step": 697700 }, { "epoch": 6.86, "grad_norm": 3.1422841548919678, "learning_rate": 1.586994945733663e-06, "loss": 0.0264, "step": 697725 }, { "epoch": 6.86, "grad_norm": 2.6579549312591553, "learning_rate": 1.5868708232794147e-06, "loss": 0.0695, "step": 697750 }, { "epoch": 6.86, "grad_norm": 7.091208457946777, "learning_rate": 1.5867467008251661e-06, "loss": 0.0255, "step": 697775 }, { "epoch": 6.86, "grad_norm": 0.751067578792572, "learning_rate": 1.5866225783709178e-06, "loss": 0.0778, "step": 697800 }, { "epoch": 6.86, "grad_norm": 5.835856914520264, "learning_rate": 1.5864984559166694e-06, "loss": 0.019, "step": 697825 }, { "epoch": 6.86, "grad_norm": 0.036564990878105164, "learning_rate": 1.5863743334624206e-06, "loss": 0.0848, "step": 697850 }, { "epoch": 6.86, "grad_norm": 9.947407722473145, "learning_rate": 1.5862502110081723e-06, "loss": 0.0167, "step": 697875 }, { "epoch": 6.86, "grad_norm": 3.4494731426239014, "learning_rate": 1.5861260885539239e-06, "loss": 0.082, "step": 697900 }, { "epoch": 6.86, "grad_norm": 5.173383712768555, "learning_rate": 1.5860019660996753e-06, "loss": 0.0129, "step": 697925 }, { "epoch": 6.86, "grad_norm": 0.7039071321487427, "learning_rate": 1.585877843645427e-06, "loss": 0.072, "step": 697950 }, { "epoch": 6.86, "grad_norm": 10.55034351348877, "learning_rate": 1.5857537211911786e-06, "loss": 0.0158, "step": 697975 }, { "epoch": 6.86, "grad_norm": 0.5976617932319641, "learning_rate": 1.58562959873693e-06, "loss": 0.0725, "step": 698000 }, { "epoch": 6.86, "grad_norm": 11.025718688964844, "learning_rate": 1.5855054762826816e-06, "loss": 0.0275, "step": 698025 }, { "epoch": 6.86, "grad_norm": 6.16758918762207, "learning_rate": 1.5853813538284333e-06, "loss": 0.0655, "step": 698050 }, { "epoch": 6.86, "grad_norm": 2.254561424255371, "learning_rate": 1.5852572313741845e-06, "loss": 0.0158, "step": 698075 }, { "epoch": 6.86, "grad_norm": 2.5355026721954346, "learning_rate": 1.5851331089199361e-06, "loss": 0.0816, "step": 698100 }, { "epoch": 6.86, "grad_norm": 0.5892819166183472, "learning_rate": 1.5850089864656878e-06, "loss": 0.0181, "step": 698125 }, { "epoch": 6.86, "grad_norm": 0.9850940108299255, "learning_rate": 1.5848848640114392e-06, "loss": 0.0566, "step": 698150 }, { "epoch": 6.86, "grad_norm": 15.753676414489746, "learning_rate": 1.5847607415571908e-06, "loss": 0.0204, "step": 698175 }, { "epoch": 6.86, "grad_norm": 3.5950286388397217, "learning_rate": 1.5846366191029422e-06, "loss": 0.0662, "step": 698200 }, { "epoch": 6.87, "grad_norm": 10.621014595031738, "learning_rate": 1.5845124966486939e-06, "loss": 0.0319, "step": 698225 }, { "epoch": 6.87, "grad_norm": 2.2416350841522217, "learning_rate": 1.5843883741944455e-06, "loss": 0.0839, "step": 698250 }, { "epoch": 6.87, "grad_norm": 4.667062282562256, "learning_rate": 1.5842642517401967e-06, "loss": 0.0192, "step": 698275 }, { "epoch": 6.87, "grad_norm": 0.05032556504011154, "learning_rate": 1.5841401292859484e-06, "loss": 0.0923, "step": 698300 }, { "epoch": 6.87, "grad_norm": 10.99273681640625, "learning_rate": 1.5840160068317e-06, "loss": 0.0387, "step": 698325 }, { "epoch": 6.87, "grad_norm": 10.336926460266113, "learning_rate": 1.5838918843774514e-06, "loss": 0.074, "step": 698350 }, { "epoch": 6.87, "grad_norm": 8.788117408752441, "learning_rate": 1.583767761923203e-06, "loss": 0.0292, "step": 698375 }, { "epoch": 6.87, "grad_norm": 4.04503059387207, "learning_rate": 1.5836436394689547e-06, "loss": 0.0809, "step": 698400 }, { "epoch": 6.87, "grad_norm": 16.804288864135742, "learning_rate": 1.5835195170147061e-06, "loss": 0.0195, "step": 698425 }, { "epoch": 6.87, "grad_norm": 1.395042896270752, "learning_rate": 1.5833953945604577e-06, "loss": 0.0682, "step": 698450 }, { "epoch": 6.87, "grad_norm": 4.046686172485352, "learning_rate": 1.5832712721062094e-06, "loss": 0.0282, "step": 698475 }, { "epoch": 6.87, "grad_norm": 1.0838415622711182, "learning_rate": 1.5831471496519608e-06, "loss": 0.066, "step": 698500 }, { "epoch": 6.87, "grad_norm": 9.104425430297852, "learning_rate": 1.5830230271977124e-06, "loss": 0.0248, "step": 698525 }, { "epoch": 6.87, "grad_norm": 10.932881355285645, "learning_rate": 1.582898904743464e-06, "loss": 0.1021, "step": 698550 }, { "epoch": 6.87, "grad_norm": 24.828258514404297, "learning_rate": 1.5827747822892153e-06, "loss": 0.034, "step": 698575 }, { "epoch": 6.87, "grad_norm": 6.755196571350098, "learning_rate": 1.582650659834967e-06, "loss": 0.0627, "step": 698600 }, { "epoch": 6.87, "grad_norm": 9.767608642578125, "learning_rate": 1.5825265373807183e-06, "loss": 0.0206, "step": 698625 }, { "epoch": 6.87, "grad_norm": 1.159999132156372, "learning_rate": 1.58240241492647e-06, "loss": 0.0804, "step": 698650 }, { "epoch": 6.87, "grad_norm": 5.982708930969238, "learning_rate": 1.5822782924722216e-06, "loss": 0.0316, "step": 698675 }, { "epoch": 6.87, "grad_norm": 1.2637510299682617, "learning_rate": 1.582154170017973e-06, "loss": 0.0641, "step": 698700 }, { "epoch": 6.87, "grad_norm": 5.502474308013916, "learning_rate": 1.5820300475637247e-06, "loss": 0.019, "step": 698725 }, { "epoch": 6.87, "grad_norm": 1.9510453939437866, "learning_rate": 1.5819059251094763e-06, "loss": 0.0648, "step": 698750 }, { "epoch": 6.87, "grad_norm": 0.6795660257339478, "learning_rate": 1.5817818026552275e-06, "loss": 0.0257, "step": 698775 }, { "epoch": 6.87, "grad_norm": 1.0622186660766602, "learning_rate": 1.5816576802009792e-06, "loss": 0.0579, "step": 698800 }, { "epoch": 6.87, "grad_norm": 0.8526272177696228, "learning_rate": 1.5815335577467308e-06, "loss": 0.0211, "step": 698825 }, { "epoch": 6.87, "grad_norm": 2.4040396213531494, "learning_rate": 1.5814094352924822e-06, "loss": 0.1002, "step": 698850 }, { "epoch": 6.87, "grad_norm": 12.231901168823242, "learning_rate": 1.5812853128382338e-06, "loss": 0.037, "step": 698875 }, { "epoch": 6.87, "grad_norm": 0.13554216921329498, "learning_rate": 1.5811611903839855e-06, "loss": 0.078, "step": 698900 }, { "epoch": 6.87, "grad_norm": 1.4173543453216553, "learning_rate": 1.581037067929737e-06, "loss": 0.0209, "step": 698925 }, { "epoch": 6.87, "grad_norm": 7.1168437004089355, "learning_rate": 1.5809129454754885e-06, "loss": 0.0607, "step": 698950 }, { "epoch": 6.87, "grad_norm": 14.439072608947754, "learning_rate": 1.5807888230212402e-06, "loss": 0.0244, "step": 698975 }, { "epoch": 6.87, "grad_norm": 1.4146466255187988, "learning_rate": 1.5806647005669914e-06, "loss": 0.0934, "step": 699000 }, { "epoch": 6.87, "grad_norm": 4.58484411239624, "learning_rate": 1.580540578112743e-06, "loss": 0.0199, "step": 699025 }, { "epoch": 6.87, "grad_norm": 1.0240236520767212, "learning_rate": 1.5804164556584944e-06, "loss": 0.0766, "step": 699050 }, { "epoch": 6.87, "grad_norm": 6.505090236663818, "learning_rate": 1.580292333204246e-06, "loss": 0.0301, "step": 699075 }, { "epoch": 6.87, "grad_norm": 0.26550373435020447, "learning_rate": 1.5801682107499977e-06, "loss": 0.0702, "step": 699100 }, { "epoch": 6.87, "grad_norm": 9.184585571289062, "learning_rate": 1.5800440882957491e-06, "loss": 0.0346, "step": 699125 }, { "epoch": 6.87, "grad_norm": 0.748529314994812, "learning_rate": 1.5799199658415008e-06, "loss": 0.0723, "step": 699150 }, { "epoch": 6.87, "grad_norm": 5.154237747192383, "learning_rate": 1.5797958433872524e-06, "loss": 0.0234, "step": 699175 }, { "epoch": 6.87, "grad_norm": 1.2427136898040771, "learning_rate": 1.5796717209330036e-06, "loss": 0.0574, "step": 699200 }, { "epoch": 6.87, "grad_norm": 0.6670891046524048, "learning_rate": 1.5795475984787553e-06, "loss": 0.0248, "step": 699225 }, { "epoch": 6.88, "grad_norm": 1.8698779344558716, "learning_rate": 1.5794234760245069e-06, "loss": 0.0615, "step": 699250 }, { "epoch": 6.88, "grad_norm": 6.6332831382751465, "learning_rate": 1.5792993535702583e-06, "loss": 0.0187, "step": 699275 }, { "epoch": 6.88, "grad_norm": 0.2773459851741791, "learning_rate": 1.57917523111601e-06, "loss": 0.0639, "step": 699300 }, { "epoch": 6.88, "grad_norm": 12.459665298461914, "learning_rate": 1.5790511086617616e-06, "loss": 0.0376, "step": 699325 }, { "epoch": 6.88, "grad_norm": 2.979175567626953, "learning_rate": 1.578926986207513e-06, "loss": 0.0815, "step": 699350 }, { "epoch": 6.88, "grad_norm": 0.8025753498077393, "learning_rate": 1.5788028637532646e-06, "loss": 0.0131, "step": 699375 }, { "epoch": 6.88, "grad_norm": 3.8482608795166016, "learning_rate": 1.5786787412990163e-06, "loss": 0.0765, "step": 699400 }, { "epoch": 6.88, "grad_norm": 12.428739547729492, "learning_rate": 1.5785546188447675e-06, "loss": 0.036, "step": 699425 }, { "epoch": 6.88, "grad_norm": 0.5370901823043823, "learning_rate": 1.5784304963905191e-06, "loss": 0.0752, "step": 699450 }, { "epoch": 6.88, "grad_norm": 6.545009613037109, "learning_rate": 1.5783063739362705e-06, "loss": 0.0175, "step": 699475 }, { "epoch": 6.88, "grad_norm": 2.5134148597717285, "learning_rate": 1.5781822514820222e-06, "loss": 0.0716, "step": 699500 }, { "epoch": 6.88, "grad_norm": 2.632869005203247, "learning_rate": 1.5780581290277738e-06, "loss": 0.012, "step": 699525 }, { "epoch": 6.88, "grad_norm": 0.8065983653068542, "learning_rate": 1.5779340065735252e-06, "loss": 0.0834, "step": 699550 }, { "epoch": 6.88, "grad_norm": 0.7634251117706299, "learning_rate": 1.5778098841192769e-06, "loss": 0.0251, "step": 699575 }, { "epoch": 6.88, "grad_norm": 3.2852535247802734, "learning_rate": 1.5776857616650285e-06, "loss": 0.0694, "step": 699600 }, { "epoch": 6.88, "grad_norm": 5.208600044250488, "learning_rate": 1.5775616392107797e-06, "loss": 0.0187, "step": 699625 }, { "epoch": 6.88, "grad_norm": 2.1948325634002686, "learning_rate": 1.5774375167565314e-06, "loss": 0.0858, "step": 699650 }, { "epoch": 6.88, "grad_norm": 8.65114688873291, "learning_rate": 1.577313394302283e-06, "loss": 0.0146, "step": 699675 }, { "epoch": 6.88, "grad_norm": 3.561094284057617, "learning_rate": 1.5771892718480344e-06, "loss": 0.0651, "step": 699700 }, { "epoch": 6.88, "grad_norm": 7.6855926513671875, "learning_rate": 1.577065149393786e-06, "loss": 0.0255, "step": 699725 }, { "epoch": 6.88, "grad_norm": 0.24866428971290588, "learning_rate": 1.5769410269395377e-06, "loss": 0.0817, "step": 699750 }, { "epoch": 6.88, "grad_norm": 10.809806823730469, "learning_rate": 1.576816904485289e-06, "loss": 0.037, "step": 699775 }, { "epoch": 6.88, "grad_norm": 6.643016815185547, "learning_rate": 1.5766927820310407e-06, "loss": 0.097, "step": 699800 }, { "epoch": 6.88, "grad_norm": 6.9874267578125, "learning_rate": 1.5765686595767924e-06, "loss": 0.0301, "step": 699825 }, { "epoch": 6.88, "grad_norm": 3.279423952102661, "learning_rate": 1.5764445371225438e-06, "loss": 0.0754, "step": 699850 }, { "epoch": 6.88, "grad_norm": 7.627742290496826, "learning_rate": 1.5763204146682954e-06, "loss": 0.0357, "step": 699875 }, { "epoch": 6.88, "grad_norm": 1.5094162225723267, "learning_rate": 1.5761962922140466e-06, "loss": 0.123, "step": 699900 }, { "epoch": 6.88, "grad_norm": 11.841743469238281, "learning_rate": 1.5760721697597983e-06, "loss": 0.0282, "step": 699925 }, { "epoch": 6.88, "grad_norm": 4.119723796844482, "learning_rate": 1.57594804730555e-06, "loss": 0.0853, "step": 699950 }, { "epoch": 6.88, "grad_norm": 8.05634880065918, "learning_rate": 1.5758239248513013e-06, "loss": 0.0399, "step": 699975 }, { "epoch": 6.88, "grad_norm": 1.6822577714920044, "learning_rate": 1.575699802397053e-06, "loss": 0.1043, "step": 700000 }, { "epoch": 6.88, "eval_loss": 0.8254198431968689, "eval_runtime": 6115.4455, "eval_samples_per_second": 1.548, "eval_steps_per_second": 0.194, "eval_wer": 0.11218051889525527, "step": 700000 }, { "epoch": 6.88, "grad_norm": 12.345648765563965, "learning_rate": 1.5755756799428046e-06, "loss": 0.0301, "step": 700025 }, { "epoch": 6.88, "grad_norm": 1.6857198476791382, "learning_rate": 1.575456522386726e-06, "loss": 0.0588, "step": 700050 }, { "epoch": 6.88, "grad_norm": 8.00097370147705, "learning_rate": 1.5753323999324775e-06, "loss": 0.0346, "step": 700075 }, { "epoch": 6.88, "grad_norm": 3.0856831073760986, "learning_rate": 1.5752082774782291e-06, "loss": 0.0756, "step": 700100 }, { "epoch": 6.88, "grad_norm": 5.127658843994141, "learning_rate": 1.5750841550239808e-06, "loss": 0.0143, "step": 700125 }, { "epoch": 6.88, "grad_norm": 3.6675033569335938, "learning_rate": 1.574960032569732e-06, "loss": 0.0728, "step": 700150 }, { "epoch": 6.88, "grad_norm": 7.951765537261963, "learning_rate": 1.5748359101154836e-06, "loss": 0.0114, "step": 700175 }, { "epoch": 6.88, "grad_norm": 0.47130391001701355, "learning_rate": 1.5747117876612352e-06, "loss": 0.1071, "step": 700200 }, { "epoch": 6.88, "grad_norm": 18.807601928710938, "learning_rate": 1.5745876652069867e-06, "loss": 0.0291, "step": 700225 }, { "epoch": 6.88, "grad_norm": 2.80423903465271, "learning_rate": 1.5744635427527383e-06, "loss": 0.0795, "step": 700250 }, { "epoch": 6.89, "grad_norm": 6.33284854888916, "learning_rate": 1.57433942029849e-06, "loss": 0.0138, "step": 700275 }, { "epoch": 6.89, "grad_norm": 1.8984417915344238, "learning_rate": 1.5742152978442413e-06, "loss": 0.0528, "step": 700300 }, { "epoch": 6.89, "grad_norm": 12.390748023986816, "learning_rate": 1.574091175389993e-06, "loss": 0.0302, "step": 700325 }, { "epoch": 6.89, "grad_norm": 5.3354811668396, "learning_rate": 1.5739670529357446e-06, "loss": 0.0784, "step": 700350 }, { "epoch": 6.89, "grad_norm": 6.421738624572754, "learning_rate": 1.5738429304814958e-06, "loss": 0.0247, "step": 700375 }, { "epoch": 6.89, "grad_norm": 5.093413352966309, "learning_rate": 1.5737188080272475e-06, "loss": 0.0942, "step": 700400 }, { "epoch": 6.89, "grad_norm": 2.7637689113616943, "learning_rate": 1.573594685572999e-06, "loss": 0.0157, "step": 700425 }, { "epoch": 6.89, "grad_norm": 2.731513738632202, "learning_rate": 1.5734755280169206e-06, "loss": 0.0976, "step": 700450 }, { "epoch": 6.89, "grad_norm": 5.8194499015808105, "learning_rate": 1.573351405562672e-06, "loss": 0.0236, "step": 700475 }, { "epoch": 6.89, "grad_norm": 4.86795711517334, "learning_rate": 1.5732272831084236e-06, "loss": 0.0719, "step": 700500 }, { "epoch": 6.89, "grad_norm": 6.43988561630249, "learning_rate": 1.5731031606541752e-06, "loss": 0.0242, "step": 700525 }, { "epoch": 6.89, "grad_norm": 3.9639155864715576, "learning_rate": 1.5729790381999267e-06, "loss": 0.0783, "step": 700550 }, { "epoch": 6.89, "grad_norm": 2.161137342453003, "learning_rate": 1.5728549157456783e-06, "loss": 0.01, "step": 700575 }, { "epoch": 6.89, "grad_norm": 3.114201068878174, "learning_rate": 1.57273079329143e-06, "loss": 0.0604, "step": 700600 }, { "epoch": 6.89, "grad_norm": 1.661431908607483, "learning_rate": 1.5726066708371811e-06, "loss": 0.0097, "step": 700625 }, { "epoch": 6.89, "grad_norm": 1.9828475713729858, "learning_rate": 1.5724825483829328e-06, "loss": 0.0589, "step": 700650 }, { "epoch": 6.89, "grad_norm": 9.29601001739502, "learning_rate": 1.5723584259286842e-06, "loss": 0.0175, "step": 700675 }, { "epoch": 6.89, "grad_norm": 5.864659786224365, "learning_rate": 1.5722343034744358e-06, "loss": 0.066, "step": 700700 }, { "epoch": 6.89, "grad_norm": 9.189750671386719, "learning_rate": 1.5721101810201875e-06, "loss": 0.0196, "step": 700725 }, { "epoch": 6.89, "grad_norm": 3.3026812076568604, "learning_rate": 1.571986058565939e-06, "loss": 0.0798, "step": 700750 }, { "epoch": 6.89, "grad_norm": 1.3330743312835693, "learning_rate": 1.5718619361116905e-06, "loss": 0.0362, "step": 700775 }, { "epoch": 6.89, "grad_norm": 2.1687777042388916, "learning_rate": 1.5717378136574422e-06, "loss": 0.0878, "step": 700800 }, { "epoch": 6.89, "grad_norm": 1.0645955801010132, "learning_rate": 1.5716136912031934e-06, "loss": 0.0135, "step": 700825 }, { "epoch": 6.89, "grad_norm": 10.802363395690918, "learning_rate": 1.571489568748945e-06, "loss": 0.0625, "step": 700850 }, { "epoch": 6.89, "grad_norm": 6.216047763824463, "learning_rate": 1.5713654462946967e-06, "loss": 0.0228, "step": 700875 }, { "epoch": 6.89, "grad_norm": 0.3173828721046448, "learning_rate": 1.571241323840448e-06, "loss": 0.102, "step": 700900 }, { "epoch": 6.89, "grad_norm": 12.681836128234863, "learning_rate": 1.5711172013861997e-06, "loss": 0.0323, "step": 700925 }, { "epoch": 6.89, "grad_norm": 6.065628528594971, "learning_rate": 1.5709930789319513e-06, "loss": 0.0854, "step": 700950 }, { "epoch": 6.89, "grad_norm": 0.35435307025909424, "learning_rate": 1.5708689564777028e-06, "loss": 0.0254, "step": 700975 }, { "epoch": 6.89, "grad_norm": 3.8387420177459717, "learning_rate": 1.5707448340234544e-06, "loss": 0.0695, "step": 701000 }, { "epoch": 6.89, "grad_norm": 3.003495454788208, "learning_rate": 1.570620711569206e-06, "loss": 0.0166, "step": 701025 }, { "epoch": 6.89, "grad_norm": 0.3438723683357239, "learning_rate": 1.5704965891149572e-06, "loss": 0.0649, "step": 701050 }, { "epoch": 6.89, "grad_norm": 6.895633220672607, "learning_rate": 1.5703724666607089e-06, "loss": 0.0318, "step": 701075 }, { "epoch": 6.89, "grad_norm": 0.33986684679985046, "learning_rate": 1.5702483442064603e-06, "loss": 0.0717, "step": 701100 }, { "epoch": 6.89, "grad_norm": 8.290099143981934, "learning_rate": 1.570124221752212e-06, "loss": 0.0326, "step": 701125 }, { "epoch": 6.89, "grad_norm": 0.06568699330091476, "learning_rate": 1.5700000992979636e-06, "loss": 0.0735, "step": 701150 }, { "epoch": 6.89, "grad_norm": 5.39490270614624, "learning_rate": 1.569875976843715e-06, "loss": 0.0279, "step": 701175 }, { "epoch": 6.89, "grad_norm": 4.44813346862793, "learning_rate": 1.5697518543894666e-06, "loss": 0.0985, "step": 701200 }, { "epoch": 6.89, "grad_norm": 13.091313362121582, "learning_rate": 1.5696277319352183e-06, "loss": 0.0338, "step": 701225 }, { "epoch": 6.89, "grad_norm": 1.248266339302063, "learning_rate": 1.5695036094809695e-06, "loss": 0.0526, "step": 701250 }, { "epoch": 6.9, "grad_norm": 8.838913917541504, "learning_rate": 1.5693794870267211e-06, "loss": 0.019, "step": 701275 }, { "epoch": 6.9, "grad_norm": 0.8614417910575867, "learning_rate": 1.5692553645724728e-06, "loss": 0.0647, "step": 701300 }, { "epoch": 6.9, "grad_norm": 6.373592376708984, "learning_rate": 1.5691312421182242e-06, "loss": 0.0198, "step": 701325 }, { "epoch": 6.9, "grad_norm": 1.2880834341049194, "learning_rate": 1.5690071196639758e-06, "loss": 0.0563, "step": 701350 }, { "epoch": 6.9, "grad_norm": 1.4993113279342651, "learning_rate": 1.5688829972097274e-06, "loss": 0.0328, "step": 701375 }, { "epoch": 6.9, "grad_norm": 1.2782262563705444, "learning_rate": 1.5687588747554789e-06, "loss": 0.0771, "step": 701400 }, { "epoch": 6.9, "grad_norm": 1.5069520473480225, "learning_rate": 1.5686347523012305e-06, "loss": 0.0142, "step": 701425 }, { "epoch": 6.9, "grad_norm": 4.820407390594482, "learning_rate": 1.5685106298469821e-06, "loss": 0.0816, "step": 701450 }, { "epoch": 6.9, "grad_norm": 3.866237163543701, "learning_rate": 1.5683865073927336e-06, "loss": 0.017, "step": 701475 }, { "epoch": 6.9, "grad_norm": 2.3370251655578613, "learning_rate": 1.5682623849384852e-06, "loss": 0.0832, "step": 701500 }, { "epoch": 6.9, "grad_norm": 15.096442222595215, "learning_rate": 1.5681382624842364e-06, "loss": 0.0157, "step": 701525 }, { "epoch": 6.9, "grad_norm": 3.210897207260132, "learning_rate": 1.568014140029988e-06, "loss": 0.0642, "step": 701550 }, { "epoch": 6.9, "grad_norm": 10.231345176696777, "learning_rate": 1.5678900175757397e-06, "loss": 0.019, "step": 701575 }, { "epoch": 6.9, "grad_norm": 1.7636967897415161, "learning_rate": 1.567765895121491e-06, "loss": 0.0792, "step": 701600 }, { "epoch": 6.9, "grad_norm": 13.004716873168945, "learning_rate": 1.5676417726672427e-06, "loss": 0.0287, "step": 701625 }, { "epoch": 6.9, "grad_norm": 0.3100879490375519, "learning_rate": 1.5675176502129944e-06, "loss": 0.0539, "step": 701650 }, { "epoch": 6.9, "grad_norm": 5.609972953796387, "learning_rate": 1.5673935277587458e-06, "loss": 0.0228, "step": 701675 }, { "epoch": 6.9, "grad_norm": 2.627031087875366, "learning_rate": 1.5672694053044974e-06, "loss": 0.0669, "step": 701700 }, { "epoch": 6.9, "grad_norm": 13.63291072845459, "learning_rate": 1.567145282850249e-06, "loss": 0.0292, "step": 701725 }, { "epoch": 6.9, "grad_norm": 0.8348186612129211, "learning_rate": 1.5670211603960003e-06, "loss": 0.0673, "step": 701750 }, { "epoch": 6.9, "grad_norm": 8.975163459777832, "learning_rate": 1.566897037941752e-06, "loss": 0.0202, "step": 701775 }, { "epoch": 6.9, "grad_norm": 3.852736234664917, "learning_rate": 1.5667729154875035e-06, "loss": 0.0542, "step": 701800 }, { "epoch": 6.9, "grad_norm": 12.91942024230957, "learning_rate": 1.566648793033255e-06, "loss": 0.0194, "step": 701825 }, { "epoch": 6.9, "grad_norm": 1.1309313774108887, "learning_rate": 1.5665246705790066e-06, "loss": 0.0948, "step": 701850 }, { "epoch": 6.9, "grad_norm": 0.2395620197057724, "learning_rate": 1.5664005481247582e-06, "loss": 0.0164, "step": 701875 }, { "epoch": 6.9, "grad_norm": 2.8833577632904053, "learning_rate": 1.5662764256705097e-06, "loss": 0.0899, "step": 701900 }, { "epoch": 6.9, "grad_norm": 9.757553100585938, "learning_rate": 1.5661523032162613e-06, "loss": 0.0187, "step": 701925 }, { "epoch": 6.9, "grad_norm": 3.959827423095703, "learning_rate": 1.5660281807620125e-06, "loss": 0.0618, "step": 701950 }, { "epoch": 6.9, "grad_norm": 4.412043571472168, "learning_rate": 1.5659040583077641e-06, "loss": 0.0106, "step": 701975 }, { "epoch": 6.9, "grad_norm": 3.5839998722076416, "learning_rate": 1.5657799358535158e-06, "loss": 0.0909, "step": 702000 }, { "epoch": 6.9, "grad_norm": 13.069259643554688, "learning_rate": 1.5656558133992672e-06, "loss": 0.0248, "step": 702025 }, { "epoch": 6.9, "grad_norm": 3.103733539581299, "learning_rate": 1.5655316909450188e-06, "loss": 0.0711, "step": 702050 }, { "epoch": 6.9, "grad_norm": 10.496851921081543, "learning_rate": 1.5654075684907705e-06, "loss": 0.0177, "step": 702075 }, { "epoch": 6.9, "grad_norm": 3.54937744140625, "learning_rate": 1.565283446036522e-06, "loss": 0.0576, "step": 702100 }, { "epoch": 6.9, "grad_norm": 11.640142440795898, "learning_rate": 1.5651593235822735e-06, "loss": 0.0252, "step": 702125 }, { "epoch": 6.9, "grad_norm": 0.02751782536506653, "learning_rate": 1.5650352011280252e-06, "loss": 0.0556, "step": 702150 }, { "epoch": 6.9, "grad_norm": 15.384445190429688, "learning_rate": 1.5649110786737764e-06, "loss": 0.0269, "step": 702175 }, { "epoch": 6.9, "grad_norm": 1.3975321054458618, "learning_rate": 1.564786956219528e-06, "loss": 0.0532, "step": 702200 }, { "epoch": 6.9, "grad_norm": 3.9217400550842285, "learning_rate": 1.5646628337652796e-06, "loss": 0.0331, "step": 702225 }, { "epoch": 6.9, "grad_norm": 3.2633893489837646, "learning_rate": 1.564538711311031e-06, "loss": 0.0884, "step": 702250 }, { "epoch": 6.9, "grad_norm": 19.102724075317383, "learning_rate": 1.5644145888567827e-06, "loss": 0.0186, "step": 702275 }, { "epoch": 6.91, "grad_norm": 3.9715840816497803, "learning_rate": 1.5642904664025343e-06, "loss": 0.0653, "step": 702300 }, { "epoch": 6.91, "grad_norm": 6.4911346435546875, "learning_rate": 1.5641663439482858e-06, "loss": 0.0233, "step": 702325 }, { "epoch": 6.91, "grad_norm": 1.874297857284546, "learning_rate": 1.5640422214940374e-06, "loss": 0.0851, "step": 702350 }, { "epoch": 6.91, "grad_norm": 11.35919189453125, "learning_rate": 1.5639180990397886e-06, "loss": 0.0279, "step": 702375 }, { "epoch": 6.91, "grad_norm": 5.327411651611328, "learning_rate": 1.5637939765855402e-06, "loss": 0.0627, "step": 702400 }, { "epoch": 6.91, "grad_norm": 5.790736198425293, "learning_rate": 1.5636698541312919e-06, "loss": 0.0281, "step": 702425 }, { "epoch": 6.91, "grad_norm": 6.118747711181641, "learning_rate": 1.5635457316770433e-06, "loss": 0.0643, "step": 702450 }, { "epoch": 6.91, "grad_norm": 5.691955089569092, "learning_rate": 1.563421609222795e-06, "loss": 0.0176, "step": 702475 }, { "epoch": 6.91, "grad_norm": 3.5110161304473877, "learning_rate": 1.5632974867685466e-06, "loss": 0.0839, "step": 702500 }, { "epoch": 6.91, "grad_norm": 6.829853534698486, "learning_rate": 1.563173364314298e-06, "loss": 0.0312, "step": 702525 }, { "epoch": 6.91, "grad_norm": 3.6412923336029053, "learning_rate": 1.5630542067582194e-06, "loss": 0.0766, "step": 702550 }, { "epoch": 6.91, "grad_norm": 7.4591898918151855, "learning_rate": 1.562930084303971e-06, "loss": 0.0229, "step": 702575 }, { "epoch": 6.91, "grad_norm": 2.149153470993042, "learning_rate": 1.5628059618497227e-06, "loss": 0.0685, "step": 702600 }, { "epoch": 6.91, "grad_norm": 8.513665199279785, "learning_rate": 1.562681839395474e-06, "loss": 0.0294, "step": 702625 }, { "epoch": 6.91, "grad_norm": 0.4383265972137451, "learning_rate": 1.5625577169412256e-06, "loss": 0.0925, "step": 702650 }, { "epoch": 6.91, "grad_norm": 11.055891990661621, "learning_rate": 1.5624335944869772e-06, "loss": 0.0297, "step": 702675 }, { "epoch": 6.91, "grad_norm": 8.83934211730957, "learning_rate": 1.5623094720327286e-06, "loss": 0.0883, "step": 702700 }, { "epoch": 6.91, "grad_norm": 6.962925910949707, "learning_rate": 1.5621853495784803e-06, "loss": 0.0205, "step": 702725 }, { "epoch": 6.91, "grad_norm": 7.338497161865234, "learning_rate": 1.5620612271242319e-06, "loss": 0.0747, "step": 702750 }, { "epoch": 6.91, "grad_norm": 7.777942180633545, "learning_rate": 1.5619371046699833e-06, "loss": 0.0235, "step": 702775 }, { "epoch": 6.91, "grad_norm": 2.969273328781128, "learning_rate": 1.561812982215735e-06, "loss": 0.0585, "step": 702800 }, { "epoch": 6.91, "grad_norm": 4.617209434509277, "learning_rate": 1.5616888597614866e-06, "loss": 0.0195, "step": 702825 }, { "epoch": 6.91, "grad_norm": 4.615122318267822, "learning_rate": 1.5615647373072378e-06, "loss": 0.1098, "step": 702850 }, { "epoch": 6.91, "grad_norm": 13.794920921325684, "learning_rate": 1.5614406148529894e-06, "loss": 0.0365, "step": 702875 }, { "epoch": 6.91, "grad_norm": 5.8503336906433105, "learning_rate": 1.561316492398741e-06, "loss": 0.0743, "step": 702900 }, { "epoch": 6.91, "grad_norm": 10.439397811889648, "learning_rate": 1.5611923699444925e-06, "loss": 0.021, "step": 702925 }, { "epoch": 6.91, "grad_norm": 7.608665466308594, "learning_rate": 1.5610682474902441e-06, "loss": 0.0819, "step": 702950 }, { "epoch": 6.91, "grad_norm": 2.040459632873535, "learning_rate": 1.5609441250359955e-06, "loss": 0.0484, "step": 702975 }, { "epoch": 6.91, "grad_norm": 4.331628799438477, "learning_rate": 1.5608200025817472e-06, "loss": 0.0658, "step": 703000 }, { "epoch": 6.91, "grad_norm": 12.337823867797852, "learning_rate": 1.5606958801274988e-06, "loss": 0.0302, "step": 703025 }, { "epoch": 6.91, "grad_norm": 12.287278175354004, "learning_rate": 1.5605717576732502e-06, "loss": 0.0601, "step": 703050 }, { "epoch": 6.91, "grad_norm": 11.911629676818848, "learning_rate": 1.5604476352190019e-06, "loss": 0.0286, "step": 703075 }, { "epoch": 6.91, "grad_norm": 2.1112782955169678, "learning_rate": 1.5603235127647535e-06, "loss": 0.0801, "step": 703100 }, { "epoch": 6.91, "grad_norm": 2.407766103744507, "learning_rate": 1.5601993903105047e-06, "loss": 0.0195, "step": 703125 }, { "epoch": 6.91, "grad_norm": 2.2810750007629395, "learning_rate": 1.5600752678562564e-06, "loss": 0.0591, "step": 703150 }, { "epoch": 6.91, "grad_norm": 3.253011465072632, "learning_rate": 1.559951145402008e-06, "loss": 0.0175, "step": 703175 }, { "epoch": 6.91, "grad_norm": 6.085141181945801, "learning_rate": 1.5598270229477594e-06, "loss": 0.0826, "step": 703200 }, { "epoch": 6.91, "grad_norm": 2.5712757110595703, "learning_rate": 1.559702900493511e-06, "loss": 0.0151, "step": 703225 }, { "epoch": 6.91, "grad_norm": 7.375621318817139, "learning_rate": 1.5595787780392627e-06, "loss": 0.0898, "step": 703250 }, { "epoch": 6.91, "grad_norm": 14.312761306762695, "learning_rate": 1.5594546555850141e-06, "loss": 0.0237, "step": 703275 }, { "epoch": 6.91, "grad_norm": 0.12150570750236511, "learning_rate": 1.5593305331307657e-06, "loss": 0.0893, "step": 703300 }, { "epoch": 6.92, "grad_norm": 11.441879272460938, "learning_rate": 1.5592064106765174e-06, "loss": 0.0237, "step": 703325 }, { "epoch": 6.92, "grad_norm": 6.128257751464844, "learning_rate": 1.5590822882222686e-06, "loss": 0.0851, "step": 703350 }, { "epoch": 6.92, "grad_norm": 13.927759170532227, "learning_rate": 1.5589581657680202e-06, "loss": 0.0323, "step": 703375 }, { "epoch": 6.92, "grad_norm": 8.012165069580078, "learning_rate": 1.5588340433137716e-06, "loss": 0.0661, "step": 703400 }, { "epoch": 6.92, "grad_norm": 0.4016008973121643, "learning_rate": 1.5587099208595233e-06, "loss": 0.0192, "step": 703425 }, { "epoch": 6.92, "grad_norm": 7.424834251403809, "learning_rate": 1.558585798405275e-06, "loss": 0.0802, "step": 703450 }, { "epoch": 6.92, "grad_norm": 9.245019912719727, "learning_rate": 1.5584616759510263e-06, "loss": 0.0275, "step": 703475 }, { "epoch": 6.92, "grad_norm": 1.0848513841629028, "learning_rate": 1.558337553496778e-06, "loss": 0.0727, "step": 703500 }, { "epoch": 6.92, "grad_norm": 1.3242706060409546, "learning_rate": 1.5582134310425296e-06, "loss": 0.022, "step": 703525 }, { "epoch": 6.92, "grad_norm": 4.540891170501709, "learning_rate": 1.5580893085882808e-06, "loss": 0.0392, "step": 703550 }, { "epoch": 6.92, "grad_norm": 1.3540427684783936, "learning_rate": 1.5579651861340325e-06, "loss": 0.0212, "step": 703575 }, { "epoch": 6.92, "grad_norm": 0.5300036072731018, "learning_rate": 1.557841063679784e-06, "loss": 0.0824, "step": 703600 }, { "epoch": 6.92, "grad_norm": 1.9119658470153809, "learning_rate": 1.5577169412255355e-06, "loss": 0.0271, "step": 703625 }, { "epoch": 6.92, "grad_norm": 10.943825721740723, "learning_rate": 1.5575928187712871e-06, "loss": 0.0731, "step": 703650 }, { "epoch": 6.92, "grad_norm": 21.103065490722656, "learning_rate": 1.5574686963170388e-06, "loss": 0.0348, "step": 703675 }, { "epoch": 6.92, "grad_norm": 0.24045708775520325, "learning_rate": 1.5573445738627902e-06, "loss": 0.0821, "step": 703700 }, { "epoch": 6.92, "grad_norm": 10.849132537841797, "learning_rate": 1.5572204514085418e-06, "loss": 0.0299, "step": 703725 }, { "epoch": 6.92, "grad_norm": 3.5667598247528076, "learning_rate": 1.5570963289542935e-06, "loss": 0.0798, "step": 703750 }, { "epoch": 6.92, "grad_norm": 7.656249523162842, "learning_rate": 1.5569722065000447e-06, "loss": 0.0108, "step": 703775 }, { "epoch": 6.92, "grad_norm": 5.245134353637695, "learning_rate": 1.5568480840457963e-06, "loss": 0.0779, "step": 703800 }, { "epoch": 6.92, "grad_norm": 11.708715438842773, "learning_rate": 1.5567239615915477e-06, "loss": 0.0172, "step": 703825 }, { "epoch": 6.92, "grad_norm": 0.6393135190010071, "learning_rate": 1.5565998391372994e-06, "loss": 0.0912, "step": 703850 }, { "epoch": 6.92, "grad_norm": 1.3660054206848145, "learning_rate": 1.556475716683051e-06, "loss": 0.0129, "step": 703875 }, { "epoch": 6.92, "grad_norm": 1.5899100303649902, "learning_rate": 1.5563515942288024e-06, "loss": 0.0819, "step": 703900 }, { "epoch": 6.92, "grad_norm": 1.5601353645324707, "learning_rate": 1.556227471774554e-06, "loss": 0.0217, "step": 703925 }, { "epoch": 6.92, "grad_norm": 3.5531063079833984, "learning_rate": 1.5561033493203057e-06, "loss": 0.067, "step": 703950 }, { "epoch": 6.92, "grad_norm": 8.956422805786133, "learning_rate": 1.555979226866057e-06, "loss": 0.0223, "step": 703975 }, { "epoch": 6.92, "grad_norm": 0.006792112253606319, "learning_rate": 1.5558551044118086e-06, "loss": 0.0711, "step": 704000 }, { "epoch": 6.92, "grad_norm": 6.023087501525879, "learning_rate": 1.5557309819575602e-06, "loss": 0.0286, "step": 704025 }, { "epoch": 6.92, "grad_norm": 4.845843315124512, "learning_rate": 1.5556068595033116e-06, "loss": 0.0646, "step": 704050 }, { "epoch": 6.92, "grad_norm": 8.89605712890625, "learning_rate": 1.5554827370490633e-06, "loss": 0.0278, "step": 704075 }, { "epoch": 6.92, "grad_norm": 4.971871852874756, "learning_rate": 1.5553586145948149e-06, "loss": 0.0729, "step": 704100 }, { "epoch": 6.92, "grad_norm": 3.072777509689331, "learning_rate": 1.5552344921405663e-06, "loss": 0.0311, "step": 704125 }, { "epoch": 6.92, "grad_norm": 1.1894049644470215, "learning_rate": 1.555110369686318e-06, "loss": 0.0651, "step": 704150 }, { "epoch": 6.92, "grad_norm": 16.919984817504883, "learning_rate": 1.5549862472320696e-06, "loss": 0.0271, "step": 704175 }, { "epoch": 6.92, "grad_norm": 0.3251308798789978, "learning_rate": 1.5548621247778208e-06, "loss": 0.0814, "step": 704200 }, { "epoch": 6.92, "grad_norm": 11.086017608642578, "learning_rate": 1.5547380023235724e-06, "loss": 0.0394, "step": 704225 }, { "epoch": 6.92, "grad_norm": 5.226520538330078, "learning_rate": 1.5546138798693238e-06, "loss": 0.0872, "step": 704250 }, { "epoch": 6.92, "grad_norm": 7.216062545776367, "learning_rate": 1.5544897574150755e-06, "loss": 0.0169, "step": 704275 }, { "epoch": 6.92, "grad_norm": 3.297846555709839, "learning_rate": 1.5543656349608271e-06, "loss": 0.0547, "step": 704300 }, { "epoch": 6.93, "grad_norm": 4.409221649169922, "learning_rate": 1.5542415125065785e-06, "loss": 0.0237, "step": 704325 }, { "epoch": 6.93, "grad_norm": 3.0265796184539795, "learning_rate": 1.5541173900523302e-06, "loss": 0.0679, "step": 704350 }, { "epoch": 6.93, "grad_norm": 8.910055160522461, "learning_rate": 1.5539932675980818e-06, "loss": 0.026, "step": 704375 }, { "epoch": 6.93, "grad_norm": 4.9798150062561035, "learning_rate": 1.5538691451438332e-06, "loss": 0.0879, "step": 704400 }, { "epoch": 6.93, "grad_norm": 9.43425464630127, "learning_rate": 1.5537450226895849e-06, "loss": 0.0233, "step": 704425 }, { "epoch": 6.93, "grad_norm": 5.275140762329102, "learning_rate": 1.5536209002353365e-06, "loss": 0.0914, "step": 704450 }, { "epoch": 6.93, "grad_norm": 8.071229934692383, "learning_rate": 1.5534967777810877e-06, "loss": 0.0383, "step": 704475 }, { "epoch": 6.93, "grad_norm": 5.853013515472412, "learning_rate": 1.5533726553268394e-06, "loss": 0.0649, "step": 704500 }, { "epoch": 6.93, "grad_norm": 2.482908010482788, "learning_rate": 1.553248532872591e-06, "loss": 0.0187, "step": 704525 }, { "epoch": 6.93, "grad_norm": 4.4106879234313965, "learning_rate": 1.5531244104183424e-06, "loss": 0.0681, "step": 704550 }, { "epoch": 6.93, "grad_norm": 2.4208528995513916, "learning_rate": 1.553000287964094e-06, "loss": 0.03, "step": 704575 }, { "epoch": 6.93, "grad_norm": 1.1701234579086304, "learning_rate": 1.5528761655098457e-06, "loss": 0.0787, "step": 704600 }, { "epoch": 6.93, "grad_norm": 0.20349720120429993, "learning_rate": 1.552752043055597e-06, "loss": 0.0213, "step": 704625 }, { "epoch": 6.93, "grad_norm": 3.527132749557495, "learning_rate": 1.5526279206013487e-06, "loss": 0.0857, "step": 704650 }, { "epoch": 6.93, "grad_norm": 8.90556812286377, "learning_rate": 1.5525037981471e-06, "loss": 0.018, "step": 704675 }, { "epoch": 6.93, "grad_norm": 0.20612333714962006, "learning_rate": 1.5523796756928516e-06, "loss": 0.1011, "step": 704700 }, { "epoch": 6.93, "grad_norm": 9.209151268005371, "learning_rate": 1.5522555532386032e-06, "loss": 0.0178, "step": 704725 }, { "epoch": 6.93, "grad_norm": 2.407737970352173, "learning_rate": 1.5521314307843546e-06, "loss": 0.0897, "step": 704750 }, { "epoch": 6.93, "grad_norm": 7.775501251220703, "learning_rate": 1.5520073083301063e-06, "loss": 0.0263, "step": 704775 }, { "epoch": 6.93, "grad_norm": 1.4260585308074951, "learning_rate": 1.551883185875858e-06, "loss": 0.0865, "step": 704800 }, { "epoch": 6.93, "grad_norm": 4.656376361846924, "learning_rate": 1.5517590634216093e-06, "loss": 0.0215, "step": 704825 }, { "epoch": 6.93, "grad_norm": 4.120024681091309, "learning_rate": 1.551634940967361e-06, "loss": 0.0724, "step": 704850 }, { "epoch": 6.93, "grad_norm": 6.769830226898193, "learning_rate": 1.5515108185131126e-06, "loss": 0.0392, "step": 704875 }, { "epoch": 6.93, "grad_norm": 0.1937711387872696, "learning_rate": 1.5513866960588638e-06, "loss": 0.0719, "step": 704900 }, { "epoch": 6.93, "grad_norm": 1.0717746019363403, "learning_rate": 1.5512625736046155e-06, "loss": 0.0348, "step": 704925 }, { "epoch": 6.93, "grad_norm": 0.97788006067276, "learning_rate": 1.551138451150367e-06, "loss": 0.1054, "step": 704950 }, { "epoch": 6.93, "grad_norm": 11.651949882507324, "learning_rate": 1.5510143286961185e-06, "loss": 0.0317, "step": 704975 }, { "epoch": 6.93, "grad_norm": 5.488043785095215, "learning_rate": 1.5508902062418701e-06, "loss": 0.0781, "step": 705000 }, { "epoch": 6.93, "grad_norm": 22.730968475341797, "learning_rate": 1.5507660837876218e-06, "loss": 0.0079, "step": 705025 }, { "epoch": 6.93, "grad_norm": 1.6961097717285156, "learning_rate": 1.5506419613333732e-06, "loss": 0.0997, "step": 705050 }, { "epoch": 6.93, "grad_norm": 6.853060245513916, "learning_rate": 1.5505178388791248e-06, "loss": 0.0184, "step": 705075 }, { "epoch": 6.93, "grad_norm": 1.1977559328079224, "learning_rate": 1.5503986813230463e-06, "loss": 0.0958, "step": 705100 }, { "epoch": 6.93, "grad_norm": 3.0502634048461914, "learning_rate": 1.550274558868798e-06, "loss": 0.019, "step": 705125 }, { "epoch": 6.93, "grad_norm": 12.824238777160645, "learning_rate": 1.5501504364145491e-06, "loss": 0.0598, "step": 705150 }, { "epoch": 6.93, "grad_norm": 7.539175987243652, "learning_rate": 1.5500263139603008e-06, "loss": 0.0317, "step": 705175 }, { "epoch": 6.93, "grad_norm": 2.069429874420166, "learning_rate": 1.5499021915060522e-06, "loss": 0.0783, "step": 705200 }, { "epoch": 6.93, "grad_norm": 1.0554276704788208, "learning_rate": 1.5497780690518038e-06, "loss": 0.0208, "step": 705225 }, { "epoch": 6.93, "grad_norm": 0.44467031955718994, "learning_rate": 1.5496539465975555e-06, "loss": 0.0935, "step": 705250 }, { "epoch": 6.93, "grad_norm": 5.9344916343688965, "learning_rate": 1.5495298241433069e-06, "loss": 0.0244, "step": 705275 }, { "epoch": 6.93, "grad_norm": 0.5406456589698792, "learning_rate": 1.5494057016890585e-06, "loss": 0.0778, "step": 705300 }, { "epoch": 6.93, "grad_norm": 1.54872727394104, "learning_rate": 1.5492815792348102e-06, "loss": 0.0127, "step": 705325 }, { "epoch": 6.94, "grad_norm": 0.2885671555995941, "learning_rate": 1.5491574567805614e-06, "loss": 0.0934, "step": 705350 }, { "epoch": 6.94, "grad_norm": 7.387409687042236, "learning_rate": 1.549033334326313e-06, "loss": 0.0217, "step": 705375 }, { "epoch": 6.94, "grad_norm": 2.8955790996551514, "learning_rate": 1.5489092118720646e-06, "loss": 0.0672, "step": 705400 }, { "epoch": 6.94, "grad_norm": 0.09849485009908676, "learning_rate": 1.548785089417816e-06, "loss": 0.0203, "step": 705425 }, { "epoch": 6.94, "grad_norm": 4.991170406341553, "learning_rate": 1.5486609669635677e-06, "loss": 0.0705, "step": 705450 }, { "epoch": 6.94, "grad_norm": 6.655136585235596, "learning_rate": 1.5485368445093193e-06, "loss": 0.0269, "step": 705475 }, { "epoch": 6.94, "grad_norm": 0.20221936702728271, "learning_rate": 1.5484127220550708e-06, "loss": 0.1039, "step": 705500 }, { "epoch": 6.94, "grad_norm": 6.3945536613464355, "learning_rate": 1.5482885996008224e-06, "loss": 0.0292, "step": 705525 }, { "epoch": 6.94, "grad_norm": 2.8940200805664062, "learning_rate": 1.548164477146574e-06, "loss": 0.0535, "step": 705550 }, { "epoch": 6.94, "grad_norm": 10.274682998657227, "learning_rate": 1.5480403546923252e-06, "loss": 0.0162, "step": 705575 }, { "epoch": 6.94, "grad_norm": 2.4801363945007324, "learning_rate": 1.5479162322380769e-06, "loss": 0.0657, "step": 705600 }, { "epoch": 6.94, "grad_norm": 5.705660343170166, "learning_rate": 1.5477921097838283e-06, "loss": 0.0268, "step": 705625 }, { "epoch": 6.94, "grad_norm": 1.801081657409668, "learning_rate": 1.54766798732958e-06, "loss": 0.0781, "step": 705650 }, { "epoch": 6.94, "grad_norm": 0.87559974193573, "learning_rate": 1.5475438648753316e-06, "loss": 0.0213, "step": 705675 }, { "epoch": 6.94, "grad_norm": 1.495924711227417, "learning_rate": 1.547419742421083e-06, "loss": 0.0653, "step": 705700 }, { "epoch": 6.94, "grad_norm": 6.606054782867432, "learning_rate": 1.5472956199668346e-06, "loss": 0.0257, "step": 705725 }, { "epoch": 6.94, "grad_norm": 1.3595645427703857, "learning_rate": 1.5471714975125863e-06, "loss": 0.0795, "step": 705750 }, { "epoch": 6.94, "grad_norm": 9.264883995056152, "learning_rate": 1.5470473750583377e-06, "loss": 0.0153, "step": 705775 }, { "epoch": 6.94, "grad_norm": 4.212717056274414, "learning_rate": 1.5469232526040893e-06, "loss": 0.0867, "step": 705800 }, { "epoch": 6.94, "grad_norm": 0.5241020321846008, "learning_rate": 1.5467991301498407e-06, "loss": 0.0387, "step": 705825 }, { "epoch": 6.94, "grad_norm": 4.174755573272705, "learning_rate": 1.5466750076955922e-06, "loss": 0.0934, "step": 705850 }, { "epoch": 6.94, "grad_norm": 8.060585975646973, "learning_rate": 1.5465508852413438e-06, "loss": 0.0197, "step": 705875 }, { "epoch": 6.94, "grad_norm": 1.2551981210708618, "learning_rate": 1.5464267627870954e-06, "loss": 0.0704, "step": 705900 }, { "epoch": 6.94, "grad_norm": 7.235058307647705, "learning_rate": 1.5463026403328469e-06, "loss": 0.0213, "step": 705925 }, { "epoch": 6.94, "grad_norm": 4.150376796722412, "learning_rate": 1.5461785178785985e-06, "loss": 0.088, "step": 705950 }, { "epoch": 6.94, "grad_norm": 4.02766227722168, "learning_rate": 1.5460543954243501e-06, "loss": 0.0229, "step": 705975 }, { "epoch": 6.94, "grad_norm": 0.8956093192100525, "learning_rate": 1.5459302729701015e-06, "loss": 0.0568, "step": 706000 }, { "epoch": 6.94, "grad_norm": 9.231318473815918, "learning_rate": 1.5458061505158532e-06, "loss": 0.028, "step": 706025 }, { "epoch": 6.94, "grad_norm": 0.6284271478652954, "learning_rate": 1.5456820280616044e-06, "loss": 0.0971, "step": 706050 }, { "epoch": 6.94, "grad_norm": 8.02401065826416, "learning_rate": 1.545557905607356e-06, "loss": 0.0295, "step": 706075 }, { "epoch": 6.94, "grad_norm": 4.534983158111572, "learning_rate": 1.5454337831531077e-06, "loss": 0.0596, "step": 706100 }, { "epoch": 6.94, "grad_norm": 10.30069351196289, "learning_rate": 1.545309660698859e-06, "loss": 0.025, "step": 706125 }, { "epoch": 6.94, "grad_norm": 3.625185966491699, "learning_rate": 1.5451855382446107e-06, "loss": 0.0933, "step": 706150 }, { "epoch": 6.94, "grad_norm": 8.60116958618164, "learning_rate": 1.5450614157903624e-06, "loss": 0.023, "step": 706175 }, { "epoch": 6.94, "grad_norm": 3.2071990966796875, "learning_rate": 1.5449372933361138e-06, "loss": 0.0669, "step": 706200 }, { "epoch": 6.94, "grad_norm": 9.26677131652832, "learning_rate": 1.5448131708818654e-06, "loss": 0.0304, "step": 706225 }, { "epoch": 6.94, "grad_norm": 5.0303120613098145, "learning_rate": 1.544689048427617e-06, "loss": 0.0714, "step": 706250 }, { "epoch": 6.94, "grad_norm": 4.4385881423950195, "learning_rate": 1.5445649259733683e-06, "loss": 0.015, "step": 706275 }, { "epoch": 6.94, "grad_norm": 3.6374526023864746, "learning_rate": 1.54444080351912e-06, "loss": 0.0917, "step": 706300 }, { "epoch": 6.94, "grad_norm": 2.8082661628723145, "learning_rate": 1.5443166810648715e-06, "loss": 0.0178, "step": 706325 }, { "epoch": 6.94, "grad_norm": 3.986898422241211, "learning_rate": 1.544192558610623e-06, "loss": 0.0896, "step": 706350 }, { "epoch": 6.95, "grad_norm": 13.972304344177246, "learning_rate": 1.5440684361563746e-06, "loss": 0.041, "step": 706375 }, { "epoch": 6.95, "grad_norm": 6.320935249328613, "learning_rate": 1.5439443137021262e-06, "loss": 0.1119, "step": 706400 }, { "epoch": 6.95, "grad_norm": 4.877223014831543, "learning_rate": 1.5438201912478776e-06, "loss": 0.0378, "step": 706425 }, { "epoch": 6.95, "grad_norm": 3.413498640060425, "learning_rate": 1.5436960687936293e-06, "loss": 0.0892, "step": 706450 }, { "epoch": 6.95, "grad_norm": 10.659574508666992, "learning_rate": 1.5435719463393805e-06, "loss": 0.0242, "step": 706475 }, { "epoch": 6.95, "grad_norm": 2.9049246311187744, "learning_rate": 1.5434478238851321e-06, "loss": 0.0876, "step": 706500 }, { "epoch": 6.95, "grad_norm": 10.948680877685547, "learning_rate": 1.5433237014308838e-06, "loss": 0.0248, "step": 706525 }, { "epoch": 6.95, "grad_norm": 0.1322675347328186, "learning_rate": 1.5431995789766352e-06, "loss": 0.0706, "step": 706550 }, { "epoch": 6.95, "grad_norm": 3.9072799682617188, "learning_rate": 1.5430754565223868e-06, "loss": 0.0301, "step": 706575 }, { "epoch": 6.95, "grad_norm": 2.711761236190796, "learning_rate": 1.5429513340681385e-06, "loss": 0.0753, "step": 706600 }, { "epoch": 6.95, "grad_norm": 9.888717651367188, "learning_rate": 1.5428272116138899e-06, "loss": 0.0276, "step": 706625 }, { "epoch": 6.95, "grad_norm": 2.167973756790161, "learning_rate": 1.5427030891596415e-06, "loss": 0.0484, "step": 706650 }, { "epoch": 6.95, "grad_norm": 8.101339340209961, "learning_rate": 1.5425789667053932e-06, "loss": 0.02, "step": 706675 }, { "epoch": 6.95, "grad_norm": 1.1002967357635498, "learning_rate": 1.5424548442511444e-06, "loss": 0.0782, "step": 706700 }, { "epoch": 6.95, "grad_norm": 0.9285833239555359, "learning_rate": 1.542330721796896e-06, "loss": 0.0197, "step": 706725 }, { "epoch": 6.95, "grad_norm": 6.867640018463135, "learning_rate": 1.5422065993426476e-06, "loss": 0.1102, "step": 706750 }, { "epoch": 6.95, "grad_norm": 8.020964622497559, "learning_rate": 1.542082476888399e-06, "loss": 0.0181, "step": 706775 }, { "epoch": 6.95, "grad_norm": 0.6432938575744629, "learning_rate": 1.5419583544341507e-06, "loss": 0.0734, "step": 706800 }, { "epoch": 6.95, "grad_norm": 8.672629356384277, "learning_rate": 1.5418342319799023e-06, "loss": 0.0149, "step": 706825 }, { "epoch": 6.95, "grad_norm": 2.1908767223358154, "learning_rate": 1.5417101095256537e-06, "loss": 0.0652, "step": 706850 }, { "epoch": 6.95, "grad_norm": 8.077232360839844, "learning_rate": 1.5415859870714054e-06, "loss": 0.0165, "step": 706875 }, { "epoch": 6.95, "grad_norm": 2.2244696617126465, "learning_rate": 1.541461864617157e-06, "loss": 0.0879, "step": 706900 }, { "epoch": 6.95, "grad_norm": 10.04689884185791, "learning_rate": 1.5413377421629082e-06, "loss": 0.0258, "step": 706925 }, { "epoch": 6.95, "grad_norm": 2.1219396591186523, "learning_rate": 1.5412136197086599e-06, "loss": 0.0586, "step": 706950 }, { "epoch": 6.95, "grad_norm": 3.120028257369995, "learning_rate": 1.5410894972544113e-06, "loss": 0.0155, "step": 706975 }, { "epoch": 6.95, "grad_norm": 4.7660908699035645, "learning_rate": 1.540965374800163e-06, "loss": 0.078, "step": 707000 }, { "epoch": 6.95, "grad_norm": 0.4081408381462097, "learning_rate": 1.5408412523459146e-06, "loss": 0.0234, "step": 707025 }, { "epoch": 6.95, "grad_norm": 8.912281036376953, "learning_rate": 1.540717129891666e-06, "loss": 0.0662, "step": 707050 }, { "epoch": 6.95, "grad_norm": 1.3317424058914185, "learning_rate": 1.5405930074374176e-06, "loss": 0.024, "step": 707075 }, { "epoch": 6.95, "grad_norm": 1.844370722770691, "learning_rate": 1.5404688849831693e-06, "loss": 0.0634, "step": 707100 }, { "epoch": 6.95, "grad_norm": 6.825242042541504, "learning_rate": 1.5403447625289205e-06, "loss": 0.0283, "step": 707125 }, { "epoch": 6.95, "grad_norm": 1.9018207788467407, "learning_rate": 1.540220640074672e-06, "loss": 0.0607, "step": 707150 }, { "epoch": 6.95, "grad_norm": 5.24638557434082, "learning_rate": 1.5400965176204237e-06, "loss": 0.019, "step": 707175 }, { "epoch": 6.95, "grad_norm": 0.2302507907152176, "learning_rate": 1.5399723951661752e-06, "loss": 0.0756, "step": 707200 }, { "epoch": 6.95, "grad_norm": 12.781388282775879, "learning_rate": 1.5398482727119268e-06, "loss": 0.0365, "step": 707225 }, { "epoch": 6.95, "grad_norm": 0.13274982571601868, "learning_rate": 1.5397241502576784e-06, "loss": 0.0733, "step": 707250 }, { "epoch": 6.95, "grad_norm": 7.6838483810424805, "learning_rate": 1.5396000278034298e-06, "loss": 0.0156, "step": 707275 }, { "epoch": 6.95, "grad_norm": 5.411367893218994, "learning_rate": 1.5394759053491815e-06, "loss": 0.0576, "step": 707300 }, { "epoch": 6.95, "grad_norm": 5.148447513580322, "learning_rate": 1.5393517828949331e-06, "loss": 0.023, "step": 707325 }, { "epoch": 6.95, "grad_norm": 0.2576192319393158, "learning_rate": 1.5392276604406845e-06, "loss": 0.057, "step": 707350 }, { "epoch": 6.96, "grad_norm": 14.688440322875977, "learning_rate": 1.5391035379864362e-06, "loss": 0.0282, "step": 707375 }, { "epoch": 6.96, "grad_norm": 2.385514736175537, "learning_rate": 1.5389794155321874e-06, "loss": 0.0871, "step": 707400 }, { "epoch": 6.96, "grad_norm": 8.796531677246094, "learning_rate": 1.538855293077939e-06, "loss": 0.0277, "step": 707425 }, { "epoch": 6.96, "grad_norm": 3.5990145206451416, "learning_rate": 1.5387311706236907e-06, "loss": 0.0868, "step": 707450 }, { "epoch": 6.96, "grad_norm": 4.480856895446777, "learning_rate": 1.538607048169442e-06, "loss": 0.0273, "step": 707475 }, { "epoch": 6.96, "grad_norm": 3.315662145614624, "learning_rate": 1.5384878906133635e-06, "loss": 0.0716, "step": 707500 }, { "epoch": 6.96, "grad_norm": 6.846597194671631, "learning_rate": 1.5383637681591152e-06, "loss": 0.0283, "step": 707525 }, { "epoch": 6.96, "grad_norm": 1.944878101348877, "learning_rate": 1.5382396457048668e-06, "loss": 0.082, "step": 707550 }, { "epoch": 6.96, "grad_norm": 2.23810076713562, "learning_rate": 1.5381155232506182e-06, "loss": 0.0239, "step": 707575 }, { "epoch": 6.96, "grad_norm": 1.871596097946167, "learning_rate": 1.5379914007963699e-06, "loss": 0.0835, "step": 707600 }, { "epoch": 6.96, "grad_norm": 8.216545104980469, "learning_rate": 1.5378672783421215e-06, "loss": 0.0262, "step": 707625 }, { "epoch": 6.96, "grad_norm": 0.9528799653053284, "learning_rate": 1.5377431558878727e-06, "loss": 0.0838, "step": 707650 }, { "epoch": 6.96, "grad_norm": 5.811770915985107, "learning_rate": 1.5376190334336243e-06, "loss": 0.012, "step": 707675 }, { "epoch": 6.96, "grad_norm": 6.30242395401001, "learning_rate": 1.537494910979376e-06, "loss": 0.0916, "step": 707700 }, { "epoch": 6.96, "grad_norm": 14.727826118469238, "learning_rate": 1.5373707885251274e-06, "loss": 0.0468, "step": 707725 }, { "epoch": 6.96, "grad_norm": 3.3075616359710693, "learning_rate": 1.537246666070879e-06, "loss": 0.0674, "step": 707750 }, { "epoch": 6.96, "grad_norm": 4.208934783935547, "learning_rate": 1.5371225436166307e-06, "loss": 0.0378, "step": 707775 }, { "epoch": 6.96, "grad_norm": 0.6136789917945862, "learning_rate": 1.536998421162382e-06, "loss": 0.0647, "step": 707800 }, { "epoch": 6.96, "grad_norm": 8.964168548583984, "learning_rate": 1.5368742987081337e-06, "loss": 0.0238, "step": 707825 }, { "epoch": 6.96, "grad_norm": 3.6032371520996094, "learning_rate": 1.5367501762538854e-06, "loss": 0.0692, "step": 707850 }, { "epoch": 6.96, "grad_norm": 8.843482971191406, "learning_rate": 1.5366260537996366e-06, "loss": 0.0143, "step": 707875 }, { "epoch": 6.96, "grad_norm": 3.6781022548675537, "learning_rate": 1.5365019313453882e-06, "loss": 0.0853, "step": 707900 }, { "epoch": 6.96, "grad_norm": 6.555485248565674, "learning_rate": 1.5363778088911396e-06, "loss": 0.0241, "step": 707925 }, { "epoch": 6.96, "grad_norm": 0.05910820886492729, "learning_rate": 1.5362536864368913e-06, "loss": 0.0716, "step": 707950 }, { "epoch": 6.96, "grad_norm": 6.203505992889404, "learning_rate": 1.536129563982643e-06, "loss": 0.0181, "step": 707975 }, { "epoch": 6.96, "grad_norm": 2.5262057781219482, "learning_rate": 1.5360054415283943e-06, "loss": 0.0772, "step": 708000 }, { "epoch": 6.96, "grad_norm": 9.658415794372559, "learning_rate": 1.535881319074146e-06, "loss": 0.017, "step": 708025 }, { "epoch": 6.96, "grad_norm": 0.11688560992479324, "learning_rate": 1.5357571966198976e-06, "loss": 0.0758, "step": 708050 }, { "epoch": 6.96, "grad_norm": 5.315996170043945, "learning_rate": 1.5356330741656488e-06, "loss": 0.0264, "step": 708075 }, { "epoch": 6.96, "grad_norm": 1.4607620239257812, "learning_rate": 1.5355089517114004e-06, "loss": 0.0746, "step": 708100 }, { "epoch": 6.96, "grad_norm": 10.83080768585205, "learning_rate": 1.535384829257152e-06, "loss": 0.0217, "step": 708125 }, { "epoch": 6.96, "grad_norm": 7.408085823059082, "learning_rate": 1.5352607068029035e-06, "loss": 0.0868, "step": 708150 }, { "epoch": 6.96, "grad_norm": 5.828485488891602, "learning_rate": 1.5351365843486551e-06, "loss": 0.0195, "step": 708175 }, { "epoch": 6.96, "grad_norm": 3.979570150375366, "learning_rate": 1.5350124618944068e-06, "loss": 0.064, "step": 708200 }, { "epoch": 6.96, "grad_norm": 5.350914001464844, "learning_rate": 1.5348883394401582e-06, "loss": 0.0273, "step": 708225 }, { "epoch": 6.96, "grad_norm": 10.965085983276367, "learning_rate": 1.5347642169859098e-06, "loss": 0.0769, "step": 708250 }, { "epoch": 6.96, "grad_norm": 0.619044840335846, "learning_rate": 1.5346400945316615e-06, "loss": 0.0341, "step": 708275 }, { "epoch": 6.96, "grad_norm": 4.835254192352295, "learning_rate": 1.5345159720774127e-06, "loss": 0.0793, "step": 708300 }, { "epoch": 6.96, "grad_norm": 5.378879070281982, "learning_rate": 1.5343918496231643e-06, "loss": 0.0316, "step": 708325 }, { "epoch": 6.96, "grad_norm": 1.6790591478347778, "learning_rate": 1.5342677271689157e-06, "loss": 0.0663, "step": 708350 }, { "epoch": 6.96, "grad_norm": 1.447651743888855, "learning_rate": 1.5341436047146674e-06, "loss": 0.0218, "step": 708375 }, { "epoch": 6.97, "grad_norm": 2.2753396034240723, "learning_rate": 1.534019482260419e-06, "loss": 0.0719, "step": 708400 }, { "epoch": 6.97, "grad_norm": 3.7744877338409424, "learning_rate": 1.5338953598061704e-06, "loss": 0.0209, "step": 708425 }, { "epoch": 6.97, "grad_norm": 5.774767875671387, "learning_rate": 1.533771237351922e-06, "loss": 0.071, "step": 708450 }, { "epoch": 6.97, "grad_norm": 5.73568058013916, "learning_rate": 1.5336471148976737e-06, "loss": 0.0158, "step": 708475 }, { "epoch": 6.97, "grad_norm": 1.3400294780731201, "learning_rate": 1.533522992443425e-06, "loss": 0.0872, "step": 708500 }, { "epoch": 6.97, "grad_norm": 1.4781286716461182, "learning_rate": 1.5333988699891765e-06, "loss": 0.0233, "step": 708525 }, { "epoch": 6.97, "grad_norm": 3.5719335079193115, "learning_rate": 1.5332747475349282e-06, "loss": 0.0628, "step": 708550 }, { "epoch": 6.97, "grad_norm": 10.830164909362793, "learning_rate": 1.5331506250806796e-06, "loss": 0.0257, "step": 708575 }, { "epoch": 6.97, "grad_norm": 6.295597076416016, "learning_rate": 1.5330265026264312e-06, "loss": 0.0793, "step": 708600 }, { "epoch": 6.97, "grad_norm": 8.369959831237793, "learning_rate": 1.5329023801721829e-06, "loss": 0.0323, "step": 708625 }, { "epoch": 6.97, "grad_norm": 0.5962972640991211, "learning_rate": 1.5327782577179343e-06, "loss": 0.0735, "step": 708650 }, { "epoch": 6.97, "grad_norm": 6.260143280029297, "learning_rate": 1.532654135263686e-06, "loss": 0.0291, "step": 708675 }, { "epoch": 6.97, "grad_norm": 12.999613761901855, "learning_rate": 1.5325300128094376e-06, "loss": 0.0878, "step": 708700 }, { "epoch": 6.97, "grad_norm": 8.666141510009766, "learning_rate": 1.532405890355189e-06, "loss": 0.0213, "step": 708725 }, { "epoch": 6.97, "grad_norm": 1.0578103065490723, "learning_rate": 1.5322817679009406e-06, "loss": 0.0573, "step": 708750 }, { "epoch": 6.97, "grad_norm": 8.73642349243164, "learning_rate": 1.5321576454466918e-06, "loss": 0.0137, "step": 708775 }, { "epoch": 6.97, "grad_norm": 3.0400147438049316, "learning_rate": 1.5320335229924435e-06, "loss": 0.11, "step": 708800 }, { "epoch": 6.97, "grad_norm": 4.206503391265869, "learning_rate": 1.531909400538195e-06, "loss": 0.0262, "step": 708825 }, { "epoch": 6.97, "grad_norm": 0.9360343813896179, "learning_rate": 1.5317852780839465e-06, "loss": 0.0594, "step": 708850 }, { "epoch": 6.97, "grad_norm": 7.504525184631348, "learning_rate": 1.5316611556296982e-06, "loss": 0.0226, "step": 708875 }, { "epoch": 6.97, "grad_norm": 1.1002693176269531, "learning_rate": 1.5315370331754498e-06, "loss": 0.0675, "step": 708900 }, { "epoch": 6.97, "grad_norm": 0.5279040336608887, "learning_rate": 1.5314129107212012e-06, "loss": 0.0226, "step": 708925 }, { "epoch": 6.97, "grad_norm": 5.368360996246338, "learning_rate": 1.5312887882669529e-06, "loss": 0.0777, "step": 708950 }, { "epoch": 6.97, "grad_norm": 4.897741794586182, "learning_rate": 1.5311646658127045e-06, "loss": 0.0387, "step": 708975 }, { "epoch": 6.97, "grad_norm": 0.01942363940179348, "learning_rate": 1.5310405433584557e-06, "loss": 0.0973, "step": 709000 }, { "epoch": 6.97, "grad_norm": 8.849030494689941, "learning_rate": 1.5309164209042073e-06, "loss": 0.0266, "step": 709025 }, { "epoch": 6.97, "grad_norm": 0.09539536386728287, "learning_rate": 1.530792298449959e-06, "loss": 0.0806, "step": 709050 }, { "epoch": 6.97, "grad_norm": 5.177926063537598, "learning_rate": 1.5306681759957104e-06, "loss": 0.0314, "step": 709075 }, { "epoch": 6.97, "grad_norm": 1.0158252716064453, "learning_rate": 1.530544053541462e-06, "loss": 0.0816, "step": 709100 }, { "epoch": 6.97, "grad_norm": 23.93450927734375, "learning_rate": 1.5304199310872137e-06, "loss": 0.0178, "step": 709125 }, { "epoch": 6.97, "grad_norm": 1.2910246849060059, "learning_rate": 1.530295808632965e-06, "loss": 0.0846, "step": 709150 }, { "epoch": 6.97, "grad_norm": 9.228464126586914, "learning_rate": 1.5301716861787167e-06, "loss": 0.0235, "step": 709175 }, { "epoch": 6.97, "grad_norm": 2.0495457649230957, "learning_rate": 1.530047563724468e-06, "loss": 0.0789, "step": 709200 }, { "epoch": 6.97, "grad_norm": 0.6185619831085205, "learning_rate": 1.5299234412702196e-06, "loss": 0.0146, "step": 709225 }, { "epoch": 6.97, "grad_norm": 1.3260008096694946, "learning_rate": 1.5297993188159712e-06, "loss": 0.0516, "step": 709250 }, { "epoch": 6.97, "grad_norm": 7.632351398468018, "learning_rate": 1.5296751963617226e-06, "loss": 0.02, "step": 709275 }, { "epoch": 6.97, "grad_norm": 0.4389992356300354, "learning_rate": 1.5295510739074743e-06, "loss": 0.0796, "step": 709300 }, { "epoch": 6.97, "grad_norm": 8.165889739990234, "learning_rate": 1.529426951453226e-06, "loss": 0.0363, "step": 709325 }, { "epoch": 6.97, "grad_norm": 0.6254473328590393, "learning_rate": 1.5293028289989773e-06, "loss": 0.0986, "step": 709350 }, { "epoch": 6.97, "grad_norm": 5.494479179382324, "learning_rate": 1.529178706544729e-06, "loss": 0.0211, "step": 709375 }, { "epoch": 6.97, "grad_norm": 0.5733268857002258, "learning_rate": 1.5290545840904806e-06, "loss": 0.0769, "step": 709400 }, { "epoch": 6.98, "grad_norm": 12.20936393737793, "learning_rate": 1.5289304616362318e-06, "loss": 0.0127, "step": 709425 }, { "epoch": 6.98, "grad_norm": 0.22214987874031067, "learning_rate": 1.5288063391819834e-06, "loss": 0.0609, "step": 709450 }, { "epoch": 6.98, "grad_norm": 2.1756088733673096, "learning_rate": 1.528682216727735e-06, "loss": 0.0264, "step": 709475 }, { "epoch": 6.98, "grad_norm": 10.121329307556152, "learning_rate": 1.5285580942734865e-06, "loss": 0.0721, "step": 709500 }, { "epoch": 6.98, "grad_norm": 8.352518081665039, "learning_rate": 1.5284339718192381e-06, "loss": 0.0323, "step": 709525 }, { "epoch": 6.98, "grad_norm": 0.38867902755737305, "learning_rate": 1.5283098493649898e-06, "loss": 0.0759, "step": 709550 }, { "epoch": 6.98, "grad_norm": 1.0982029438018799, "learning_rate": 1.5281857269107412e-06, "loss": 0.0215, "step": 709575 }, { "epoch": 6.98, "grad_norm": 3.942270517349243, "learning_rate": 1.5280616044564928e-06, "loss": 0.0708, "step": 709600 }, { "epoch": 6.98, "grad_norm": 3.813535213470459, "learning_rate": 1.527937482002244e-06, "loss": 0.0204, "step": 709625 }, { "epoch": 6.98, "grad_norm": 6.295980930328369, "learning_rate": 1.5278133595479957e-06, "loss": 0.0712, "step": 709650 }, { "epoch": 6.98, "grad_norm": 17.228302001953125, "learning_rate": 1.5276892370937473e-06, "loss": 0.0308, "step": 709675 }, { "epoch": 6.98, "grad_norm": 4.401004791259766, "learning_rate": 1.5275700795376688e-06, "loss": 0.0732, "step": 709700 }, { "epoch": 6.98, "grad_norm": 3.6829092502593994, "learning_rate": 1.5274459570834204e-06, "loss": 0.0192, "step": 709725 }, { "epoch": 6.98, "grad_norm": 0.6345998644828796, "learning_rate": 1.5273218346291718e-06, "loss": 0.0739, "step": 709750 }, { "epoch": 6.98, "grad_norm": 7.139726161956787, "learning_rate": 1.5271977121749234e-06, "loss": 0.0257, "step": 709775 }, { "epoch": 6.98, "grad_norm": 1.373911738395691, "learning_rate": 1.5270735897206749e-06, "loss": 0.0931, "step": 709800 }, { "epoch": 6.98, "grad_norm": 9.056135177612305, "learning_rate": 1.5269494672664265e-06, "loss": 0.0216, "step": 709825 }, { "epoch": 6.98, "grad_norm": 0.8631097078323364, "learning_rate": 1.5268253448121781e-06, "loss": 0.0671, "step": 709850 }, { "epoch": 6.98, "grad_norm": 9.639927864074707, "learning_rate": 1.5267012223579294e-06, "loss": 0.0204, "step": 709875 }, { "epoch": 6.98, "grad_norm": 3.8876545429229736, "learning_rate": 1.526577099903681e-06, "loss": 0.0599, "step": 709900 }, { "epoch": 6.98, "grad_norm": 2.179591417312622, "learning_rate": 1.5264529774494326e-06, "loss": 0.0188, "step": 709925 }, { "epoch": 6.98, "grad_norm": 0.6470537185668945, "learning_rate": 1.526328854995184e-06, "loss": 0.0766, "step": 709950 }, { "epoch": 6.98, "grad_norm": 1.9221527576446533, "learning_rate": 1.5262047325409357e-06, "loss": 0.0193, "step": 709975 }, { "epoch": 6.98, "grad_norm": 6.032917022705078, "learning_rate": 1.5260806100866873e-06, "loss": 0.0766, "step": 710000 }, { "epoch": 6.98, "grad_norm": 11.599783897399902, "learning_rate": 1.5259564876324387e-06, "loss": 0.0436, "step": 710025 }, { "epoch": 6.98, "grad_norm": 4.26804780960083, "learning_rate": 1.5258323651781904e-06, "loss": 0.0798, "step": 710050 }, { "epoch": 6.98, "grad_norm": 7.141132354736328, "learning_rate": 1.525708242723942e-06, "loss": 0.0128, "step": 710075 }, { "epoch": 6.98, "grad_norm": 0.5749006867408752, "learning_rate": 1.5255841202696932e-06, "loss": 0.089, "step": 710100 }, { "epoch": 6.98, "grad_norm": 8.03482437133789, "learning_rate": 1.5254599978154449e-06, "loss": 0.0287, "step": 710125 }, { "epoch": 6.98, "grad_norm": 4.7848100662231445, "learning_rate": 1.5253358753611965e-06, "loss": 0.0669, "step": 710150 }, { "epoch": 6.98, "grad_norm": 14.640022277832031, "learning_rate": 1.525211752906948e-06, "loss": 0.0189, "step": 710175 }, { "epoch": 6.98, "grad_norm": 3.351454019546509, "learning_rate": 1.5250876304526996e-06, "loss": 0.0611, "step": 710200 }, { "epoch": 6.98, "grad_norm": 9.964302062988281, "learning_rate": 1.524963507998451e-06, "loss": 0.0348, "step": 710225 }, { "epoch": 6.98, "grad_norm": 3.9656331539154053, "learning_rate": 1.5248393855442026e-06, "loss": 0.0661, "step": 710250 }, { "epoch": 6.98, "grad_norm": 12.339731216430664, "learning_rate": 1.5247152630899542e-06, "loss": 0.0189, "step": 710275 }, { "epoch": 6.98, "grad_norm": 5.201011657714844, "learning_rate": 1.5245911406357057e-06, "loss": 0.0844, "step": 710300 }, { "epoch": 6.98, "grad_norm": 11.48963737487793, "learning_rate": 1.5244670181814573e-06, "loss": 0.0239, "step": 710325 }, { "epoch": 6.98, "grad_norm": 2.2357125282287598, "learning_rate": 1.524342895727209e-06, "loss": 0.0685, "step": 710350 }, { "epoch": 6.98, "grad_norm": 11.184016227722168, "learning_rate": 1.5242187732729601e-06, "loss": 0.0319, "step": 710375 }, { "epoch": 6.98, "grad_norm": 2.9535739421844482, "learning_rate": 1.5240946508187118e-06, "loss": 0.0667, "step": 710400 }, { "epoch": 6.99, "grad_norm": 3.3388495445251465, "learning_rate": 1.5239705283644634e-06, "loss": 0.0261, "step": 710425 }, { "epoch": 6.99, "grad_norm": 7.557064056396484, "learning_rate": 1.5238464059102148e-06, "loss": 0.1023, "step": 710450 }, { "epoch": 6.99, "grad_norm": 10.011123657226562, "learning_rate": 1.5237222834559665e-06, "loss": 0.0142, "step": 710475 }, { "epoch": 6.99, "grad_norm": 3.3574256896972656, "learning_rate": 1.5235981610017181e-06, "loss": 0.0539, "step": 710500 }, { "epoch": 6.99, "grad_norm": 1.6996389627456665, "learning_rate": 1.5234740385474695e-06, "loss": 0.0095, "step": 710525 }, { "epoch": 6.99, "grad_norm": 5.85147762298584, "learning_rate": 1.5233499160932212e-06, "loss": 0.0804, "step": 710550 }, { "epoch": 6.99, "grad_norm": 10.995570182800293, "learning_rate": 1.5232257936389728e-06, "loss": 0.0327, "step": 710575 }, { "epoch": 6.99, "grad_norm": 1.1692308187484741, "learning_rate": 1.523101671184724e-06, "loss": 0.058, "step": 710600 }, { "epoch": 6.99, "grad_norm": 14.530533790588379, "learning_rate": 1.5229775487304757e-06, "loss": 0.0288, "step": 710625 }, { "epoch": 6.99, "grad_norm": 4.6016459465026855, "learning_rate": 1.522853426276227e-06, "loss": 0.0667, "step": 710650 }, { "epoch": 6.99, "grad_norm": 3.8703465461730957, "learning_rate": 1.5227293038219787e-06, "loss": 0.0223, "step": 710675 }, { "epoch": 6.99, "grad_norm": 1.3623796701431274, "learning_rate": 1.5226051813677303e-06, "loss": 0.0823, "step": 710700 }, { "epoch": 6.99, "grad_norm": 7.071779727935791, "learning_rate": 1.5224810589134818e-06, "loss": 0.0272, "step": 710725 }, { "epoch": 6.99, "grad_norm": 1.0480163097381592, "learning_rate": 1.5223569364592334e-06, "loss": 0.0611, "step": 710750 }, { "epoch": 6.99, "grad_norm": 8.252824783325195, "learning_rate": 1.522232814004985e-06, "loss": 0.0131, "step": 710775 }, { "epoch": 6.99, "grad_norm": 2.013035297393799, "learning_rate": 1.5221086915507362e-06, "loss": 0.0752, "step": 710800 }, { "epoch": 6.99, "grad_norm": 10.466934204101562, "learning_rate": 1.5219845690964879e-06, "loss": 0.0293, "step": 710825 }, { "epoch": 6.99, "grad_norm": 3.0693447589874268, "learning_rate": 1.5218604466422395e-06, "loss": 0.0708, "step": 710850 }, { "epoch": 6.99, "grad_norm": 2.640058755874634, "learning_rate": 1.521736324187991e-06, "loss": 0.0221, "step": 710875 }, { "epoch": 6.99, "grad_norm": 2.853945732116699, "learning_rate": 1.5216122017337426e-06, "loss": 0.0878, "step": 710900 }, { "epoch": 6.99, "grad_norm": 6.567704677581787, "learning_rate": 1.5214880792794942e-06, "loss": 0.0319, "step": 710925 }, { "epoch": 6.99, "grad_norm": 1.36579430103302, "learning_rate": 1.5213639568252456e-06, "loss": 0.0833, "step": 710950 }, { "epoch": 6.99, "grad_norm": 3.3485305309295654, "learning_rate": 1.5212398343709973e-06, "loss": 0.0178, "step": 710975 }, { "epoch": 6.99, "grad_norm": 5.039663314819336, "learning_rate": 1.521115711916749e-06, "loss": 0.0842, "step": 711000 }, { "epoch": 6.99, "grad_norm": 6.122385501861572, "learning_rate": 1.5209915894625001e-06, "loss": 0.0201, "step": 711025 }, { "epoch": 6.99, "grad_norm": 0.20399673283100128, "learning_rate": 1.5208674670082518e-06, "loss": 0.0733, "step": 711050 }, { "epoch": 6.99, "grad_norm": 5.0317206382751465, "learning_rate": 1.5207433445540032e-06, "loss": 0.0191, "step": 711075 }, { "epoch": 6.99, "grad_norm": 2.7547388076782227, "learning_rate": 1.5206192220997548e-06, "loss": 0.0821, "step": 711100 }, { "epoch": 6.99, "grad_norm": 4.54032039642334, "learning_rate": 1.5204950996455064e-06, "loss": 0.0128, "step": 711125 }, { "epoch": 6.99, "grad_norm": 3.8137428760528564, "learning_rate": 1.5203709771912579e-06, "loss": 0.0879, "step": 711150 }, { "epoch": 6.99, "grad_norm": 14.586237907409668, "learning_rate": 1.5202468547370095e-06, "loss": 0.0354, "step": 711175 }, { "epoch": 6.99, "grad_norm": 3.3831894397735596, "learning_rate": 1.5201227322827611e-06, "loss": 0.0963, "step": 711200 }, { "epoch": 6.99, "grad_norm": 11.523430824279785, "learning_rate": 1.5199986098285123e-06, "loss": 0.0344, "step": 711225 }, { "epoch": 6.99, "grad_norm": 1.0243018865585327, "learning_rate": 1.519874487374264e-06, "loss": 0.0806, "step": 711250 }, { "epoch": 6.99, "grad_norm": 0.2759586572647095, "learning_rate": 1.5197503649200156e-06, "loss": 0.0179, "step": 711275 }, { "epoch": 6.99, "grad_norm": 1.9314937591552734, "learning_rate": 1.519626242465767e-06, "loss": 0.0839, "step": 711300 }, { "epoch": 6.99, "grad_norm": 0.7159212231636047, "learning_rate": 1.5195021200115187e-06, "loss": 0.0226, "step": 711325 }, { "epoch": 6.99, "grad_norm": 1.449992299079895, "learning_rate": 1.5193779975572703e-06, "loss": 0.0721, "step": 711350 }, { "epoch": 6.99, "grad_norm": 6.900263786315918, "learning_rate": 1.5192538751030217e-06, "loss": 0.0221, "step": 711375 }, { "epoch": 6.99, "grad_norm": 6.015893459320068, "learning_rate": 1.5191297526487734e-06, "loss": 0.0878, "step": 711400 }, { "epoch": 6.99, "grad_norm": 1.8966823816299438, "learning_rate": 1.519005630194525e-06, "loss": 0.0301, "step": 711425 }, { "epoch": 7.0, "grad_norm": 4.281893730163574, "learning_rate": 1.5188815077402762e-06, "loss": 0.0766, "step": 711450 }, { "epoch": 7.0, "grad_norm": 9.516505241394043, "learning_rate": 1.5187573852860279e-06, "loss": 0.019, "step": 711475 }, { "epoch": 7.0, "grad_norm": 1.4264024496078491, "learning_rate": 1.5186332628317793e-06, "loss": 0.074, "step": 711500 }, { "epoch": 7.0, "grad_norm": 9.61434268951416, "learning_rate": 1.518509140377531e-06, "loss": 0.0256, "step": 711525 }, { "epoch": 7.0, "grad_norm": 0.511604905128479, "learning_rate": 1.5183850179232825e-06, "loss": 0.0982, "step": 711550 }, { "epoch": 7.0, "grad_norm": 4.940730571746826, "learning_rate": 1.518260895469034e-06, "loss": 0.0299, "step": 711575 }, { "epoch": 7.0, "grad_norm": 3.7964015007019043, "learning_rate": 1.5181367730147856e-06, "loss": 0.0811, "step": 711600 }, { "epoch": 7.0, "grad_norm": 21.235689163208008, "learning_rate": 1.5180126505605372e-06, "loss": 0.0359, "step": 711625 }, { "epoch": 7.0, "grad_norm": 1.1917779445648193, "learning_rate": 1.5178885281062887e-06, "loss": 0.0989, "step": 711650 }, { "epoch": 7.0, "grad_norm": 3.35186505317688, "learning_rate": 1.5177644056520403e-06, "loss": 0.0255, "step": 711675 }, { "epoch": 7.0, "grad_norm": 0.9323028326034546, "learning_rate": 1.517640283197792e-06, "loss": 0.0677, "step": 711700 }, { "epoch": 7.0, "grad_norm": 2.957573652267456, "learning_rate": 1.5175161607435431e-06, "loss": 0.0275, "step": 711725 }, { "epoch": 7.0, "grad_norm": 11.707060813903809, "learning_rate": 1.5173920382892948e-06, "loss": 0.0732, "step": 711750 }, { "epoch": 7.0, "grad_norm": 2.8656158447265625, "learning_rate": 1.5172679158350464e-06, "loss": 0.0371, "step": 711775 }, { "epoch": 7.0, "grad_norm": 7.323099613189697, "learning_rate": 1.5171437933807978e-06, "loss": 0.0767, "step": 711800 }, { "epoch": 7.0, "grad_norm": 12.235686302185059, "learning_rate": 1.5170196709265495e-06, "loss": 0.0323, "step": 711825 }, { "epoch": 7.0, "grad_norm": 6.006028175354004, "learning_rate": 1.516895548472301e-06, "loss": 0.0745, "step": 711850 }, { "epoch": 7.0, "grad_norm": 4.36646842956543, "learning_rate": 1.5167714260180525e-06, "loss": 0.0243, "step": 711875 }, { "epoch": 7.0, "grad_norm": 0.0017272623954340816, "learning_rate": 1.5166473035638042e-06, "loss": 0.0972, "step": 711900 }, { "epoch": 7.0, "grad_norm": 9.048266410827637, "learning_rate": 1.5165231811095554e-06, "loss": 0.0222, "step": 711925 }, { "epoch": 7.0, "grad_norm": 1.8233232498168945, "learning_rate": 1.5164040235534772e-06, "loss": 0.0779, "step": 711950 }, { "epoch": 7.0, "grad_norm": 0.1606200635433197, "learning_rate": 1.5162799010992285e-06, "loss": 0.0231, "step": 711975 }, { "epoch": 7.0, "grad_norm": 1.6439735889434814, "learning_rate": 1.51615577864498e-06, "loss": 0.0398, "step": 712000 }, { "epoch": 7.0, "grad_norm": 8.382730484008789, "learning_rate": 1.5160316561907317e-06, "loss": 0.0259, "step": 712025 }, { "epoch": 7.0, "grad_norm": 4.112561225891113, "learning_rate": 1.5159075337364832e-06, "loss": 0.0469, "step": 712050 }, { "epoch": 7.0, "grad_norm": 2.0646018981933594, "learning_rate": 1.5157834112822348e-06, "loss": 0.0227, "step": 712075 }, { "epoch": 7.0, "grad_norm": 3.436486005783081, "learning_rate": 1.5156592888279862e-06, "loss": 0.0797, "step": 712100 }, { "epoch": 7.0, "grad_norm": 3.1856689453125, "learning_rate": 1.5155351663737378e-06, "loss": 0.0203, "step": 712125 }, { "epoch": 7.0, "grad_norm": 2.6966962814331055, "learning_rate": 1.5154110439194895e-06, "loss": 0.0543, "step": 712150 }, { "epoch": 7.0, "grad_norm": 3.930514335632324, "learning_rate": 1.5152869214652407e-06, "loss": 0.014, "step": 712175 }, { "epoch": 7.0, "grad_norm": 4.107699394226074, "learning_rate": 1.5151627990109923e-06, "loss": 0.062, "step": 712200 }, { "epoch": 7.0, "grad_norm": 0.2905219495296478, "learning_rate": 1.515038676556744e-06, "loss": 0.0169, "step": 712225 }, { "epoch": 7.0, "grad_norm": 5.203507900238037, "learning_rate": 1.5149145541024954e-06, "loss": 0.0572, "step": 712250 }, { "epoch": 7.0, "grad_norm": 1.0654603242874146, "learning_rate": 1.514790431648247e-06, "loss": 0.0141, "step": 712275 }, { "epoch": 7.0, "grad_norm": 2.133563756942749, "learning_rate": 1.5146663091939987e-06, "loss": 0.0557, "step": 712300 }, { "epoch": 7.0, "grad_norm": 8.247903823852539, "learning_rate": 1.51454218673975e-06, "loss": 0.0261, "step": 712325 }, { "epoch": 7.0, "grad_norm": 9.309101104736328, "learning_rate": 1.5144180642855017e-06, "loss": 0.0417, "step": 712350 }, { "epoch": 7.0, "grad_norm": 6.5031514167785645, "learning_rate": 1.5142939418312534e-06, "loss": 0.0098, "step": 712375 }, { "epoch": 7.0, "grad_norm": 4.4371337890625, "learning_rate": 1.5141698193770046e-06, "loss": 0.0397, "step": 712400 }, { "epoch": 7.0, "grad_norm": 0.7366735935211182, "learning_rate": 1.5140456969227562e-06, "loss": 0.0194, "step": 712425 }, { "epoch": 7.0, "grad_norm": 3.670401096343994, "learning_rate": 1.5139215744685078e-06, "loss": 0.0671, "step": 712450 }, { "epoch": 7.01, "grad_norm": 0.01946152001619339, "learning_rate": 1.5137974520142593e-06, "loss": 0.0206, "step": 712475 }, { "epoch": 7.01, "grad_norm": 2.98008131980896, "learning_rate": 1.5136733295600109e-06, "loss": 0.0624, "step": 712500 }, { "epoch": 7.01, "grad_norm": 3.175826072692871, "learning_rate": 1.5135492071057623e-06, "loss": 0.0105, "step": 712525 }, { "epoch": 7.01, "grad_norm": 4.1002373695373535, "learning_rate": 1.513425084651514e-06, "loss": 0.0573, "step": 712550 }, { "epoch": 7.01, "grad_norm": 1.8272572755813599, "learning_rate": 1.5133009621972656e-06, "loss": 0.0183, "step": 712575 }, { "epoch": 7.01, "grad_norm": 2.7212820053100586, "learning_rate": 1.5131768397430168e-06, "loss": 0.0436, "step": 712600 }, { "epoch": 7.01, "grad_norm": 1.1651909351348877, "learning_rate": 1.5130527172887684e-06, "loss": 0.0128, "step": 712625 }, { "epoch": 7.01, "grad_norm": 4.973837375640869, "learning_rate": 1.51292859483452e-06, "loss": 0.057, "step": 712650 }, { "epoch": 7.01, "grad_norm": 1.759178876876831, "learning_rate": 1.5128044723802715e-06, "loss": 0.0096, "step": 712675 }, { "epoch": 7.01, "grad_norm": 4.86405086517334, "learning_rate": 1.5126803499260231e-06, "loss": 0.0546, "step": 712700 }, { "epoch": 7.01, "grad_norm": 0.8851847052574158, "learning_rate": 1.5125562274717748e-06, "loss": 0.0174, "step": 712725 }, { "epoch": 7.01, "grad_norm": 2.3955655097961426, "learning_rate": 1.5124321050175262e-06, "loss": 0.0516, "step": 712750 }, { "epoch": 7.01, "grad_norm": 7.471696853637695, "learning_rate": 1.5123079825632778e-06, "loss": 0.0261, "step": 712775 }, { "epoch": 7.01, "grad_norm": 3.578237771987915, "learning_rate": 1.5121838601090295e-06, "loss": 0.0627, "step": 712800 }, { "epoch": 7.01, "grad_norm": 4.311834335327148, "learning_rate": 1.5120597376547807e-06, "loss": 0.0149, "step": 712825 }, { "epoch": 7.01, "grad_norm": 3.9312500953674316, "learning_rate": 1.5119356152005323e-06, "loss": 0.0535, "step": 712850 }, { "epoch": 7.01, "grad_norm": 3.414271831512451, "learning_rate": 1.511811492746284e-06, "loss": 0.0172, "step": 712875 }, { "epoch": 7.01, "grad_norm": 2.7328319549560547, "learning_rate": 1.5116873702920354e-06, "loss": 0.048, "step": 712900 }, { "epoch": 7.01, "grad_norm": 10.943138122558594, "learning_rate": 1.511563247837787e-06, "loss": 0.023, "step": 712925 }, { "epoch": 7.01, "grad_norm": 3.8436429500579834, "learning_rate": 1.5114391253835384e-06, "loss": 0.0462, "step": 712950 }, { "epoch": 7.01, "grad_norm": 3.0382916927337646, "learning_rate": 1.51131500292929e-06, "loss": 0.0181, "step": 712975 }, { "epoch": 7.01, "grad_norm": 2.0403008460998535, "learning_rate": 1.5111908804750417e-06, "loss": 0.0459, "step": 713000 }, { "epoch": 7.01, "grad_norm": 4.478000164031982, "learning_rate": 1.5110667580207931e-06, "loss": 0.0195, "step": 713025 }, { "epoch": 7.01, "grad_norm": 1.9291926622390747, "learning_rate": 1.5109426355665445e-06, "loss": 0.0665, "step": 713050 }, { "epoch": 7.01, "grad_norm": 5.153026103973389, "learning_rate": 1.5108185131122962e-06, "loss": 0.016, "step": 713075 }, { "epoch": 7.01, "grad_norm": 3.465017795562744, "learning_rate": 1.5106943906580476e-06, "loss": 0.078, "step": 713100 }, { "epoch": 7.01, "grad_norm": 0.21752502024173737, "learning_rate": 1.5105702682037992e-06, "loss": 0.0205, "step": 713125 }, { "epoch": 7.01, "grad_norm": 3.075190305709839, "learning_rate": 1.5104461457495509e-06, "loss": 0.0445, "step": 713150 }, { "epoch": 7.01, "grad_norm": 6.5100507736206055, "learning_rate": 1.5103220232953023e-06, "loss": 0.0228, "step": 713175 }, { "epoch": 7.01, "grad_norm": 3.8608696460723877, "learning_rate": 1.510197900841054e-06, "loss": 0.0538, "step": 713200 }, { "epoch": 7.01, "grad_norm": 0.166034534573555, "learning_rate": 1.5100737783868056e-06, "loss": 0.0148, "step": 713225 }, { "epoch": 7.01, "grad_norm": 3.1354827880859375, "learning_rate": 1.509949655932557e-06, "loss": 0.0761, "step": 713250 }, { "epoch": 7.01, "grad_norm": 16.17881202697754, "learning_rate": 1.5098255334783086e-06, "loss": 0.0161, "step": 713275 }, { "epoch": 7.01, "grad_norm": 4.059875011444092, "learning_rate": 1.5097014110240602e-06, "loss": 0.0763, "step": 713300 }, { "epoch": 7.01, "grad_norm": 0.09631328284740448, "learning_rate": 1.5095772885698115e-06, "loss": 0.0196, "step": 713325 }, { "epoch": 7.01, "grad_norm": 2.1832404136657715, "learning_rate": 1.509453166115563e-06, "loss": 0.0653, "step": 713350 }, { "epoch": 7.01, "grad_norm": 0.3637285828590393, "learning_rate": 1.5093290436613145e-06, "loss": 0.0224, "step": 713375 }, { "epoch": 7.01, "grad_norm": 2.0811944007873535, "learning_rate": 1.5092049212070661e-06, "loss": 0.0571, "step": 713400 }, { "epoch": 7.01, "grad_norm": 0.4275142550468445, "learning_rate": 1.5090807987528178e-06, "loss": 0.011, "step": 713425 }, { "epoch": 7.01, "grad_norm": 3.317430019378662, "learning_rate": 1.5089566762985692e-06, "loss": 0.0641, "step": 713450 }, { "epoch": 7.02, "grad_norm": 8.602635383605957, "learning_rate": 1.5088325538443208e-06, "loss": 0.0126, "step": 713475 }, { "epoch": 7.02, "grad_norm": 3.064203977584839, "learning_rate": 1.5087084313900725e-06, "loss": 0.0648, "step": 713500 }, { "epoch": 7.02, "grad_norm": 0.18223267793655396, "learning_rate": 1.5085843089358237e-06, "loss": 0.0258, "step": 713525 }, { "epoch": 7.02, "grad_norm": 2.5590412616729736, "learning_rate": 1.5084601864815753e-06, "loss": 0.0439, "step": 713550 }, { "epoch": 7.02, "grad_norm": 4.428720951080322, "learning_rate": 1.508336064027327e-06, "loss": 0.0207, "step": 713575 }, { "epoch": 7.02, "grad_norm": 3.1116466522216797, "learning_rate": 1.5082119415730784e-06, "loss": 0.0696, "step": 713600 }, { "epoch": 7.02, "grad_norm": 0.3467937707901001, "learning_rate": 1.50808781911883e-06, "loss": 0.011, "step": 713625 }, { "epoch": 7.02, "grad_norm": 2.1969709396362305, "learning_rate": 1.5079636966645817e-06, "loss": 0.0544, "step": 713650 }, { "epoch": 7.02, "grad_norm": 3.080925226211548, "learning_rate": 1.507839574210333e-06, "loss": 0.0136, "step": 713675 }, { "epoch": 7.02, "grad_norm": 4.398599624633789, "learning_rate": 1.5077154517560847e-06, "loss": 0.052, "step": 713700 }, { "epoch": 7.02, "grad_norm": 0.014963001012802124, "learning_rate": 1.5075913293018363e-06, "loss": 0.0225, "step": 713725 }, { "epoch": 7.02, "grad_norm": 3.487959384918213, "learning_rate": 1.5074672068475876e-06, "loss": 0.0711, "step": 713750 }, { "epoch": 7.02, "grad_norm": 3.675971508026123, "learning_rate": 1.5073430843933392e-06, "loss": 0.0176, "step": 713775 }, { "epoch": 7.02, "grad_norm": 5.377338409423828, "learning_rate": 1.5072189619390906e-06, "loss": 0.0602, "step": 713800 }, { "epoch": 7.02, "grad_norm": 7.351321697235107, "learning_rate": 1.5070948394848422e-06, "loss": 0.0254, "step": 713825 }, { "epoch": 7.02, "grad_norm": 3.2147719860076904, "learning_rate": 1.5069707170305939e-06, "loss": 0.0421, "step": 713850 }, { "epoch": 7.02, "grad_norm": 0.23777088522911072, "learning_rate": 1.5068465945763453e-06, "loss": 0.0091, "step": 713875 }, { "epoch": 7.02, "grad_norm": 5.646502494812012, "learning_rate": 1.506722472122097e-06, "loss": 0.0453, "step": 713900 }, { "epoch": 7.02, "grad_norm": 0.1649477183818817, "learning_rate": 1.5065983496678486e-06, "loss": 0.0177, "step": 713925 }, { "epoch": 7.02, "grad_norm": 3.457608699798584, "learning_rate": 1.5064742272135998e-06, "loss": 0.0563, "step": 713950 }, { "epoch": 7.02, "grad_norm": 4.150802135467529, "learning_rate": 1.5063501047593514e-06, "loss": 0.0191, "step": 713975 }, { "epoch": 7.02, "grad_norm": 3.246624231338501, "learning_rate": 1.506225982305103e-06, "loss": 0.0477, "step": 714000 }, { "epoch": 7.02, "grad_norm": 1.8853514194488525, "learning_rate": 1.5061018598508545e-06, "loss": 0.0173, "step": 714025 }, { "epoch": 7.02, "grad_norm": 3.4228978157043457, "learning_rate": 1.5059777373966061e-06, "loss": 0.0701, "step": 714050 }, { "epoch": 7.02, "grad_norm": 5.386983394622803, "learning_rate": 1.5058536149423578e-06, "loss": 0.0162, "step": 714075 }, { "epoch": 7.02, "grad_norm": 9.765894889831543, "learning_rate": 1.5057294924881092e-06, "loss": 0.0394, "step": 714100 }, { "epoch": 7.02, "grad_norm": 0.053333546966314316, "learning_rate": 1.5056053700338608e-06, "loss": 0.0277, "step": 714125 }, { "epoch": 7.02, "grad_norm": 2.2190206050872803, "learning_rate": 1.5054812475796124e-06, "loss": 0.0541, "step": 714150 }, { "epoch": 7.02, "grad_norm": 5.602584362030029, "learning_rate": 1.5053571251253637e-06, "loss": 0.0155, "step": 714175 }, { "epoch": 7.02, "grad_norm": 2.5883164405822754, "learning_rate": 1.5052330026711153e-06, "loss": 0.056, "step": 714200 }, { "epoch": 7.02, "grad_norm": 0.699135959148407, "learning_rate": 1.5051088802168667e-06, "loss": 0.0133, "step": 714225 }, { "epoch": 7.02, "grad_norm": 5.525954246520996, "learning_rate": 1.5049847577626184e-06, "loss": 0.0681, "step": 714250 }, { "epoch": 7.02, "grad_norm": 6.32174825668335, "learning_rate": 1.50486063530837e-06, "loss": 0.0283, "step": 714275 }, { "epoch": 7.02, "grad_norm": 3.434973955154419, "learning_rate": 1.5047414777522914e-06, "loss": 0.0834, "step": 714300 }, { "epoch": 7.02, "grad_norm": 0.29974305629730225, "learning_rate": 1.504617355298043e-06, "loss": 0.0207, "step": 714325 }, { "epoch": 7.02, "grad_norm": 3.5695266723632812, "learning_rate": 1.5044932328437945e-06, "loss": 0.0488, "step": 714350 }, { "epoch": 7.02, "grad_norm": 0.8714238405227661, "learning_rate": 1.5043691103895461e-06, "loss": 0.0176, "step": 714375 }, { "epoch": 7.02, "grad_norm": 3.392294406890869, "learning_rate": 1.5042449879352973e-06, "loss": 0.0482, "step": 714400 }, { "epoch": 7.02, "grad_norm": 3.4984235763549805, "learning_rate": 1.504120865481049e-06, "loss": 0.0094, "step": 714425 }, { "epoch": 7.02, "grad_norm": 5.532963752746582, "learning_rate": 1.5039967430268006e-06, "loss": 0.0599, "step": 714450 }, { "epoch": 7.02, "grad_norm": 1.1770634651184082, "learning_rate": 1.503872620572552e-06, "loss": 0.0114, "step": 714475 }, { "epoch": 7.03, "grad_norm": 2.853567361831665, "learning_rate": 1.5037484981183037e-06, "loss": 0.0998, "step": 714500 }, { "epoch": 7.03, "grad_norm": 0.16331736743450165, "learning_rate": 1.5036243756640553e-06, "loss": 0.0139, "step": 714525 }, { "epoch": 7.03, "grad_norm": 2.0507709980010986, "learning_rate": 1.5035002532098067e-06, "loss": 0.0455, "step": 714550 }, { "epoch": 7.03, "grad_norm": 7.754267692565918, "learning_rate": 1.5033761307555584e-06, "loss": 0.027, "step": 714575 }, { "epoch": 7.03, "grad_norm": 1.4712415933609009, "learning_rate": 1.50325200830131e-06, "loss": 0.0698, "step": 714600 }, { "epoch": 7.03, "grad_norm": 2.4888126850128174, "learning_rate": 1.5031278858470614e-06, "loss": 0.0156, "step": 714625 }, { "epoch": 7.03, "grad_norm": 1.7929569482803345, "learning_rate": 1.503003763392813e-06, "loss": 0.0444, "step": 714650 }, { "epoch": 7.03, "grad_norm": 0.9227637052536011, "learning_rate": 1.5028796409385647e-06, "loss": 0.0207, "step": 714675 }, { "epoch": 7.03, "grad_norm": 2.635038375854492, "learning_rate": 1.502755518484316e-06, "loss": 0.0732, "step": 714700 }, { "epoch": 7.03, "grad_norm": 1.8674741983413696, "learning_rate": 1.5026313960300675e-06, "loss": 0.0291, "step": 714725 }, { "epoch": 7.03, "grad_norm": 2.8705742359161377, "learning_rate": 1.5025072735758192e-06, "loss": 0.0471, "step": 714750 }, { "epoch": 7.03, "grad_norm": 2.607593059539795, "learning_rate": 1.5023831511215706e-06, "loss": 0.0209, "step": 714775 }, { "epoch": 7.03, "grad_norm": 2.811912775039673, "learning_rate": 1.5022590286673222e-06, "loss": 0.0786, "step": 714800 }, { "epoch": 7.03, "grad_norm": 0.16838015615940094, "learning_rate": 1.5021349062130737e-06, "loss": 0.018, "step": 714825 }, { "epoch": 7.03, "grad_norm": 2.695115566253662, "learning_rate": 1.5020107837588253e-06, "loss": 0.0598, "step": 714850 }, { "epoch": 7.03, "grad_norm": 0.10930873453617096, "learning_rate": 1.501886661304577e-06, "loss": 0.0216, "step": 714875 }, { "epoch": 7.03, "grad_norm": 3.3100147247314453, "learning_rate": 1.5017625388503281e-06, "loss": 0.048, "step": 714900 }, { "epoch": 7.03, "grad_norm": 3.121454954147339, "learning_rate": 1.5016384163960798e-06, "loss": 0.0149, "step": 714925 }, { "epoch": 7.03, "grad_norm": 3.004392623901367, "learning_rate": 1.5015142939418314e-06, "loss": 0.0638, "step": 714950 }, { "epoch": 7.03, "grad_norm": 5.861886978149414, "learning_rate": 1.5013901714875828e-06, "loss": 0.0246, "step": 714975 }, { "epoch": 7.03, "grad_norm": 2.9078989028930664, "learning_rate": 1.5012660490333345e-06, "loss": 0.0529, "step": 715000 }, { "epoch": 7.03, "grad_norm": 6.632505416870117, "learning_rate": 1.501141926579086e-06, "loss": 0.0062, "step": 715025 }, { "epoch": 7.03, "grad_norm": 4.255285739898682, "learning_rate": 1.5010178041248375e-06, "loss": 0.0447, "step": 715050 }, { "epoch": 7.03, "grad_norm": 20.442562103271484, "learning_rate": 1.5008936816705892e-06, "loss": 0.0122, "step": 715075 }, { "epoch": 7.03, "grad_norm": 4.1672139167785645, "learning_rate": 1.5007695592163408e-06, "loss": 0.0672, "step": 715100 }, { "epoch": 7.03, "grad_norm": 9.477546691894531, "learning_rate": 1.500645436762092e-06, "loss": 0.0215, "step": 715125 }, { "epoch": 7.03, "grad_norm": 1.9905548095703125, "learning_rate": 1.5005213143078436e-06, "loss": 0.0678, "step": 715150 }, { "epoch": 7.03, "grad_norm": 9.324538230895996, "learning_rate": 1.5003971918535953e-06, "loss": 0.0168, "step": 715175 }, { "epoch": 7.03, "grad_norm": 6.401695251464844, "learning_rate": 1.5002730693993467e-06, "loss": 0.0523, "step": 715200 }, { "epoch": 7.03, "grad_norm": 0.3584720194339752, "learning_rate": 1.5001489469450983e-06, "loss": 0.0263, "step": 715225 }, { "epoch": 7.03, "grad_norm": 2.3599069118499756, "learning_rate": 1.5000248244908498e-06, "loss": 0.0489, "step": 715250 }, { "epoch": 7.03, "grad_norm": 4.538244724273682, "learning_rate": 1.4999007020366014e-06, "loss": 0.0236, "step": 715275 }, { "epoch": 7.03, "grad_norm": 3.2227284908294678, "learning_rate": 1.499776579582353e-06, "loss": 0.0352, "step": 715300 }, { "epoch": 7.03, "grad_norm": 9.513665199279785, "learning_rate": 1.4996524571281042e-06, "loss": 0.0226, "step": 715325 }, { "epoch": 7.03, "grad_norm": 3.4213216304779053, "learning_rate": 1.4995283346738559e-06, "loss": 0.0652, "step": 715350 }, { "epoch": 7.03, "grad_norm": 2.416220188140869, "learning_rate": 1.4994042122196075e-06, "loss": 0.0186, "step": 715375 }, { "epoch": 7.03, "grad_norm": 1.7252908945083618, "learning_rate": 1.499280089765359e-06, "loss": 0.0533, "step": 715400 }, { "epoch": 7.03, "grad_norm": 0.1154402643442154, "learning_rate": 1.4991559673111106e-06, "loss": 0.0131, "step": 715425 }, { "epoch": 7.03, "grad_norm": 3.6448092460632324, "learning_rate": 1.4990318448568622e-06, "loss": 0.0526, "step": 715450 }, { "epoch": 7.03, "grad_norm": 7.131773948669434, "learning_rate": 1.4989077224026136e-06, "loss": 0.0124, "step": 715475 }, { "epoch": 7.03, "grad_norm": 4.818436145782471, "learning_rate": 1.4987835999483653e-06, "loss": 0.0749, "step": 715500 }, { "epoch": 7.04, "grad_norm": 0.4411892890930176, "learning_rate": 1.4986594774941169e-06, "loss": 0.0168, "step": 715525 }, { "epoch": 7.04, "grad_norm": 1.7713203430175781, "learning_rate": 1.498535355039868e-06, "loss": 0.0794, "step": 715550 }, { "epoch": 7.04, "grad_norm": 0.15871039032936096, "learning_rate": 1.4984112325856197e-06, "loss": 0.0214, "step": 715575 }, { "epoch": 7.04, "grad_norm": 2.728222608566284, "learning_rate": 1.4982871101313714e-06, "loss": 0.0298, "step": 715600 }, { "epoch": 7.04, "grad_norm": 0.8965112566947937, "learning_rate": 1.4981629876771228e-06, "loss": 0.02, "step": 715625 }, { "epoch": 7.04, "grad_norm": 4.093758583068848, "learning_rate": 1.4980388652228744e-06, "loss": 0.0592, "step": 715650 }, { "epoch": 7.04, "grad_norm": 0.31990841031074524, "learning_rate": 1.4979147427686259e-06, "loss": 0.0085, "step": 715675 }, { "epoch": 7.04, "grad_norm": 3.425935983657837, "learning_rate": 1.4977906203143775e-06, "loss": 0.0558, "step": 715700 }, { "epoch": 7.04, "grad_norm": 0.1694004386663437, "learning_rate": 1.4976664978601291e-06, "loss": 0.0177, "step": 715725 }, { "epoch": 7.04, "grad_norm": 4.790876388549805, "learning_rate": 1.4975423754058803e-06, "loss": 0.0475, "step": 715750 }, { "epoch": 7.04, "grad_norm": 9.649928092956543, "learning_rate": 1.497418252951632e-06, "loss": 0.0202, "step": 715775 }, { "epoch": 7.04, "grad_norm": 3.790008783340454, "learning_rate": 1.4972941304973836e-06, "loss": 0.0617, "step": 715800 }, { "epoch": 7.04, "grad_norm": 1.475251317024231, "learning_rate": 1.497170008043135e-06, "loss": 0.0184, "step": 715825 }, { "epoch": 7.04, "grad_norm": 3.718380928039551, "learning_rate": 1.4970458855888867e-06, "loss": 0.0404, "step": 715850 }, { "epoch": 7.04, "grad_norm": 14.534001350402832, "learning_rate": 1.4969217631346383e-06, "loss": 0.0266, "step": 715875 }, { "epoch": 7.04, "grad_norm": 2.9183547496795654, "learning_rate": 1.4967976406803897e-06, "loss": 0.0713, "step": 715900 }, { "epoch": 7.04, "grad_norm": 0.17796637117862701, "learning_rate": 1.4966735182261414e-06, "loss": 0.0251, "step": 715925 }, { "epoch": 7.04, "grad_norm": 2.866738796234131, "learning_rate": 1.496549395771893e-06, "loss": 0.0575, "step": 715950 }, { "epoch": 7.04, "grad_norm": 7.893904209136963, "learning_rate": 1.4964252733176444e-06, "loss": 0.0246, "step": 715975 }, { "epoch": 7.04, "grad_norm": 1.6773749589920044, "learning_rate": 1.496301150863396e-06, "loss": 0.078, "step": 716000 }, { "epoch": 7.04, "grad_norm": 2.911294460296631, "learning_rate": 1.4961770284091477e-06, "loss": 0.0099, "step": 716025 }, { "epoch": 7.04, "grad_norm": 3.269458293914795, "learning_rate": 1.496052905954899e-06, "loss": 0.0616, "step": 716050 }, { "epoch": 7.04, "grad_norm": 0.9742540717124939, "learning_rate": 1.4959287835006505e-06, "loss": 0.0213, "step": 716075 }, { "epoch": 7.04, "grad_norm": 2.5478174686431885, "learning_rate": 1.495804661046402e-06, "loss": 0.052, "step": 716100 }, { "epoch": 7.04, "grad_norm": 0.047696929425001144, "learning_rate": 1.4956805385921536e-06, "loss": 0.0257, "step": 716125 }, { "epoch": 7.04, "grad_norm": 2.3455631732940674, "learning_rate": 1.4955564161379052e-06, "loss": 0.0495, "step": 716150 }, { "epoch": 7.04, "grad_norm": 1.8123646974563599, "learning_rate": 1.4954322936836566e-06, "loss": 0.022, "step": 716175 }, { "epoch": 7.04, "grad_norm": 3.1150970458984375, "learning_rate": 1.4953081712294083e-06, "loss": 0.0541, "step": 716200 }, { "epoch": 7.04, "grad_norm": 12.896100044250488, "learning_rate": 1.49518404877516e-06, "loss": 0.0154, "step": 716225 }, { "epoch": 7.04, "grad_norm": 2.197690486907959, "learning_rate": 1.4950599263209111e-06, "loss": 0.0536, "step": 716250 }, { "epoch": 7.04, "grad_norm": 0.05491704121232033, "learning_rate": 1.4949358038666628e-06, "loss": 0.0298, "step": 716275 }, { "epoch": 7.04, "grad_norm": 4.0571489334106445, "learning_rate": 1.4948116814124144e-06, "loss": 0.0436, "step": 716300 }, { "epoch": 7.04, "grad_norm": 10.208964347839355, "learning_rate": 1.4946875589581658e-06, "loss": 0.0216, "step": 716325 }, { "epoch": 7.04, "grad_norm": 4.357958793640137, "learning_rate": 1.4945634365039175e-06, "loss": 0.0488, "step": 716350 }, { "epoch": 7.04, "grad_norm": 0.5315461754798889, "learning_rate": 1.494439314049669e-06, "loss": 0.0151, "step": 716375 }, { "epoch": 7.04, "grad_norm": 2.489410638809204, "learning_rate": 1.4943151915954205e-06, "loss": 0.068, "step": 716400 }, { "epoch": 7.04, "grad_norm": 7.116724491119385, "learning_rate": 1.4941910691411722e-06, "loss": 0.018, "step": 716425 }, { "epoch": 7.04, "grad_norm": 2.9696431159973145, "learning_rate": 1.4940669466869238e-06, "loss": 0.0587, "step": 716450 }, { "epoch": 7.04, "grad_norm": 6.502272605895996, "learning_rate": 1.493942824232675e-06, "loss": 0.0201, "step": 716475 }, { "epoch": 7.04, "grad_norm": 4.94050931930542, "learning_rate": 1.4938187017784266e-06, "loss": 0.0693, "step": 716500 }, { "epoch": 7.04, "grad_norm": 0.7533600330352783, "learning_rate": 1.493694579324178e-06, "loss": 0.0276, "step": 716525 }, { "epoch": 7.05, "grad_norm": 2.4094879627227783, "learning_rate": 1.4935704568699297e-06, "loss": 0.0669, "step": 716550 }, { "epoch": 7.05, "grad_norm": 9.541584014892578, "learning_rate": 1.4934463344156813e-06, "loss": 0.025, "step": 716575 }, { "epoch": 7.05, "grad_norm": 3.4398903846740723, "learning_rate": 1.4933222119614327e-06, "loss": 0.0611, "step": 716600 }, { "epoch": 7.05, "grad_norm": 0.07135791331529617, "learning_rate": 1.4931980895071844e-06, "loss": 0.0255, "step": 716625 }, { "epoch": 7.05, "grad_norm": 2.981652021408081, "learning_rate": 1.493073967052936e-06, "loss": 0.0555, "step": 716650 }, { "epoch": 7.05, "grad_norm": 0.7777700424194336, "learning_rate": 1.4929498445986872e-06, "loss": 0.0184, "step": 716675 }, { "epoch": 7.05, "grad_norm": 4.843564033508301, "learning_rate": 1.4928257221444389e-06, "loss": 0.0398, "step": 716700 }, { "epoch": 7.05, "grad_norm": 8.112591743469238, "learning_rate": 1.4927015996901905e-06, "loss": 0.0164, "step": 716725 }, { "epoch": 7.05, "grad_norm": 2.447890043258667, "learning_rate": 1.492577477235942e-06, "loss": 0.0575, "step": 716750 }, { "epoch": 7.05, "grad_norm": 3.468909502029419, "learning_rate": 1.4924533547816936e-06, "loss": 0.0226, "step": 716775 }, { "epoch": 7.05, "grad_norm": 4.057456970214844, "learning_rate": 1.4923292323274452e-06, "loss": 0.0534, "step": 716800 }, { "epoch": 7.05, "grad_norm": 10.303751945495605, "learning_rate": 1.4922051098731966e-06, "loss": 0.022, "step": 716825 }, { "epoch": 7.05, "grad_norm": 2.712775945663452, "learning_rate": 1.4920809874189483e-06, "loss": 0.045, "step": 716850 }, { "epoch": 7.05, "grad_norm": 0.1156991720199585, "learning_rate": 1.4919568649646999e-06, "loss": 0.0132, "step": 716875 }, { "epoch": 7.05, "grad_norm": 3.185600519180298, "learning_rate": 1.491832742510451e-06, "loss": 0.0388, "step": 716900 }, { "epoch": 7.05, "grad_norm": 12.374249458312988, "learning_rate": 1.4917086200562027e-06, "loss": 0.016, "step": 716925 }, { "epoch": 7.05, "grad_norm": 2.346489429473877, "learning_rate": 1.4915844976019542e-06, "loss": 0.0724, "step": 716950 }, { "epoch": 7.05, "grad_norm": 1.5299521684646606, "learning_rate": 1.4914603751477058e-06, "loss": 0.0249, "step": 716975 }, { "epoch": 7.05, "grad_norm": 2.9661102294921875, "learning_rate": 1.4913362526934574e-06, "loss": 0.0537, "step": 717000 }, { "epoch": 7.05, "grad_norm": 1.871264934539795, "learning_rate": 1.4912121302392088e-06, "loss": 0.0215, "step": 717025 }, { "epoch": 7.05, "grad_norm": 4.3143205642700195, "learning_rate": 1.4910880077849605e-06, "loss": 0.0669, "step": 717050 }, { "epoch": 7.05, "grad_norm": 0.099991075694561, "learning_rate": 1.4909638853307121e-06, "loss": 0.0199, "step": 717075 }, { "epoch": 7.05, "grad_norm": 2.7014784812927246, "learning_rate": 1.4908397628764633e-06, "loss": 0.0347, "step": 717100 }, { "epoch": 7.05, "grad_norm": 3.23822021484375, "learning_rate": 1.490715640422215e-06, "loss": 0.03, "step": 717125 }, { "epoch": 7.05, "grad_norm": 3.4782536029815674, "learning_rate": 1.4905964828661364e-06, "loss": 0.0713, "step": 717150 }, { "epoch": 7.05, "grad_norm": 0.15709532797336578, "learning_rate": 1.490472360411888e-06, "loss": 0.0182, "step": 717175 }, { "epoch": 7.05, "grad_norm": 3.0838663578033447, "learning_rate": 1.4903482379576395e-06, "loss": 0.0719, "step": 717200 }, { "epoch": 7.05, "grad_norm": 0.11067123711109161, "learning_rate": 1.4902241155033911e-06, "loss": 0.021, "step": 717225 }, { "epoch": 7.05, "grad_norm": 3.2099459171295166, "learning_rate": 1.4900999930491427e-06, "loss": 0.0622, "step": 717250 }, { "epoch": 7.05, "grad_norm": 0.9158298969268799, "learning_rate": 1.4899758705948942e-06, "loss": 0.013, "step": 717275 }, { "epoch": 7.05, "grad_norm": 2.417074680328369, "learning_rate": 1.4898517481406458e-06, "loss": 0.0661, "step": 717300 }, { "epoch": 7.05, "grad_norm": 0.276546448469162, "learning_rate": 1.4897276256863974e-06, "loss": 0.0216, "step": 717325 }, { "epoch": 7.05, "grad_norm": 4.188453674316406, "learning_rate": 1.4896035032321486e-06, "loss": 0.0562, "step": 717350 }, { "epoch": 7.05, "grad_norm": 0.06806372106075287, "learning_rate": 1.4894793807779003e-06, "loss": 0.0162, "step": 717375 }, { "epoch": 7.05, "grad_norm": 2.9985082149505615, "learning_rate": 1.489355258323652e-06, "loss": 0.0492, "step": 717400 }, { "epoch": 7.05, "grad_norm": 1.3790967464447021, "learning_rate": 1.4892311358694033e-06, "loss": 0.012, "step": 717425 }, { "epoch": 7.05, "grad_norm": 2.2921595573425293, "learning_rate": 1.489107013415155e-06, "loss": 0.0629, "step": 717450 }, { "epoch": 7.05, "grad_norm": 3.203214645385742, "learning_rate": 1.4889828909609066e-06, "loss": 0.0123, "step": 717475 }, { "epoch": 7.05, "grad_norm": 1.8112903833389282, "learning_rate": 1.488858768506658e-06, "loss": 0.0604, "step": 717500 }, { "epoch": 7.05, "grad_norm": 16.894304275512695, "learning_rate": 1.4887346460524097e-06, "loss": 0.0107, "step": 717525 }, { "epoch": 7.06, "grad_norm": 4.148094177246094, "learning_rate": 1.488610523598161e-06, "loss": 0.0473, "step": 717550 }, { "epoch": 7.06, "grad_norm": 0.06705065071582794, "learning_rate": 1.4884864011439127e-06, "loss": 0.0265, "step": 717575 }, { "epoch": 7.06, "grad_norm": 2.797241449356079, "learning_rate": 1.4883622786896644e-06, "loss": 0.0716, "step": 717600 }, { "epoch": 7.06, "grad_norm": 9.657308578491211, "learning_rate": 1.4882381562354156e-06, "loss": 0.0209, "step": 717625 }, { "epoch": 7.06, "grad_norm": 3.1000404357910156, "learning_rate": 1.4881140337811672e-06, "loss": 0.045, "step": 717650 }, { "epoch": 7.06, "grad_norm": 0.06892597675323486, "learning_rate": 1.4879899113269188e-06, "loss": 0.015, "step": 717675 }, { "epoch": 7.06, "grad_norm": 2.132140874862671, "learning_rate": 1.4878657888726703e-06, "loss": 0.0588, "step": 717700 }, { "epoch": 7.06, "grad_norm": 0.848922848701477, "learning_rate": 1.487741666418422e-06, "loss": 0.0275, "step": 717725 }, { "epoch": 7.06, "grad_norm": 2.996105194091797, "learning_rate": 1.4876175439641735e-06, "loss": 0.0635, "step": 717750 }, { "epoch": 7.06, "grad_norm": 0.5303265452384949, "learning_rate": 1.487493421509925e-06, "loss": 0.0248, "step": 717775 }, { "epoch": 7.06, "grad_norm": 3.6347837448120117, "learning_rate": 1.4873692990556766e-06, "loss": 0.0492, "step": 717800 }, { "epoch": 7.06, "grad_norm": 4.192510604858398, "learning_rate": 1.4872451766014282e-06, "loss": 0.0222, "step": 717825 }, { "epoch": 7.06, "grad_norm": 1.718164324760437, "learning_rate": 1.4871210541471794e-06, "loss": 0.0471, "step": 717850 }, { "epoch": 7.06, "grad_norm": 1.7688859701156616, "learning_rate": 1.486996931692931e-06, "loss": 0.0216, "step": 717875 }, { "epoch": 7.06, "grad_norm": 2.4074649810791016, "learning_rate": 1.4868728092386827e-06, "loss": 0.0586, "step": 717900 }, { "epoch": 7.06, "grad_norm": 9.998251914978027, "learning_rate": 1.4867486867844341e-06, "loss": 0.0215, "step": 717925 }, { "epoch": 7.06, "grad_norm": 4.404343605041504, "learning_rate": 1.4866245643301858e-06, "loss": 0.0549, "step": 717950 }, { "epoch": 7.06, "grad_norm": 0.1308559775352478, "learning_rate": 1.4865004418759372e-06, "loss": 0.0132, "step": 717975 }, { "epoch": 7.06, "grad_norm": 2.944143056869507, "learning_rate": 1.4863763194216888e-06, "loss": 0.0471, "step": 718000 }, { "epoch": 7.06, "grad_norm": 0.32658445835113525, "learning_rate": 1.4862521969674405e-06, "loss": 0.0254, "step": 718025 }, { "epoch": 7.06, "grad_norm": 3.862916946411133, "learning_rate": 1.4861280745131917e-06, "loss": 0.0771, "step": 718050 }, { "epoch": 7.06, "grad_norm": 0.525948166847229, "learning_rate": 1.4860039520589433e-06, "loss": 0.0107, "step": 718075 }, { "epoch": 7.06, "grad_norm": 4.31751012802124, "learning_rate": 1.485879829604695e-06, "loss": 0.0611, "step": 718100 }, { "epoch": 7.06, "grad_norm": 12.195662498474121, "learning_rate": 1.4857557071504464e-06, "loss": 0.0334, "step": 718125 }, { "epoch": 7.06, "grad_norm": 2.3170242309570312, "learning_rate": 1.485631584696198e-06, "loss": 0.0658, "step": 718150 }, { "epoch": 7.06, "grad_norm": 0.05619320273399353, "learning_rate": 1.4855074622419496e-06, "loss": 0.016, "step": 718175 }, { "epoch": 7.06, "grad_norm": 2.562919855117798, "learning_rate": 1.485383339787701e-06, "loss": 0.0565, "step": 718200 }, { "epoch": 7.06, "grad_norm": 0.08080299943685532, "learning_rate": 1.4852592173334527e-06, "loss": 0.017, "step": 718225 }, { "epoch": 7.06, "grad_norm": 4.640023231506348, "learning_rate": 1.4851350948792043e-06, "loss": 0.0764, "step": 718250 }, { "epoch": 7.06, "grad_norm": 1.3952066898345947, "learning_rate": 1.4850109724249555e-06, "loss": 0.0272, "step": 718275 }, { "epoch": 7.06, "grad_norm": 3.892127275466919, "learning_rate": 1.4848868499707072e-06, "loss": 0.0465, "step": 718300 }, { "epoch": 7.06, "grad_norm": 0.4587864875793457, "learning_rate": 1.4847627275164588e-06, "loss": 0.0276, "step": 718325 }, { "epoch": 7.06, "grad_norm": 3.8901071548461914, "learning_rate": 1.4846386050622102e-06, "loss": 0.0626, "step": 718350 }, { "epoch": 7.06, "grad_norm": 0.365896999835968, "learning_rate": 1.4845144826079619e-06, "loss": 0.0158, "step": 718375 }, { "epoch": 7.06, "grad_norm": 5.152801990509033, "learning_rate": 1.4843903601537133e-06, "loss": 0.061, "step": 718400 }, { "epoch": 7.06, "grad_norm": 5.216850280761719, "learning_rate": 1.484266237699465e-06, "loss": 0.0165, "step": 718425 }, { "epoch": 7.06, "grad_norm": 5.7823872566223145, "learning_rate": 1.4841421152452166e-06, "loss": 0.0608, "step": 718450 }, { "epoch": 7.06, "grad_norm": 7.335814476013184, "learning_rate": 1.4840179927909678e-06, "loss": 0.0192, "step": 718475 }, { "epoch": 7.06, "grad_norm": 2.538693428039551, "learning_rate": 1.4838938703367194e-06, "loss": 0.034, "step": 718500 }, { "epoch": 7.06, "grad_norm": 11.495244979858398, "learning_rate": 1.483769747882471e-06, "loss": 0.0189, "step": 718525 }, { "epoch": 7.06, "grad_norm": 2.879739999771118, "learning_rate": 1.4836456254282225e-06, "loss": 0.0611, "step": 718550 }, { "epoch": 7.07, "grad_norm": 0.5203679800033569, "learning_rate": 1.483521502973974e-06, "loss": 0.0112, "step": 718575 }, { "epoch": 7.07, "grad_norm": 2.480408191680908, "learning_rate": 1.4833973805197257e-06, "loss": 0.0795, "step": 718600 }, { "epoch": 7.07, "grad_norm": 1.1418097019195557, "learning_rate": 1.4832732580654772e-06, "loss": 0.0144, "step": 718625 }, { "epoch": 7.07, "grad_norm": 2.99056339263916, "learning_rate": 1.4831491356112288e-06, "loss": 0.0253, "step": 718650 }, { "epoch": 7.07, "grad_norm": 0.29553690552711487, "learning_rate": 1.4830250131569804e-06, "loss": 0.0097, "step": 718675 }, { "epoch": 7.07, "grad_norm": 2.8010592460632324, "learning_rate": 1.4829008907027316e-06, "loss": 0.0514, "step": 718700 }, { "epoch": 7.07, "grad_norm": 0.050310686230659485, "learning_rate": 1.4827767682484833e-06, "loss": 0.0242, "step": 718725 }, { "epoch": 7.07, "grad_norm": 3.8563027381896973, "learning_rate": 1.482652645794235e-06, "loss": 0.0644, "step": 718750 }, { "epoch": 7.07, "grad_norm": 0.13327951729297638, "learning_rate": 1.4825285233399863e-06, "loss": 0.0148, "step": 718775 }, { "epoch": 7.07, "grad_norm": 3.863553285598755, "learning_rate": 1.482404400885738e-06, "loss": 0.0289, "step": 718800 }, { "epoch": 7.07, "grad_norm": 6.343887805938721, "learning_rate": 1.4822802784314894e-06, "loss": 0.0287, "step": 718825 }, { "epoch": 7.07, "grad_norm": 3.0224955081939697, "learning_rate": 1.482156155977241e-06, "loss": 0.0622, "step": 718850 }, { "epoch": 7.07, "grad_norm": 5.588747978210449, "learning_rate": 1.4820320335229927e-06, "loss": 0.0194, "step": 718875 }, { "epoch": 7.07, "grad_norm": 3.449958086013794, "learning_rate": 1.481907911068744e-06, "loss": 0.0573, "step": 718900 }, { "epoch": 7.07, "grad_norm": 8.383596420288086, "learning_rate": 1.4817837886144957e-06, "loss": 0.017, "step": 718925 }, { "epoch": 7.07, "grad_norm": 2.033413887023926, "learning_rate": 1.4816596661602474e-06, "loss": 0.0532, "step": 718950 }, { "epoch": 7.07, "grad_norm": 0.13019832968711853, "learning_rate": 1.4815355437059986e-06, "loss": 0.0144, "step": 718975 }, { "epoch": 7.07, "grad_norm": 3.2345130443573, "learning_rate": 1.4814114212517502e-06, "loss": 0.0431, "step": 719000 }, { "epoch": 7.07, "grad_norm": 1.8255313634872437, "learning_rate": 1.4812872987975018e-06, "loss": 0.0154, "step": 719025 }, { "epoch": 7.07, "grad_norm": 4.903679847717285, "learning_rate": 1.4811631763432533e-06, "loss": 0.0496, "step": 719050 }, { "epoch": 7.07, "grad_norm": 0.41524192690849304, "learning_rate": 1.481039053889005e-06, "loss": 0.0284, "step": 719075 }, { "epoch": 7.07, "grad_norm": 4.415782928466797, "learning_rate": 1.4809149314347565e-06, "loss": 0.0623, "step": 719100 }, { "epoch": 7.07, "grad_norm": 3.0847928524017334, "learning_rate": 1.480790808980508e-06, "loss": 0.0245, "step": 719125 }, { "epoch": 7.07, "grad_norm": 2.7588186264038086, "learning_rate": 1.4806666865262596e-06, "loss": 0.0521, "step": 719150 }, { "epoch": 7.07, "grad_norm": 1.021710753440857, "learning_rate": 1.4805425640720112e-06, "loss": 0.0146, "step": 719175 }, { "epoch": 7.07, "grad_norm": 5.315546035766602, "learning_rate": 1.4804184416177624e-06, "loss": 0.0565, "step": 719200 }, { "epoch": 7.07, "grad_norm": 3.2845418453216553, "learning_rate": 1.480294319163514e-06, "loss": 0.0147, "step": 719225 }, { "epoch": 7.07, "grad_norm": 2.6627111434936523, "learning_rate": 1.4801701967092655e-06, "loss": 0.0644, "step": 719250 }, { "epoch": 7.07, "grad_norm": 5.455343723297119, "learning_rate": 1.4800460742550171e-06, "loss": 0.0122, "step": 719275 }, { "epoch": 7.07, "grad_norm": 3.075754165649414, "learning_rate": 1.4799219518007688e-06, "loss": 0.0684, "step": 719300 }, { "epoch": 7.07, "grad_norm": 6.048635959625244, "learning_rate": 1.4797978293465202e-06, "loss": 0.0282, "step": 719325 }, { "epoch": 7.07, "grad_norm": 3.089907646179199, "learning_rate": 1.4796737068922718e-06, "loss": 0.0532, "step": 719350 }, { "epoch": 7.07, "grad_norm": 7.335402011871338, "learning_rate": 1.4795495844380235e-06, "loss": 0.0175, "step": 719375 }, { "epoch": 7.07, "grad_norm": 3.449758529663086, "learning_rate": 1.4794254619837747e-06, "loss": 0.0427, "step": 719400 }, { "epoch": 7.07, "grad_norm": 0.29109781980514526, "learning_rate": 1.4793013395295263e-06, "loss": 0.0238, "step": 719425 }, { "epoch": 7.07, "grad_norm": 2.8660881519317627, "learning_rate": 1.4791821819734478e-06, "loss": 0.0571, "step": 719450 }, { "epoch": 7.07, "grad_norm": 6.905092239379883, "learning_rate": 1.4790580595191994e-06, "loss": 0.0183, "step": 719475 }, { "epoch": 7.07, "grad_norm": 10.53389835357666, "learning_rate": 1.4789339370649508e-06, "loss": 0.0705, "step": 719500 }, { "epoch": 7.07, "grad_norm": 8.344502449035645, "learning_rate": 1.4788098146107024e-06, "loss": 0.0184, "step": 719525 }, { "epoch": 7.07, "grad_norm": 2.630127429962158, "learning_rate": 1.478685692156454e-06, "loss": 0.0511, "step": 719550 }, { "epoch": 7.07, "grad_norm": 6.104868412017822, "learning_rate": 1.4785615697022055e-06, "loss": 0.0175, "step": 719575 }, { "epoch": 7.08, "grad_norm": 4.855522155761719, "learning_rate": 1.4784374472479571e-06, "loss": 0.0565, "step": 719600 }, { "epoch": 7.08, "grad_norm": 0.051520463079214096, "learning_rate": 1.4783133247937088e-06, "loss": 0.023, "step": 719625 }, { "epoch": 7.08, "grad_norm": 3.879594087600708, "learning_rate": 1.47818920233946e-06, "loss": 0.0666, "step": 719650 }, { "epoch": 7.08, "grad_norm": 3.254122734069824, "learning_rate": 1.4780650798852116e-06, "loss": 0.0221, "step": 719675 }, { "epoch": 7.08, "grad_norm": 4.469464302062988, "learning_rate": 1.4779409574309633e-06, "loss": 0.0569, "step": 719700 }, { "epoch": 7.08, "grad_norm": 2.8338303565979004, "learning_rate": 1.4778168349767147e-06, "loss": 0.0168, "step": 719725 }, { "epoch": 7.08, "grad_norm": 4.098189830780029, "learning_rate": 1.4776927125224663e-06, "loss": 0.0679, "step": 719750 }, { "epoch": 7.08, "grad_norm": 14.096175193786621, "learning_rate": 1.477568590068218e-06, "loss": 0.0221, "step": 719775 }, { "epoch": 7.08, "grad_norm": 2.9966821670532227, "learning_rate": 1.4774444676139694e-06, "loss": 0.0626, "step": 719800 }, { "epoch": 7.08, "grad_norm": 0.013216009363532066, "learning_rate": 1.477320345159721e-06, "loss": 0.0247, "step": 719825 }, { "epoch": 7.08, "grad_norm": 2.9415664672851562, "learning_rate": 1.4771962227054722e-06, "loss": 0.0524, "step": 719850 }, { "epoch": 7.08, "grad_norm": 1.206624984741211, "learning_rate": 1.4770721002512239e-06, "loss": 0.0163, "step": 719875 }, { "epoch": 7.08, "grad_norm": 3.010608673095703, "learning_rate": 1.4769479777969755e-06, "loss": 0.0512, "step": 719900 }, { "epoch": 7.08, "grad_norm": 1.301781177520752, "learning_rate": 1.476823855342727e-06, "loss": 0.0244, "step": 719925 }, { "epoch": 7.08, "grad_norm": 4.035514831542969, "learning_rate": 1.4766997328884785e-06, "loss": 0.0759, "step": 719950 }, { "epoch": 7.08, "grad_norm": 11.566234588623047, "learning_rate": 1.4765756104342302e-06, "loss": 0.0205, "step": 719975 }, { "epoch": 7.08, "grad_norm": 2.2803726196289062, "learning_rate": 1.4764514879799816e-06, "loss": 0.0383, "step": 720000 }, { "epoch": 7.08, "eval_loss": 0.8718562722206116, "eval_runtime": 6097.4317, "eval_samples_per_second": 1.553, "eval_steps_per_second": 0.194, "eval_wer": 0.11224489795918367, "step": 720000 }, { "epoch": 7.08, "grad_norm": 0.0793972760438919, "learning_rate": 1.4763273655257332e-06, "loss": 0.0187, "step": 720025 }, { "epoch": 7.08, "grad_norm": 1.7954127788543701, "learning_rate": 1.4762032430714849e-06, "loss": 0.0559, "step": 720050 }, { "epoch": 7.08, "grad_norm": 0.1038501113653183, "learning_rate": 1.476079120617236e-06, "loss": 0.0076, "step": 720075 }, { "epoch": 7.08, "grad_norm": 1.1775519847869873, "learning_rate": 1.4759549981629877e-06, "loss": 0.0462, "step": 720100 }, { "epoch": 7.08, "grad_norm": 2.4681026935577393, "learning_rate": 1.4758308757087394e-06, "loss": 0.0176, "step": 720125 }, { "epoch": 7.08, "grad_norm": 1.985582709312439, "learning_rate": 1.4757067532544908e-06, "loss": 0.0609, "step": 720150 }, { "epoch": 7.08, "grad_norm": 1.2878011465072632, "learning_rate": 1.4755826308002424e-06, "loss": 0.0153, "step": 720175 }, { "epoch": 7.08, "grad_norm": 2.5112740993499756, "learning_rate": 1.475458508345994e-06, "loss": 0.0481, "step": 720200 }, { "epoch": 7.08, "grad_norm": 4.176258563995361, "learning_rate": 1.4753343858917455e-06, "loss": 0.0127, "step": 720225 }, { "epoch": 7.08, "grad_norm": 3.4061827659606934, "learning_rate": 1.4752102634374971e-06, "loss": 0.0491, "step": 720250 }, { "epoch": 7.08, "grad_norm": 0.28639739751815796, "learning_rate": 1.4750861409832483e-06, "loss": 0.0137, "step": 720275 }, { "epoch": 7.08, "grad_norm": 3.6920127868652344, "learning_rate": 1.474962018529e-06, "loss": 0.0731, "step": 720300 }, { "epoch": 7.08, "grad_norm": 0.047230158001184464, "learning_rate": 1.4748378960747516e-06, "loss": 0.0118, "step": 720325 }, { "epoch": 7.08, "grad_norm": 3.9373855590820312, "learning_rate": 1.474713773620503e-06, "loss": 0.0641, "step": 720350 }, { "epoch": 7.08, "grad_norm": 0.1886783242225647, "learning_rate": 1.4745896511662546e-06, "loss": 0.0176, "step": 720375 }, { "epoch": 7.08, "grad_norm": 4.760197639465332, "learning_rate": 1.4744655287120063e-06, "loss": 0.0559, "step": 720400 }, { "epoch": 7.08, "grad_norm": 1.460683822631836, "learning_rate": 1.4743414062577577e-06, "loss": 0.0165, "step": 720425 }, { "epoch": 7.08, "grad_norm": 3.175020217895508, "learning_rate": 1.4742172838035093e-06, "loss": 0.0726, "step": 720450 }, { "epoch": 7.08, "grad_norm": 3.027491569519043, "learning_rate": 1.474093161349261e-06, "loss": 0.0126, "step": 720475 }, { "epoch": 7.08, "grad_norm": 2.2937309741973877, "learning_rate": 1.4739690388950124e-06, "loss": 0.0519, "step": 720500 }, { "epoch": 7.08, "grad_norm": 0.8911207914352417, "learning_rate": 1.473844916440764e-06, "loss": 0.0221, "step": 720525 }, { "epoch": 7.08, "grad_norm": 1.9165340662002563, "learning_rate": 1.4737207939865157e-06, "loss": 0.0579, "step": 720550 }, { "epoch": 7.08, "grad_norm": 3.4257547855377197, "learning_rate": 1.4735966715322669e-06, "loss": 0.0203, "step": 720575 }, { "epoch": 7.09, "grad_norm": 3.8133418560028076, "learning_rate": 1.4734725490780185e-06, "loss": 0.0717, "step": 720600 }, { "epoch": 7.09, "grad_norm": 0.7078612446784973, "learning_rate": 1.4733484266237702e-06, "loss": 0.0192, "step": 720625 }, { "epoch": 7.09, "grad_norm": 3.395873546600342, "learning_rate": 1.4732243041695216e-06, "loss": 0.0325, "step": 720650 }, { "epoch": 7.09, "grad_norm": 1.1297045946121216, "learning_rate": 1.4731001817152732e-06, "loss": 0.0171, "step": 720675 }, { "epoch": 7.09, "grad_norm": 2.5713679790496826, "learning_rate": 1.4729760592610246e-06, "loss": 0.0629, "step": 720700 }, { "epoch": 7.09, "grad_norm": 0.43253448605537415, "learning_rate": 1.4728519368067763e-06, "loss": 0.0207, "step": 720725 }, { "epoch": 7.09, "grad_norm": 1.900571584701538, "learning_rate": 1.472727814352528e-06, "loss": 0.0547, "step": 720750 }, { "epoch": 7.09, "grad_norm": 7.349789142608643, "learning_rate": 1.4726036918982791e-06, "loss": 0.0285, "step": 720775 }, { "epoch": 7.09, "grad_norm": 4.282343864440918, "learning_rate": 1.4724795694440308e-06, "loss": 0.0581, "step": 720800 }, { "epoch": 7.09, "grad_norm": 1.4039851427078247, "learning_rate": 1.4723554469897824e-06, "loss": 0.0204, "step": 720825 }, { "epoch": 7.09, "grad_norm": 2.6895179748535156, "learning_rate": 1.4722313245355338e-06, "loss": 0.0518, "step": 720850 }, { "epoch": 7.09, "grad_norm": 5.514645099639893, "learning_rate": 1.4721072020812854e-06, "loss": 0.0204, "step": 720875 }, { "epoch": 7.09, "grad_norm": 1.9423178434371948, "learning_rate": 1.471983079627037e-06, "loss": 0.0505, "step": 720900 }, { "epoch": 7.09, "grad_norm": 12.957854270935059, "learning_rate": 1.4718589571727885e-06, "loss": 0.026, "step": 720925 }, { "epoch": 7.09, "grad_norm": 2.0068888664245605, "learning_rate": 1.4717348347185401e-06, "loss": 0.0385, "step": 720950 }, { "epoch": 7.09, "grad_norm": 10.268445014953613, "learning_rate": 1.4716107122642918e-06, "loss": 0.0172, "step": 720975 }, { "epoch": 7.09, "grad_norm": 3.3256728649139404, "learning_rate": 1.471486589810043e-06, "loss": 0.0399, "step": 721000 }, { "epoch": 7.09, "grad_norm": 4.3026909828186035, "learning_rate": 1.4713624673557946e-06, "loss": 0.0239, "step": 721025 }, { "epoch": 7.09, "grad_norm": 1.7736324071884155, "learning_rate": 1.4712383449015463e-06, "loss": 0.0355, "step": 721050 }, { "epoch": 7.09, "grad_norm": 6.75046968460083, "learning_rate": 1.4711142224472977e-06, "loss": 0.0232, "step": 721075 }, { "epoch": 7.09, "grad_norm": 2.176374673843384, "learning_rate": 1.4709900999930493e-06, "loss": 0.0594, "step": 721100 }, { "epoch": 7.09, "grad_norm": 6.212894439697266, "learning_rate": 1.4708659775388007e-06, "loss": 0.02, "step": 721125 }, { "epoch": 7.09, "grad_norm": 4.007859706878662, "learning_rate": 1.4707418550845524e-06, "loss": 0.068, "step": 721150 }, { "epoch": 7.09, "grad_norm": 0.258661687374115, "learning_rate": 1.470617732630304e-06, "loss": 0.0159, "step": 721175 }, { "epoch": 7.09, "grad_norm": 3.8874149322509766, "learning_rate": 1.4704936101760552e-06, "loss": 0.0402, "step": 721200 }, { "epoch": 7.09, "grad_norm": 10.84194278717041, "learning_rate": 1.4703694877218069e-06, "loss": 0.0281, "step": 721225 }, { "epoch": 7.09, "grad_norm": 4.1606268882751465, "learning_rate": 1.4702453652675585e-06, "loss": 0.0499, "step": 721250 }, { "epoch": 7.09, "grad_norm": 3.9914534091949463, "learning_rate": 1.47012124281331e-06, "loss": 0.0161, "step": 721275 }, { "epoch": 7.09, "grad_norm": 2.504511594772339, "learning_rate": 1.4699971203590615e-06, "loss": 0.0576, "step": 721300 }, { "epoch": 7.09, "grad_norm": 5.514843463897705, "learning_rate": 1.4698729979048132e-06, "loss": 0.0229, "step": 721325 }, { "epoch": 7.09, "grad_norm": 2.2593882083892822, "learning_rate": 1.4697488754505646e-06, "loss": 0.0406, "step": 721350 }, { "epoch": 7.09, "grad_norm": 0.7957175374031067, "learning_rate": 1.4696247529963162e-06, "loss": 0.0202, "step": 721375 }, { "epoch": 7.09, "grad_norm": 3.2870333194732666, "learning_rate": 1.4695006305420679e-06, "loss": 0.0618, "step": 721400 }, { "epoch": 7.09, "grad_norm": 0.09572228044271469, "learning_rate": 1.469376508087819e-06, "loss": 0.0199, "step": 721425 }, { "epoch": 7.09, "grad_norm": 3.2205045223236084, "learning_rate": 1.4692523856335707e-06, "loss": 0.07, "step": 721450 }, { "epoch": 7.09, "grad_norm": 0.21614058315753937, "learning_rate": 1.4691282631793224e-06, "loss": 0.0086, "step": 721475 }, { "epoch": 7.09, "grad_norm": 1.7168596982955933, "learning_rate": 1.4690041407250738e-06, "loss": 0.0676, "step": 721500 }, { "epoch": 7.09, "grad_norm": 10.33336067199707, "learning_rate": 1.4688800182708254e-06, "loss": 0.0198, "step": 721525 }, { "epoch": 7.09, "grad_norm": 3.5419821739196777, "learning_rate": 1.4687608607147469e-06, "loss": 0.0555, "step": 721550 }, { "epoch": 7.09, "grad_norm": 8.094511985778809, "learning_rate": 1.4686367382604985e-06, "loss": 0.0169, "step": 721575 }, { "epoch": 7.09, "grad_norm": 4.826869487762451, "learning_rate": 1.46851261580625e-06, "loss": 0.0618, "step": 721600 }, { "epoch": 7.1, "grad_norm": 0.7159218192100525, "learning_rate": 1.4683884933520016e-06, "loss": 0.0204, "step": 721625 }, { "epoch": 7.1, "grad_norm": 2.2679102420806885, "learning_rate": 1.4682643708977532e-06, "loss": 0.041, "step": 721650 }, { "epoch": 7.1, "grad_norm": 1.4666041135787964, "learning_rate": 1.4681402484435044e-06, "loss": 0.0218, "step": 721675 }, { "epoch": 7.1, "grad_norm": 3.1885464191436768, "learning_rate": 1.468016125989256e-06, "loss": 0.0457, "step": 721700 }, { "epoch": 7.1, "grad_norm": 1.1105763912200928, "learning_rate": 1.4678920035350075e-06, "loss": 0.0092, "step": 721725 }, { "epoch": 7.1, "grad_norm": 3.499495506286621, "learning_rate": 1.467767881080759e-06, "loss": 0.0573, "step": 721750 }, { "epoch": 7.1, "grad_norm": 0.4780300557613373, "learning_rate": 1.4676437586265107e-06, "loss": 0.0172, "step": 721775 }, { "epoch": 7.1, "grad_norm": 3.100200891494751, "learning_rate": 1.4675196361722622e-06, "loss": 0.0671, "step": 721800 }, { "epoch": 7.1, "grad_norm": 4.778985977172852, "learning_rate": 1.4673955137180138e-06, "loss": 0.0229, "step": 721825 }, { "epoch": 7.1, "grad_norm": 2.871216058731079, "learning_rate": 1.4672713912637654e-06, "loss": 0.0611, "step": 721850 }, { "epoch": 7.1, "grad_norm": 0.0958586260676384, "learning_rate": 1.4671472688095168e-06, "loss": 0.0231, "step": 721875 }, { "epoch": 7.1, "grad_norm": 4.026118278503418, "learning_rate": 1.4670231463552685e-06, "loss": 0.0792, "step": 721900 }, { "epoch": 7.1, "grad_norm": 10.52584457397461, "learning_rate": 1.4668990239010201e-06, "loss": 0.0115, "step": 721925 }, { "epoch": 7.1, "grad_norm": 4.9358673095703125, "learning_rate": 1.4667749014467713e-06, "loss": 0.0515, "step": 721950 }, { "epoch": 7.1, "grad_norm": 4.505100250244141, "learning_rate": 1.466650778992523e-06, "loss": 0.0166, "step": 721975 }, { "epoch": 7.1, "grad_norm": 2.3862438201904297, "learning_rate": 1.4665266565382746e-06, "loss": 0.0492, "step": 722000 }, { "epoch": 7.1, "grad_norm": 2.7873377799987793, "learning_rate": 1.466402534084026e-06, "loss": 0.016, "step": 722025 }, { "epoch": 7.1, "grad_norm": 5.245197296142578, "learning_rate": 1.4662784116297777e-06, "loss": 0.0432, "step": 722050 }, { "epoch": 7.1, "grad_norm": 5.909761428833008, "learning_rate": 1.4661542891755293e-06, "loss": 0.0236, "step": 722075 }, { "epoch": 7.1, "grad_norm": 2.8982107639312744, "learning_rate": 1.4660301667212807e-06, "loss": 0.0514, "step": 722100 }, { "epoch": 7.1, "grad_norm": 0.4876755177974701, "learning_rate": 1.4659060442670323e-06, "loss": 0.0209, "step": 722125 }, { "epoch": 7.1, "grad_norm": 3.5951154232025146, "learning_rate": 1.4657819218127836e-06, "loss": 0.0511, "step": 722150 }, { "epoch": 7.1, "grad_norm": 1.679548740386963, "learning_rate": 1.4656577993585352e-06, "loss": 0.0189, "step": 722175 }, { "epoch": 7.1, "grad_norm": 2.889719247817993, "learning_rate": 1.4655336769042868e-06, "loss": 0.0638, "step": 722200 }, { "epoch": 7.1, "grad_norm": 0.8986557126045227, "learning_rate": 1.4654095544500383e-06, "loss": 0.0272, "step": 722225 }, { "epoch": 7.1, "grad_norm": 3.6062800884246826, "learning_rate": 1.4652854319957899e-06, "loss": 0.0673, "step": 722250 }, { "epoch": 7.1, "grad_norm": 0.07955188304185867, "learning_rate": 1.4651613095415415e-06, "loss": 0.0206, "step": 722275 }, { "epoch": 7.1, "grad_norm": 3.2968413829803467, "learning_rate": 1.465037187087293e-06, "loss": 0.0571, "step": 722300 }, { "epoch": 7.1, "grad_norm": 0.39743953943252563, "learning_rate": 1.4649130646330446e-06, "loss": 0.0123, "step": 722325 }, { "epoch": 7.1, "grad_norm": 2.4389615058898926, "learning_rate": 1.4647889421787962e-06, "loss": 0.0435, "step": 722350 }, { "epoch": 7.1, "grad_norm": 17.66661834716797, "learning_rate": 1.4646648197245474e-06, "loss": 0.0174, "step": 722375 }, { "epoch": 7.1, "grad_norm": 3.5094070434570312, "learning_rate": 1.464540697270299e-06, "loss": 0.0554, "step": 722400 }, { "epoch": 7.1, "grad_norm": 2.884373903274536, "learning_rate": 1.4644165748160507e-06, "loss": 0.0175, "step": 722425 }, { "epoch": 7.1, "grad_norm": 2.9717001914978027, "learning_rate": 1.4642924523618021e-06, "loss": 0.054, "step": 722450 }, { "epoch": 7.1, "grad_norm": 5.350360870361328, "learning_rate": 1.4641683299075538e-06, "loss": 0.0286, "step": 722475 }, { "epoch": 7.1, "grad_norm": 4.016016960144043, "learning_rate": 1.4640442074533054e-06, "loss": 0.0496, "step": 722500 }, { "epoch": 7.1, "grad_norm": 1.674761414527893, "learning_rate": 1.4639200849990568e-06, "loss": 0.015, "step": 722525 }, { "epoch": 7.1, "grad_norm": 4.761775493621826, "learning_rate": 1.4637959625448084e-06, "loss": 0.0658, "step": 722550 }, { "epoch": 7.1, "grad_norm": 1.8562626838684082, "learning_rate": 1.4636718400905597e-06, "loss": 0.0166, "step": 722575 }, { "epoch": 7.1, "grad_norm": 5.32219123840332, "learning_rate": 1.4635477176363113e-06, "loss": 0.0748, "step": 722600 }, { "epoch": 7.1, "grad_norm": 1.15402090549469, "learning_rate": 1.463423595182063e-06, "loss": 0.0192, "step": 722625 }, { "epoch": 7.11, "grad_norm": 4.188237190246582, "learning_rate": 1.4632994727278144e-06, "loss": 0.0298, "step": 722650 }, { "epoch": 7.11, "grad_norm": 0.02213997207581997, "learning_rate": 1.463175350273566e-06, "loss": 0.0238, "step": 722675 }, { "epoch": 7.11, "grad_norm": 3.4237000942230225, "learning_rate": 1.4630512278193176e-06, "loss": 0.0539, "step": 722700 }, { "epoch": 7.11, "grad_norm": 4.824796676635742, "learning_rate": 1.462927105365069e-06, "loss": 0.021, "step": 722725 }, { "epoch": 7.11, "grad_norm": 2.859994649887085, "learning_rate": 1.4628029829108207e-06, "loss": 0.071, "step": 722750 }, { "epoch": 7.11, "grad_norm": 0.029304172843694687, "learning_rate": 1.4626788604565723e-06, "loss": 0.0148, "step": 722775 }, { "epoch": 7.11, "grad_norm": 3.098906993865967, "learning_rate": 1.4625547380023235e-06, "loss": 0.0748, "step": 722800 }, { "epoch": 7.11, "grad_norm": 0.8859007954597473, "learning_rate": 1.4624306155480752e-06, "loss": 0.0148, "step": 722825 }, { "epoch": 7.11, "grad_norm": 2.7921109199523926, "learning_rate": 1.4623064930938268e-06, "loss": 0.0483, "step": 722850 }, { "epoch": 7.11, "grad_norm": 2.166147232055664, "learning_rate": 1.4621823706395782e-06, "loss": 0.0135, "step": 722875 }, { "epoch": 7.11, "grad_norm": 3.4373486042022705, "learning_rate": 1.4620582481853299e-06, "loss": 0.0406, "step": 722900 }, { "epoch": 7.11, "grad_norm": 2.150148868560791, "learning_rate": 1.4619341257310815e-06, "loss": 0.0289, "step": 722925 }, { "epoch": 7.11, "grad_norm": 3.9791550636291504, "learning_rate": 1.461810003276833e-06, "loss": 0.0845, "step": 722950 }, { "epoch": 7.11, "grad_norm": 4.0015153884887695, "learning_rate": 1.4616858808225846e-06, "loss": 0.0148, "step": 722975 }, { "epoch": 7.11, "grad_norm": 3.7336888313293457, "learning_rate": 1.4615617583683358e-06, "loss": 0.0454, "step": 723000 }, { "epoch": 7.11, "grad_norm": 7.81728982925415, "learning_rate": 1.4614376359140874e-06, "loss": 0.016, "step": 723025 }, { "epoch": 7.11, "grad_norm": 1.1893689632415771, "learning_rate": 1.461313513459839e-06, "loss": 0.0576, "step": 723050 }, { "epoch": 7.11, "grad_norm": 0.3034920394420624, "learning_rate": 1.4611893910055905e-06, "loss": 0.0229, "step": 723075 }, { "epoch": 7.11, "grad_norm": 4.768486499786377, "learning_rate": 1.461065268551342e-06, "loss": 0.0536, "step": 723100 }, { "epoch": 7.11, "grad_norm": 2.6167359352111816, "learning_rate": 1.4609411460970937e-06, "loss": 0.019, "step": 723125 }, { "epoch": 7.11, "grad_norm": 2.296593189239502, "learning_rate": 1.4608170236428451e-06, "loss": 0.0493, "step": 723150 }, { "epoch": 7.11, "grad_norm": 0.4434162676334381, "learning_rate": 1.4606929011885968e-06, "loss": 0.0225, "step": 723175 }, { "epoch": 7.11, "grad_norm": 2.289529800415039, "learning_rate": 1.4605687787343484e-06, "loss": 0.074, "step": 723200 }, { "epoch": 7.11, "grad_norm": 1.8796653747558594, "learning_rate": 1.4604446562800998e-06, "loss": 0.0225, "step": 723225 }, { "epoch": 7.11, "grad_norm": 3.110999584197998, "learning_rate": 1.4603205338258515e-06, "loss": 0.0408, "step": 723250 }, { "epoch": 7.11, "grad_norm": 0.011780127882957458, "learning_rate": 1.4601964113716031e-06, "loss": 0.0209, "step": 723275 }, { "epoch": 7.11, "grad_norm": 3.2103493213653564, "learning_rate": 1.4600722889173543e-06, "loss": 0.0429, "step": 723300 }, { "epoch": 7.11, "grad_norm": 0.2730928361415863, "learning_rate": 1.459948166463106e-06, "loss": 0.0216, "step": 723325 }, { "epoch": 7.11, "grad_norm": 4.25446081161499, "learning_rate": 1.4598240440088576e-06, "loss": 0.0517, "step": 723350 }, { "epoch": 7.11, "grad_norm": 3.332141876220703, "learning_rate": 1.459699921554609e-06, "loss": 0.0125, "step": 723375 }, { "epoch": 7.11, "grad_norm": 2.1803247928619385, "learning_rate": 1.4595757991003607e-06, "loss": 0.0434, "step": 723400 }, { "epoch": 7.11, "grad_norm": 0.2314712107181549, "learning_rate": 1.459451676646112e-06, "loss": 0.0219, "step": 723425 }, { "epoch": 7.11, "grad_norm": 3.3541202545166016, "learning_rate": 1.4593275541918637e-06, "loss": 0.0653, "step": 723450 }, { "epoch": 7.11, "grad_norm": 11.26593017578125, "learning_rate": 1.4592034317376153e-06, "loss": 0.0141, "step": 723475 }, { "epoch": 7.11, "grad_norm": 10.729303359985352, "learning_rate": 1.4590793092833666e-06, "loss": 0.0678, "step": 723500 }, { "epoch": 7.11, "grad_norm": 0.13017557561397552, "learning_rate": 1.4589551868291182e-06, "loss": 0.0194, "step": 723525 }, { "epoch": 7.11, "grad_norm": 2.5938544273376465, "learning_rate": 1.4588310643748698e-06, "loss": 0.0777, "step": 723550 }, { "epoch": 7.11, "grad_norm": 3.214280128479004, "learning_rate": 1.4587069419206212e-06, "loss": 0.0224, "step": 723575 }, { "epoch": 7.11, "grad_norm": 1.416918396949768, "learning_rate": 1.4585828194663729e-06, "loss": 0.0217, "step": 723600 }, { "epoch": 7.11, "grad_norm": 5.414993762969971, "learning_rate": 1.4584586970121245e-06, "loss": 0.0169, "step": 723625 }, { "epoch": 7.12, "grad_norm": 6.016355514526367, "learning_rate": 1.458334574557876e-06, "loss": 0.0512, "step": 723650 }, { "epoch": 7.12, "grad_norm": 3.9599456787109375, "learning_rate": 1.4582104521036276e-06, "loss": 0.0181, "step": 723675 }, { "epoch": 7.12, "grad_norm": 5.867697238922119, "learning_rate": 1.4580863296493792e-06, "loss": 0.0548, "step": 723700 }, { "epoch": 7.12, "grad_norm": 0.03565604239702225, "learning_rate": 1.4579622071951304e-06, "loss": 0.0206, "step": 723725 }, { "epoch": 7.12, "grad_norm": 3.000725269317627, "learning_rate": 1.457838084740882e-06, "loss": 0.0435, "step": 723750 }, { "epoch": 7.12, "grad_norm": 3.8785006999969482, "learning_rate": 1.4577139622866337e-06, "loss": 0.0142, "step": 723775 }, { "epoch": 7.12, "grad_norm": 2.8953142166137695, "learning_rate": 1.4575898398323851e-06, "loss": 0.069, "step": 723800 }, { "epoch": 7.12, "grad_norm": 29.349918365478516, "learning_rate": 1.4574657173781368e-06, "loss": 0.029, "step": 723825 }, { "epoch": 7.12, "grad_norm": 3.021604299545288, "learning_rate": 1.4573415949238882e-06, "loss": 0.0601, "step": 723850 }, { "epoch": 7.12, "grad_norm": 3.8406896591186523, "learning_rate": 1.4572174724696398e-06, "loss": 0.0213, "step": 723875 }, { "epoch": 7.12, "grad_norm": 2.982497453689575, "learning_rate": 1.4570933500153914e-06, "loss": 0.0577, "step": 723900 }, { "epoch": 7.12, "grad_norm": 12.220823287963867, "learning_rate": 1.4569692275611427e-06, "loss": 0.014, "step": 723925 }, { "epoch": 7.12, "grad_norm": 3.9935503005981445, "learning_rate": 1.4568451051068943e-06, "loss": 0.0573, "step": 723950 }, { "epoch": 7.12, "grad_norm": 13.151780128479004, "learning_rate": 1.456720982652646e-06, "loss": 0.0193, "step": 723975 }, { "epoch": 7.12, "grad_norm": 4.1876397132873535, "learning_rate": 1.4565968601983973e-06, "loss": 0.0652, "step": 724000 }, { "epoch": 7.12, "grad_norm": 0.12022958695888519, "learning_rate": 1.456472737744149e-06, "loss": 0.0166, "step": 724025 }, { "epoch": 7.12, "grad_norm": 2.798011064529419, "learning_rate": 1.4563486152899006e-06, "loss": 0.0547, "step": 724050 }, { "epoch": 7.12, "grad_norm": 5.190188884735107, "learning_rate": 1.456224492835652e-06, "loss": 0.0185, "step": 724075 }, { "epoch": 7.12, "grad_norm": 4.832051753997803, "learning_rate": 1.4561003703814037e-06, "loss": 0.0237, "step": 724100 }, { "epoch": 7.12, "grad_norm": 1.520857334136963, "learning_rate": 1.4559762479271553e-06, "loss": 0.0209, "step": 724125 }, { "epoch": 7.12, "grad_norm": 2.199357748031616, "learning_rate": 1.4558521254729065e-06, "loss": 0.0594, "step": 724150 }, { "epoch": 7.12, "grad_norm": 0.3041627109050751, "learning_rate": 1.4557280030186582e-06, "loss": 0.0152, "step": 724175 }, { "epoch": 7.12, "grad_norm": 3.4422688484191895, "learning_rate": 1.4556038805644098e-06, "loss": 0.0546, "step": 724200 }, { "epoch": 7.12, "grad_norm": 1.0025306940078735, "learning_rate": 1.4554797581101612e-06, "loss": 0.0264, "step": 724225 }, { "epoch": 7.12, "grad_norm": 1.962428331375122, "learning_rate": 1.4553556356559129e-06, "loss": 0.06, "step": 724250 }, { "epoch": 7.12, "grad_norm": 1.8835960626602173, "learning_rate": 1.4552315132016643e-06, "loss": 0.0135, "step": 724275 }, { "epoch": 7.12, "grad_norm": 2.8072144985198975, "learning_rate": 1.455107390747416e-06, "loss": 0.0683, "step": 724300 }, { "epoch": 7.12, "grad_norm": 0.038487646728754044, "learning_rate": 1.4549832682931675e-06, "loss": 0.0208, "step": 724325 }, { "epoch": 7.12, "grad_norm": 3.6801695823669434, "learning_rate": 1.4548591458389188e-06, "loss": 0.0763, "step": 724350 }, { "epoch": 7.12, "grad_norm": 7.743415832519531, "learning_rate": 1.4547350233846704e-06, "loss": 0.0131, "step": 724375 }, { "epoch": 7.12, "grad_norm": 3.3548598289489746, "learning_rate": 1.454610900930422e-06, "loss": 0.0669, "step": 724400 }, { "epoch": 7.12, "grad_norm": 0.2784586250782013, "learning_rate": 1.4544867784761734e-06, "loss": 0.0161, "step": 724425 }, { "epoch": 7.12, "grad_norm": 1.8262033462524414, "learning_rate": 1.454362656021925e-06, "loss": 0.0571, "step": 724450 }, { "epoch": 7.12, "grad_norm": 0.28492945432662964, "learning_rate": 1.4542385335676767e-06, "loss": 0.0207, "step": 724475 }, { "epoch": 7.12, "grad_norm": 4.808202266693115, "learning_rate": 1.4541144111134281e-06, "loss": 0.0589, "step": 724500 }, { "epoch": 7.12, "grad_norm": 0.26462846994400024, "learning_rate": 1.4539902886591798e-06, "loss": 0.03, "step": 724525 }, { "epoch": 7.12, "grad_norm": 3.8278257846832275, "learning_rate": 1.4538711311031012e-06, "loss": 0.0578, "step": 724550 }, { "epoch": 7.12, "grad_norm": 2.420966863632202, "learning_rate": 1.4537470086488529e-06, "loss": 0.0192, "step": 724575 }, { "epoch": 7.12, "grad_norm": 3.1401283740997314, "learning_rate": 1.453622886194604e-06, "loss": 0.0504, "step": 724600 }, { "epoch": 7.12, "grad_norm": 12.130727767944336, "learning_rate": 1.4534987637403557e-06, "loss": 0.0291, "step": 724625 }, { "epoch": 7.12, "grad_norm": 3.419600009918213, "learning_rate": 1.4533746412861073e-06, "loss": 0.0566, "step": 724650 }, { "epoch": 7.13, "grad_norm": 5.422631740570068, "learning_rate": 1.4532505188318588e-06, "loss": 0.0141, "step": 724675 }, { "epoch": 7.13, "grad_norm": 3.0517079830169678, "learning_rate": 1.4531263963776104e-06, "loss": 0.0628, "step": 724700 }, { "epoch": 7.13, "grad_norm": 0.7825567126274109, "learning_rate": 1.453002273923362e-06, "loss": 0.0358, "step": 724725 }, { "epoch": 7.13, "grad_norm": 2.2916929721832275, "learning_rate": 1.4528781514691135e-06, "loss": 0.0511, "step": 724750 }, { "epoch": 7.13, "grad_norm": 0.039341043680906296, "learning_rate": 1.452754029014865e-06, "loss": 0.016, "step": 724775 }, { "epoch": 7.13, "grad_norm": 16.07709312438965, "learning_rate": 1.4526299065606167e-06, "loss": 0.0521, "step": 724800 }, { "epoch": 7.13, "grad_norm": 6.417664051055908, "learning_rate": 1.4525057841063682e-06, "loss": 0.019, "step": 724825 }, { "epoch": 7.13, "grad_norm": 3.2076568603515625, "learning_rate": 1.4523816616521198e-06, "loss": 0.0494, "step": 724850 }, { "epoch": 7.13, "grad_norm": 2.401256561279297, "learning_rate": 1.452257539197871e-06, "loss": 0.021, "step": 724875 }, { "epoch": 7.13, "grad_norm": 3.2561521530151367, "learning_rate": 1.4521334167436226e-06, "loss": 0.0644, "step": 724900 }, { "epoch": 7.13, "grad_norm": 6.840097904205322, "learning_rate": 1.4520092942893743e-06, "loss": 0.0214, "step": 724925 }, { "epoch": 7.13, "grad_norm": 2.276860237121582, "learning_rate": 1.4518851718351257e-06, "loss": 0.0555, "step": 724950 }, { "epoch": 7.13, "grad_norm": 4.89280366897583, "learning_rate": 1.4517610493808773e-06, "loss": 0.016, "step": 724975 }, { "epoch": 7.13, "grad_norm": 8.896377563476562, "learning_rate": 1.451636926926629e-06, "loss": 0.0555, "step": 725000 }, { "epoch": 7.13, "grad_norm": 0.6138449907302856, "learning_rate": 1.4515128044723804e-06, "loss": 0.0188, "step": 725025 }, { "epoch": 7.13, "grad_norm": 2.7392241954803467, "learning_rate": 1.451388682018132e-06, "loss": 0.0505, "step": 725050 }, { "epoch": 7.13, "grad_norm": 0.6535317301750183, "learning_rate": 1.4512645595638837e-06, "loss": 0.0121, "step": 725075 }, { "epoch": 7.13, "grad_norm": 1.6882420778274536, "learning_rate": 1.4511404371096349e-06, "loss": 0.0557, "step": 725100 }, { "epoch": 7.13, "grad_norm": 0.5457779169082642, "learning_rate": 1.4510163146553865e-06, "loss": 0.0195, "step": 725125 }, { "epoch": 7.13, "grad_norm": 3.100637197494507, "learning_rate": 1.4508921922011381e-06, "loss": 0.0589, "step": 725150 }, { "epoch": 7.13, "grad_norm": 7.646435737609863, "learning_rate": 1.4507680697468896e-06, "loss": 0.0206, "step": 725175 }, { "epoch": 7.13, "grad_norm": 3.7964911460876465, "learning_rate": 1.4506439472926412e-06, "loss": 0.0631, "step": 725200 }, { "epoch": 7.13, "grad_norm": 4.14056921005249, "learning_rate": 1.4505198248383928e-06, "loss": 0.0125, "step": 725225 }, { "epoch": 7.13, "grad_norm": 2.560661554336548, "learning_rate": 1.4503957023841443e-06, "loss": 0.0512, "step": 725250 }, { "epoch": 7.13, "grad_norm": 1.296942949295044, "learning_rate": 1.4502715799298959e-06, "loss": 0.0124, "step": 725275 }, { "epoch": 7.13, "grad_norm": 3.350414752960205, "learning_rate": 1.450147457475647e-06, "loss": 0.0517, "step": 725300 }, { "epoch": 7.13, "grad_norm": 5.700858116149902, "learning_rate": 1.4500233350213987e-06, "loss": 0.0108, "step": 725325 }, { "epoch": 7.13, "grad_norm": 3.355280876159668, "learning_rate": 1.4498992125671504e-06, "loss": 0.0661, "step": 725350 }, { "epoch": 7.13, "grad_norm": 1.8520907163619995, "learning_rate": 1.4497750901129018e-06, "loss": 0.0264, "step": 725375 }, { "epoch": 7.13, "grad_norm": 2.248063802719116, "learning_rate": 1.4496509676586534e-06, "loss": 0.0405, "step": 725400 }, { "epoch": 7.13, "grad_norm": 7.537069797515869, "learning_rate": 1.449526845204405e-06, "loss": 0.0167, "step": 725425 }, { "epoch": 7.13, "grad_norm": 3.6516201496124268, "learning_rate": 1.4494027227501565e-06, "loss": 0.0668, "step": 725450 }, { "epoch": 7.13, "grad_norm": 9.550368309020996, "learning_rate": 1.4492786002959081e-06, "loss": 0.0197, "step": 725475 }, { "epoch": 7.13, "grad_norm": 2.1088695526123047, "learning_rate": 1.4491544778416598e-06, "loss": 0.0424, "step": 725500 }, { "epoch": 7.13, "grad_norm": 1.8747961521148682, "learning_rate": 1.449030355387411e-06, "loss": 0.0208, "step": 725525 }, { "epoch": 7.13, "grad_norm": 1.976465106010437, "learning_rate": 1.4489062329331626e-06, "loss": 0.0456, "step": 725550 }, { "epoch": 7.13, "grad_norm": 8.72304916381836, "learning_rate": 1.4487821104789142e-06, "loss": 0.0186, "step": 725575 }, { "epoch": 7.13, "grad_norm": 5.689408302307129, "learning_rate": 1.4486579880246657e-06, "loss": 0.0476, "step": 725600 }, { "epoch": 7.13, "grad_norm": 4.216984272003174, "learning_rate": 1.4485338655704173e-06, "loss": 0.0184, "step": 725625 }, { "epoch": 7.13, "grad_norm": 1.3309264183044434, "learning_rate": 1.448409743116169e-06, "loss": 0.0553, "step": 725650 }, { "epoch": 7.13, "grad_norm": 9.931549072265625, "learning_rate": 1.4482856206619204e-06, "loss": 0.016, "step": 725675 }, { "epoch": 7.14, "grad_norm": 3.5087485313415527, "learning_rate": 1.448161498207672e-06, "loss": 0.0508, "step": 725700 }, { "epoch": 7.14, "grad_norm": 0.670799970626831, "learning_rate": 1.4480373757534232e-06, "loss": 0.0189, "step": 725725 }, { "epoch": 7.14, "grad_norm": 2.1836934089660645, "learning_rate": 1.4479132532991748e-06, "loss": 0.0404, "step": 725750 }, { "epoch": 7.14, "grad_norm": 10.007757186889648, "learning_rate": 1.4477891308449265e-06, "loss": 0.0204, "step": 725775 }, { "epoch": 7.14, "grad_norm": 4.367403507232666, "learning_rate": 1.447665008390678e-06, "loss": 0.0565, "step": 725800 }, { "epoch": 7.14, "grad_norm": 12.113993644714355, "learning_rate": 1.4475408859364295e-06, "loss": 0.0174, "step": 725825 }, { "epoch": 7.14, "grad_norm": 2.9435784816741943, "learning_rate": 1.4474167634821812e-06, "loss": 0.0456, "step": 725850 }, { "epoch": 7.14, "grad_norm": 7.580270290374756, "learning_rate": 1.4472926410279326e-06, "loss": 0.0131, "step": 725875 }, { "epoch": 7.14, "grad_norm": 3.1763880252838135, "learning_rate": 1.4471685185736842e-06, "loss": 0.0659, "step": 725900 }, { "epoch": 7.14, "grad_norm": 6.003979682922363, "learning_rate": 1.4470443961194359e-06, "loss": 0.028, "step": 725925 }, { "epoch": 7.14, "grad_norm": 4.401321887969971, "learning_rate": 1.446920273665187e-06, "loss": 0.0483, "step": 725950 }, { "epoch": 7.14, "grad_norm": 2.030371904373169, "learning_rate": 1.4467961512109387e-06, "loss": 0.0159, "step": 725975 }, { "epoch": 7.14, "grad_norm": 1.5795776844024658, "learning_rate": 1.4466720287566903e-06, "loss": 0.0612, "step": 726000 }, { "epoch": 7.14, "grad_norm": 0.1580403447151184, "learning_rate": 1.4465479063024418e-06, "loss": 0.0224, "step": 726025 }, { "epoch": 7.14, "grad_norm": 1.3091926574707031, "learning_rate": 1.4464237838481934e-06, "loss": 0.04, "step": 726050 }, { "epoch": 7.14, "grad_norm": 0.1757490038871765, "learning_rate": 1.446299661393945e-06, "loss": 0.0177, "step": 726075 }, { "epoch": 7.14, "grad_norm": 1.3114292621612549, "learning_rate": 1.4461755389396965e-06, "loss": 0.0422, "step": 726100 }, { "epoch": 7.14, "grad_norm": 1.1566579341888428, "learning_rate": 1.446051416485448e-06, "loss": 0.0199, "step": 726125 }, { "epoch": 7.14, "grad_norm": 3.9392459392547607, "learning_rate": 1.4459272940311995e-06, "loss": 0.0411, "step": 726150 }, { "epoch": 7.14, "grad_norm": 0.41609108448028564, "learning_rate": 1.4458031715769511e-06, "loss": 0.0123, "step": 726175 }, { "epoch": 7.14, "grad_norm": 5.01816463470459, "learning_rate": 1.4456790491227028e-06, "loss": 0.054, "step": 726200 }, { "epoch": 7.14, "grad_norm": 2.5937392711639404, "learning_rate": 1.445554926668454e-06, "loss": 0.0201, "step": 726225 }, { "epoch": 7.14, "grad_norm": 4.010051727294922, "learning_rate": 1.4454308042142056e-06, "loss": 0.0602, "step": 726250 }, { "epoch": 7.14, "grad_norm": 16.291790008544922, "learning_rate": 1.4453066817599573e-06, "loss": 0.0229, "step": 726275 }, { "epoch": 7.14, "grad_norm": 3.1617074012756348, "learning_rate": 1.4451825593057087e-06, "loss": 0.0497, "step": 726300 }, { "epoch": 7.14, "grad_norm": 1.3498096466064453, "learning_rate": 1.4450584368514603e-06, "loss": 0.014, "step": 726325 }, { "epoch": 7.14, "grad_norm": 3.0340516567230225, "learning_rate": 1.444934314397212e-06, "loss": 0.0695, "step": 726350 }, { "epoch": 7.14, "grad_norm": 4.911664962768555, "learning_rate": 1.4448101919429634e-06, "loss": 0.0257, "step": 726375 }, { "epoch": 7.14, "grad_norm": 2.1938369274139404, "learning_rate": 1.444686069488715e-06, "loss": 0.0462, "step": 726400 }, { "epoch": 7.14, "grad_norm": 0.16704508662223816, "learning_rate": 1.4445619470344667e-06, "loss": 0.0136, "step": 726425 }, { "epoch": 7.14, "grad_norm": 5.457915306091309, "learning_rate": 1.4444378245802179e-06, "loss": 0.0776, "step": 726450 }, { "epoch": 7.14, "grad_norm": 5.923571586608887, "learning_rate": 1.4443137021259695e-06, "loss": 0.0213, "step": 726475 }, { "epoch": 7.14, "grad_norm": 2.1248514652252197, "learning_rate": 1.4441895796717211e-06, "loss": 0.0577, "step": 726500 }, { "epoch": 7.14, "grad_norm": 5.89489221572876, "learning_rate": 1.4440654572174726e-06, "loss": 0.0167, "step": 726525 }, { "epoch": 7.14, "grad_norm": 1.6854732036590576, "learning_rate": 1.4439413347632242e-06, "loss": 0.0663, "step": 726550 }, { "epoch": 7.14, "grad_norm": 2.303205966949463, "learning_rate": 1.4438172123089756e-06, "loss": 0.0204, "step": 726575 }, { "epoch": 7.14, "grad_norm": 3.424013137817383, "learning_rate": 1.4436930898547272e-06, "loss": 0.0635, "step": 726600 }, { "epoch": 7.14, "grad_norm": 14.562491416931152, "learning_rate": 1.4435689674004789e-06, "loss": 0.0159, "step": 726625 }, { "epoch": 7.14, "grad_norm": 2.4178242683410645, "learning_rate": 1.44344484494623e-06, "loss": 0.0453, "step": 726650 }, { "epoch": 7.14, "grad_norm": 0.6584643125534058, "learning_rate": 1.4433207224919817e-06, "loss": 0.0078, "step": 726675 }, { "epoch": 7.15, "grad_norm": 3.428356647491455, "learning_rate": 1.4431966000377334e-06, "loss": 0.047, "step": 726700 }, { "epoch": 7.15, "grad_norm": 0.25273555517196655, "learning_rate": 1.4430724775834848e-06, "loss": 0.0238, "step": 726725 }, { "epoch": 7.15, "grad_norm": 2.993816375732422, "learning_rate": 1.4429483551292364e-06, "loss": 0.0458, "step": 726750 }, { "epoch": 7.15, "grad_norm": 3.8734395503997803, "learning_rate": 1.442824232674988e-06, "loss": 0.0311, "step": 726775 }, { "epoch": 7.15, "grad_norm": 2.4326369762420654, "learning_rate": 1.4427001102207395e-06, "loss": 0.0819, "step": 726800 }, { "epoch": 7.15, "grad_norm": 3.9903674125671387, "learning_rate": 1.4425759877664911e-06, "loss": 0.0166, "step": 726825 }, { "epoch": 7.15, "grad_norm": 2.1435506343841553, "learning_rate": 1.4424518653122428e-06, "loss": 0.0453, "step": 726850 }, { "epoch": 7.15, "grad_norm": 2.287968397140503, "learning_rate": 1.442327742857994e-06, "loss": 0.0176, "step": 726875 }, { "epoch": 7.15, "grad_norm": 3.84566068649292, "learning_rate": 1.4422085853019154e-06, "loss": 0.06, "step": 726900 }, { "epoch": 7.15, "grad_norm": 3.140843629837036, "learning_rate": 1.442084462847667e-06, "loss": 0.022, "step": 726925 }, { "epoch": 7.15, "grad_norm": 4.29616117477417, "learning_rate": 1.4419603403934187e-06, "loss": 0.0599, "step": 726950 }, { "epoch": 7.15, "grad_norm": 1.6800775527954102, "learning_rate": 1.4418362179391701e-06, "loss": 0.0249, "step": 726975 }, { "epoch": 7.15, "grad_norm": 2.145765781402588, "learning_rate": 1.4417120954849217e-06, "loss": 0.0371, "step": 727000 }, { "epoch": 7.15, "grad_norm": 11.128994941711426, "learning_rate": 1.4415879730306734e-06, "loss": 0.0231, "step": 727025 }, { "epoch": 7.15, "grad_norm": 2.277862071990967, "learning_rate": 1.4414638505764248e-06, "loss": 0.0606, "step": 727050 }, { "epoch": 7.15, "grad_norm": 4.62733793258667, "learning_rate": 1.4413397281221764e-06, "loss": 0.0233, "step": 727075 }, { "epoch": 7.15, "grad_norm": 3.097525119781494, "learning_rate": 1.4412156056679276e-06, "loss": 0.0757, "step": 727100 }, { "epoch": 7.15, "grad_norm": 0.5867190957069397, "learning_rate": 1.4410914832136793e-06, "loss": 0.032, "step": 727125 }, { "epoch": 7.15, "grad_norm": 18.9212646484375, "learning_rate": 1.440967360759431e-06, "loss": 0.0558, "step": 727150 }, { "epoch": 7.15, "grad_norm": 6.0588765144348145, "learning_rate": 1.4408432383051823e-06, "loss": 0.0275, "step": 727175 }, { "epoch": 7.15, "grad_norm": 2.839829206466675, "learning_rate": 1.440719115850934e-06, "loss": 0.0566, "step": 727200 }, { "epoch": 7.15, "grad_norm": 3.293862819671631, "learning_rate": 1.4405949933966856e-06, "loss": 0.0193, "step": 727225 }, { "epoch": 7.15, "grad_norm": 4.5046234130859375, "learning_rate": 1.440470870942437e-06, "loss": 0.056, "step": 727250 }, { "epoch": 7.15, "grad_norm": 6.831342697143555, "learning_rate": 1.4403467484881887e-06, "loss": 0.0245, "step": 727275 }, { "epoch": 7.15, "grad_norm": 3.1912596225738525, "learning_rate": 1.4402226260339403e-06, "loss": 0.0791, "step": 727300 }, { "epoch": 7.15, "grad_norm": 2.8875083923339844, "learning_rate": 1.4400985035796915e-06, "loss": 0.0142, "step": 727325 }, { "epoch": 7.15, "grad_norm": 3.2705318927764893, "learning_rate": 1.4399743811254432e-06, "loss": 0.054, "step": 727350 }, { "epoch": 7.15, "grad_norm": 0.21151617169380188, "learning_rate": 1.4398502586711948e-06, "loss": 0.0193, "step": 727375 }, { "epoch": 7.15, "grad_norm": 4.057939529418945, "learning_rate": 1.4397261362169462e-06, "loss": 0.0414, "step": 727400 }, { "epoch": 7.15, "grad_norm": 0.9373172521591187, "learning_rate": 1.4396020137626978e-06, "loss": 0.0188, "step": 727425 }, { "epoch": 7.15, "grad_norm": 3.2563440799713135, "learning_rate": 1.4394778913084495e-06, "loss": 0.0532, "step": 727450 }, { "epoch": 7.15, "grad_norm": 1.9271461963653564, "learning_rate": 1.439353768854201e-06, "loss": 0.0322, "step": 727475 }, { "epoch": 7.15, "grad_norm": 3.838425636291504, "learning_rate": 1.4392296463999525e-06, "loss": 0.0404, "step": 727500 }, { "epoch": 7.15, "grad_norm": 3.3819711208343506, "learning_rate": 1.4391055239457037e-06, "loss": 0.0144, "step": 727525 }, { "epoch": 7.15, "grad_norm": 4.3562846183776855, "learning_rate": 1.4389814014914554e-06, "loss": 0.0643, "step": 727550 }, { "epoch": 7.15, "grad_norm": 1.1733183860778809, "learning_rate": 1.438857279037207e-06, "loss": 0.0125, "step": 727575 }, { "epoch": 7.15, "grad_norm": 3.5498743057250977, "learning_rate": 1.4387331565829584e-06, "loss": 0.0684, "step": 727600 }, { "epoch": 7.15, "grad_norm": 3.109450578689575, "learning_rate": 1.43860903412871e-06, "loss": 0.0166, "step": 727625 }, { "epoch": 7.15, "grad_norm": 2.265071392059326, "learning_rate": 1.4384849116744617e-06, "loss": 0.0657, "step": 727650 }, { "epoch": 7.15, "grad_norm": 1.2737947702407837, "learning_rate": 1.4383607892202131e-06, "loss": 0.032, "step": 727675 }, { "epoch": 7.15, "grad_norm": 2.5644619464874268, "learning_rate": 1.4382366667659648e-06, "loss": 0.0659, "step": 727700 }, { "epoch": 7.16, "grad_norm": 0.15598434209823608, "learning_rate": 1.4381125443117164e-06, "loss": 0.0135, "step": 727725 }, { "epoch": 7.16, "grad_norm": 3.2596282958984375, "learning_rate": 1.4379884218574678e-06, "loss": 0.0511, "step": 727750 }, { "epoch": 7.16, "grad_norm": 9.138978004455566, "learning_rate": 1.4378642994032195e-06, "loss": 0.0283, "step": 727775 }, { "epoch": 7.16, "grad_norm": 2.1804420948028564, "learning_rate": 1.437740176948971e-06, "loss": 0.0516, "step": 727800 }, { "epoch": 7.16, "grad_norm": 0.5385563969612122, "learning_rate": 1.4376160544947223e-06, "loss": 0.0095, "step": 727825 }, { "epoch": 7.16, "grad_norm": 6.63880729675293, "learning_rate": 1.437491932040474e-06, "loss": 0.0629, "step": 727850 }, { "epoch": 7.16, "grad_norm": 8.822632789611816, "learning_rate": 1.4373678095862256e-06, "loss": 0.0174, "step": 727875 }, { "epoch": 7.16, "grad_norm": 2.105628490447998, "learning_rate": 1.437243687131977e-06, "loss": 0.0581, "step": 727900 }, { "epoch": 7.16, "grad_norm": 7.910539627075195, "learning_rate": 1.4371195646777286e-06, "loss": 0.0139, "step": 727925 }, { "epoch": 7.16, "grad_norm": 2.704475164413452, "learning_rate": 1.4369954422234803e-06, "loss": 0.0651, "step": 727950 }, { "epoch": 7.16, "grad_norm": 2.1277270317077637, "learning_rate": 1.4368713197692317e-06, "loss": 0.0173, "step": 727975 }, { "epoch": 7.16, "grad_norm": 1.693607211112976, "learning_rate": 1.4367471973149833e-06, "loss": 0.0429, "step": 728000 }, { "epoch": 7.16, "grad_norm": 0.8848771452903748, "learning_rate": 1.4366230748607345e-06, "loss": 0.0143, "step": 728025 }, { "epoch": 7.16, "grad_norm": 3.0899927616119385, "learning_rate": 1.4364989524064862e-06, "loss": 0.0567, "step": 728050 }, { "epoch": 7.16, "grad_norm": 9.245365142822266, "learning_rate": 1.4363748299522378e-06, "loss": 0.0213, "step": 728075 }, { "epoch": 7.16, "grad_norm": 3.587691307067871, "learning_rate": 1.4362507074979892e-06, "loss": 0.0469, "step": 728100 }, { "epoch": 7.16, "grad_norm": 1.4875755310058594, "learning_rate": 1.4361265850437409e-06, "loss": 0.0196, "step": 728125 }, { "epoch": 7.16, "grad_norm": 3.1538608074188232, "learning_rate": 1.4360024625894925e-06, "loss": 0.0492, "step": 728150 }, { "epoch": 7.16, "grad_norm": 1.0513451099395752, "learning_rate": 1.435878340135244e-06, "loss": 0.0188, "step": 728175 }, { "epoch": 7.16, "grad_norm": 2.384213924407959, "learning_rate": 1.4357542176809956e-06, "loss": 0.05, "step": 728200 }, { "epoch": 7.16, "grad_norm": 0.18916726112365723, "learning_rate": 1.4356300952267472e-06, "loss": 0.0152, "step": 728225 }, { "epoch": 7.16, "grad_norm": 4.941858768463135, "learning_rate": 1.4355059727724984e-06, "loss": 0.0622, "step": 728250 }, { "epoch": 7.16, "grad_norm": 6.898728370666504, "learning_rate": 1.43538185031825e-06, "loss": 0.0293, "step": 728275 }, { "epoch": 7.16, "grad_norm": 3.5356881618499756, "learning_rate": 1.4352577278640017e-06, "loss": 0.0422, "step": 728300 }, { "epoch": 7.16, "grad_norm": 7.096157073974609, "learning_rate": 1.435133605409753e-06, "loss": 0.0137, "step": 728325 }, { "epoch": 7.16, "grad_norm": 3.7915735244750977, "learning_rate": 1.4350094829555047e-06, "loss": 0.0573, "step": 728350 }, { "epoch": 7.16, "grad_norm": 0.15575414896011353, "learning_rate": 1.4348853605012564e-06, "loss": 0.0284, "step": 728375 }, { "epoch": 7.16, "grad_norm": 5.280480861663818, "learning_rate": 1.4347612380470078e-06, "loss": 0.0256, "step": 728400 }, { "epoch": 7.16, "grad_norm": 2.750986099243164, "learning_rate": 1.4346371155927594e-06, "loss": 0.027, "step": 728425 }, { "epoch": 7.16, "grad_norm": 3.960907459259033, "learning_rate": 1.4345129931385106e-06, "loss": 0.0654, "step": 728450 }, { "epoch": 7.16, "grad_norm": 0.6120471358299255, "learning_rate": 1.4343888706842623e-06, "loss": 0.0165, "step": 728475 }, { "epoch": 7.16, "grad_norm": 3.1871798038482666, "learning_rate": 1.434264748230014e-06, "loss": 0.0595, "step": 728500 }, { "epoch": 7.16, "grad_norm": 2.019425392150879, "learning_rate": 1.4341406257757653e-06, "loss": 0.0328, "step": 728525 }, { "epoch": 7.16, "grad_norm": 4.244369983673096, "learning_rate": 1.434016503321517e-06, "loss": 0.078, "step": 728550 }, { "epoch": 7.16, "grad_norm": 4.876628398895264, "learning_rate": 1.4338923808672686e-06, "loss": 0.0219, "step": 728575 }, { "epoch": 7.16, "grad_norm": 3.480401039123535, "learning_rate": 1.43376825841302e-06, "loss": 0.049, "step": 728600 }, { "epoch": 7.16, "grad_norm": 0.41801702976226807, "learning_rate": 1.4336441359587717e-06, "loss": 0.0196, "step": 728625 }, { "epoch": 7.16, "grad_norm": 4.132795333862305, "learning_rate": 1.4335200135045233e-06, "loss": 0.0516, "step": 728650 }, { "epoch": 7.16, "grad_norm": 6.484187126159668, "learning_rate": 1.4333958910502745e-06, "loss": 0.02, "step": 728675 }, { "epoch": 7.16, "grad_norm": 3.328920841217041, "learning_rate": 1.4332717685960261e-06, "loss": 0.058, "step": 728700 }, { "epoch": 7.16, "grad_norm": 0.8372623920440674, "learning_rate": 1.4331476461417778e-06, "loss": 0.0198, "step": 728725 }, { "epoch": 7.17, "grad_norm": 2.546599864959717, "learning_rate": 1.4330235236875292e-06, "loss": 0.0485, "step": 728750 }, { "epoch": 7.17, "grad_norm": 3.0005664825439453, "learning_rate": 1.4328994012332808e-06, "loss": 0.0248, "step": 728775 }, { "epoch": 7.17, "grad_norm": 3.051908493041992, "learning_rate": 1.4327752787790325e-06, "loss": 0.0744, "step": 728800 }, { "epoch": 7.17, "grad_norm": 0.1221347227692604, "learning_rate": 1.432651156324784e-06, "loss": 0.0145, "step": 728825 }, { "epoch": 7.17, "grad_norm": 3.764230966567993, "learning_rate": 1.4325270338705355e-06, "loss": 0.0469, "step": 728850 }, { "epoch": 7.17, "grad_norm": 6.378902435302734, "learning_rate": 1.4324029114162867e-06, "loss": 0.0293, "step": 728875 }, { "epoch": 7.17, "grad_norm": 3.4085304737091064, "learning_rate": 1.4322787889620384e-06, "loss": 0.0632, "step": 728900 }, { "epoch": 7.17, "grad_norm": 2.7313082218170166, "learning_rate": 1.43215466650779e-06, "loss": 0.0213, "step": 728925 }, { "epoch": 7.17, "grad_norm": 2.412980318069458, "learning_rate": 1.4320355089517115e-06, "loss": 0.0737, "step": 728950 }, { "epoch": 7.17, "grad_norm": 4.4200758934021, "learning_rate": 1.4319113864974629e-06, "loss": 0.0126, "step": 728975 }, { "epoch": 7.17, "grad_norm": 4.020821571350098, "learning_rate": 1.4317872640432145e-06, "loss": 0.0785, "step": 729000 }, { "epoch": 7.17, "grad_norm": 0.6220018267631531, "learning_rate": 1.4316631415889662e-06, "loss": 0.0339, "step": 729025 }, { "epoch": 7.17, "grad_norm": 4.210209369659424, "learning_rate": 1.4315390191347176e-06, "loss": 0.0537, "step": 729050 }, { "epoch": 7.17, "grad_norm": 9.131753921508789, "learning_rate": 1.4314148966804692e-06, "loss": 0.0221, "step": 729075 }, { "epoch": 7.17, "grad_norm": 4.298603534698486, "learning_rate": 1.4312907742262209e-06, "loss": 0.0549, "step": 729100 }, { "epoch": 7.17, "grad_norm": 0.8066911697387695, "learning_rate": 1.4311666517719723e-06, "loss": 0.0222, "step": 729125 }, { "epoch": 7.17, "grad_norm": 2.8007943630218506, "learning_rate": 1.431042529317724e-06, "loss": 0.0574, "step": 729150 }, { "epoch": 7.17, "grad_norm": 4.701807975769043, "learning_rate": 1.4309184068634755e-06, "loss": 0.0119, "step": 729175 }, { "epoch": 7.17, "grad_norm": 1.5788027048110962, "learning_rate": 1.4307942844092268e-06, "loss": 0.0571, "step": 729200 }, { "epoch": 7.17, "grad_norm": 30.033422470092773, "learning_rate": 1.4306701619549784e-06, "loss": 0.0221, "step": 729225 }, { "epoch": 7.17, "grad_norm": 2.9549036026000977, "learning_rate": 1.43054603950073e-06, "loss": 0.0717, "step": 729250 }, { "epoch": 7.17, "grad_norm": 0.33997660875320435, "learning_rate": 1.4304219170464814e-06, "loss": 0.0183, "step": 729275 }, { "epoch": 7.17, "grad_norm": 2.5099544525146484, "learning_rate": 1.430297794592233e-06, "loss": 0.0474, "step": 729300 }, { "epoch": 7.17, "grad_norm": 5.608231067657471, "learning_rate": 1.4301736721379847e-06, "loss": 0.0143, "step": 729325 }, { "epoch": 7.17, "grad_norm": 3.414839744567871, "learning_rate": 1.4300495496837361e-06, "loss": 0.0646, "step": 729350 }, { "epoch": 7.17, "grad_norm": 0.059945493936538696, "learning_rate": 1.4299254272294878e-06, "loss": 0.0106, "step": 729375 }, { "epoch": 7.17, "grad_norm": 2.8402349948883057, "learning_rate": 1.429801304775239e-06, "loss": 0.0646, "step": 729400 }, { "epoch": 7.17, "grad_norm": 6.869565963745117, "learning_rate": 1.4296771823209906e-06, "loss": 0.0152, "step": 729425 }, { "epoch": 7.17, "grad_norm": 2.716289520263672, "learning_rate": 1.4295530598667423e-06, "loss": 0.0519, "step": 729450 }, { "epoch": 7.17, "grad_norm": 0.22817391157150269, "learning_rate": 1.4294289374124937e-06, "loss": 0.0193, "step": 729475 }, { "epoch": 7.17, "grad_norm": 3.625619888305664, "learning_rate": 1.4293048149582453e-06, "loss": 0.0675, "step": 729500 }, { "epoch": 7.17, "grad_norm": 0.008327895775437355, "learning_rate": 1.429180692503997e-06, "loss": 0.0144, "step": 729525 }, { "epoch": 7.17, "grad_norm": 1.2428779602050781, "learning_rate": 1.4290565700497484e-06, "loss": 0.0703, "step": 729550 }, { "epoch": 7.17, "grad_norm": 6.372961044311523, "learning_rate": 1.4289324475955e-06, "loss": 0.0167, "step": 729575 }, { "epoch": 7.17, "grad_norm": 1.5954798460006714, "learning_rate": 1.4288083251412516e-06, "loss": 0.0829, "step": 729600 }, { "epoch": 7.17, "grad_norm": 0.5914563536643982, "learning_rate": 1.4286842026870029e-06, "loss": 0.0244, "step": 729625 }, { "epoch": 7.17, "grad_norm": 2.495436906814575, "learning_rate": 1.4285600802327545e-06, "loss": 0.0484, "step": 729650 }, { "epoch": 7.17, "grad_norm": 8.331809043884277, "learning_rate": 1.4284359577785061e-06, "loss": 0.0246, "step": 729675 }, { "epoch": 7.17, "grad_norm": 3.891359806060791, "learning_rate": 1.4283118353242575e-06, "loss": 0.0624, "step": 729700 }, { "epoch": 7.17, "grad_norm": 3.1405599117279053, "learning_rate": 1.4281877128700092e-06, "loss": 0.0191, "step": 729725 }, { "epoch": 7.18, "grad_norm": 4.069431781768799, "learning_rate": 1.4280635904157608e-06, "loss": 0.0525, "step": 729750 }, { "epoch": 7.18, "grad_norm": 0.410293310880661, "learning_rate": 1.4279394679615122e-06, "loss": 0.0188, "step": 729775 }, { "epoch": 7.18, "grad_norm": 2.6737349033355713, "learning_rate": 1.4278153455072639e-06, "loss": 0.0439, "step": 729800 }, { "epoch": 7.18, "grad_norm": 1.5274102687835693, "learning_rate": 1.427691223053015e-06, "loss": 0.0125, "step": 729825 }, { "epoch": 7.18, "grad_norm": 2.908729076385498, "learning_rate": 1.4275671005987667e-06, "loss": 0.0703, "step": 729850 }, { "epoch": 7.18, "grad_norm": 4.222856044769287, "learning_rate": 1.4274429781445184e-06, "loss": 0.0246, "step": 729875 }, { "epoch": 7.18, "grad_norm": 4.242889404296875, "learning_rate": 1.4273188556902698e-06, "loss": 0.048, "step": 729900 }, { "epoch": 7.18, "grad_norm": 0.39000919461250305, "learning_rate": 1.4271947332360214e-06, "loss": 0.0195, "step": 729925 }, { "epoch": 7.18, "grad_norm": 2.313657760620117, "learning_rate": 1.427070610781773e-06, "loss": 0.0589, "step": 729950 }, { "epoch": 7.18, "grad_norm": 0.06062188372015953, "learning_rate": 1.4269464883275245e-06, "loss": 0.0071, "step": 729975 }, { "epoch": 7.18, "grad_norm": 3.167825222015381, "learning_rate": 1.4268223658732761e-06, "loss": 0.055, "step": 730000 }, { "epoch": 7.18, "grad_norm": 0.2764955759048462, "learning_rate": 1.4266982434190277e-06, "loss": 0.0193, "step": 730025 }, { "epoch": 7.18, "grad_norm": 6.391854763031006, "learning_rate": 1.426574120964779e-06, "loss": 0.0589, "step": 730050 }, { "epoch": 7.18, "grad_norm": 6.102078914642334, "learning_rate": 1.4264499985105306e-06, "loss": 0.0197, "step": 730075 }, { "epoch": 7.18, "grad_norm": 3.036216974258423, "learning_rate": 1.4263258760562822e-06, "loss": 0.0651, "step": 730100 }, { "epoch": 7.18, "grad_norm": 8.067230224609375, "learning_rate": 1.4262017536020336e-06, "loss": 0.0197, "step": 730125 }, { "epoch": 7.18, "grad_norm": 2.1745078563690186, "learning_rate": 1.4260776311477853e-06, "loss": 0.0677, "step": 730150 }, { "epoch": 7.18, "grad_norm": 0.41006648540496826, "learning_rate": 1.425953508693537e-06, "loss": 0.0167, "step": 730175 }, { "epoch": 7.18, "grad_norm": 3.3772034645080566, "learning_rate": 1.4258293862392883e-06, "loss": 0.0711, "step": 730200 }, { "epoch": 7.18, "grad_norm": 10.210814476013184, "learning_rate": 1.42570526378504e-06, "loss": 0.021, "step": 730225 }, { "epoch": 7.18, "grad_norm": 2.868072748184204, "learning_rate": 1.4255811413307912e-06, "loss": 0.0629, "step": 730250 }, { "epoch": 7.18, "grad_norm": 1.462073802947998, "learning_rate": 1.4254570188765428e-06, "loss": 0.0146, "step": 730275 }, { "epoch": 7.18, "grad_norm": 2.5411221981048584, "learning_rate": 1.4253328964222945e-06, "loss": 0.0582, "step": 730300 }, { "epoch": 7.18, "grad_norm": 7.670866966247559, "learning_rate": 1.4252087739680459e-06, "loss": 0.017, "step": 730325 }, { "epoch": 7.18, "grad_norm": 5.397127151489258, "learning_rate": 1.4250846515137975e-06, "loss": 0.0395, "step": 730350 }, { "epoch": 7.18, "grad_norm": 0.3339558243751526, "learning_rate": 1.4249605290595492e-06, "loss": 0.0148, "step": 730375 }, { "epoch": 7.18, "grad_norm": 4.96244478225708, "learning_rate": 1.4248364066053006e-06, "loss": 0.0414, "step": 730400 }, { "epoch": 7.18, "grad_norm": 10.115031242370605, "learning_rate": 1.4247122841510522e-06, "loss": 0.0197, "step": 730425 }, { "epoch": 7.18, "grad_norm": 5.574033260345459, "learning_rate": 1.4245881616968038e-06, "loss": 0.0532, "step": 730450 }, { "epoch": 7.18, "grad_norm": 0.9919189810752869, "learning_rate": 1.4244640392425553e-06, "loss": 0.0231, "step": 730475 }, { "epoch": 7.18, "grad_norm": 2.7349634170532227, "learning_rate": 1.424339916788307e-06, "loss": 0.0613, "step": 730500 }, { "epoch": 7.18, "grad_norm": 10.331033706665039, "learning_rate": 1.4242157943340585e-06, "loss": 0.0237, "step": 730525 }, { "epoch": 7.18, "grad_norm": 3.2146334648132324, "learning_rate": 1.4240916718798097e-06, "loss": 0.0623, "step": 730550 }, { "epoch": 7.18, "grad_norm": 4.575338840484619, "learning_rate": 1.4239675494255614e-06, "loss": 0.0173, "step": 730575 }, { "epoch": 7.18, "grad_norm": 3.8466379642486572, "learning_rate": 1.423843426971313e-06, "loss": 0.0434, "step": 730600 }, { "epoch": 7.18, "grad_norm": 6.617901802062988, "learning_rate": 1.4237193045170644e-06, "loss": 0.0187, "step": 730625 }, { "epoch": 7.18, "grad_norm": 6.301601409912109, "learning_rate": 1.423595182062816e-06, "loss": 0.0627, "step": 730650 }, { "epoch": 7.18, "grad_norm": 0.9076817631721497, "learning_rate": 1.4234710596085675e-06, "loss": 0.0191, "step": 730675 }, { "epoch": 7.18, "grad_norm": 3.4676363468170166, "learning_rate": 1.4233469371543191e-06, "loss": 0.0372, "step": 730700 }, { "epoch": 7.18, "grad_norm": 2.3485453128814697, "learning_rate": 1.4232228147000708e-06, "loss": 0.023, "step": 730725 }, { "epoch": 7.18, "grad_norm": 1.6802107095718384, "learning_rate": 1.423098692245822e-06, "loss": 0.0594, "step": 730750 }, { "epoch": 7.19, "grad_norm": 1.9740180969238281, "learning_rate": 1.4229745697915736e-06, "loss": 0.0258, "step": 730775 }, { "epoch": 7.19, "grad_norm": 3.130640983581543, "learning_rate": 1.4228504473373253e-06, "loss": 0.0724, "step": 730800 }, { "epoch": 7.19, "grad_norm": 0.39527496695518494, "learning_rate": 1.4227263248830767e-06, "loss": 0.0125, "step": 730825 }, { "epoch": 7.19, "grad_norm": 2.1095001697540283, "learning_rate": 1.4226022024288283e-06, "loss": 0.0763, "step": 730850 }, { "epoch": 7.19, "grad_norm": 7.336045742034912, "learning_rate": 1.42247807997458e-06, "loss": 0.0215, "step": 730875 }, { "epoch": 7.19, "grad_norm": 2.229804754257202, "learning_rate": 1.4223539575203314e-06, "loss": 0.0475, "step": 730900 }, { "epoch": 7.19, "grad_norm": 2.590606689453125, "learning_rate": 1.422229835066083e-06, "loss": 0.0104, "step": 730925 }, { "epoch": 7.19, "grad_norm": 2.917682409286499, "learning_rate": 1.4221057126118346e-06, "loss": 0.0511, "step": 730950 }, { "epoch": 7.19, "grad_norm": 5.138666152954102, "learning_rate": 1.4219815901575859e-06, "loss": 0.0204, "step": 730975 }, { "epoch": 7.19, "grad_norm": 5.046177864074707, "learning_rate": 1.4218574677033375e-06, "loss": 0.0755, "step": 731000 }, { "epoch": 7.19, "grad_norm": 4.516957759857178, "learning_rate": 1.4217333452490891e-06, "loss": 0.0092, "step": 731025 }, { "epoch": 7.19, "grad_norm": 2.2732269763946533, "learning_rate": 1.4216092227948405e-06, "loss": 0.0571, "step": 731050 }, { "epoch": 7.19, "grad_norm": 0.2666175663471222, "learning_rate": 1.4214851003405922e-06, "loss": 0.0269, "step": 731075 }, { "epoch": 7.19, "grad_norm": 2.952747106552124, "learning_rate": 1.4213609778863436e-06, "loss": 0.0377, "step": 731100 }, { "epoch": 7.19, "grad_norm": 0.24914397299289703, "learning_rate": 1.4212368554320952e-06, "loss": 0.0203, "step": 731125 }, { "epoch": 7.19, "grad_norm": 4.402299880981445, "learning_rate": 1.4211127329778469e-06, "loss": 0.0661, "step": 731150 }, { "epoch": 7.19, "grad_norm": 2.162367582321167, "learning_rate": 1.420988610523598e-06, "loss": 0.0165, "step": 731175 }, { "epoch": 7.19, "grad_norm": 8.96724796295166, "learning_rate": 1.4208644880693497e-06, "loss": 0.0669, "step": 731200 }, { "epoch": 7.19, "grad_norm": 5.16124153137207, "learning_rate": 1.4207403656151014e-06, "loss": 0.03, "step": 731225 }, { "epoch": 7.19, "grad_norm": 2.6126651763916016, "learning_rate": 1.4206162431608528e-06, "loss": 0.0524, "step": 731250 }, { "epoch": 7.19, "grad_norm": 2.934180974960327, "learning_rate": 1.4204921207066044e-06, "loss": 0.0253, "step": 731275 }, { "epoch": 7.19, "grad_norm": 2.7027156352996826, "learning_rate": 1.4203729631505259e-06, "loss": 0.0416, "step": 731300 }, { "epoch": 7.19, "grad_norm": 0.10808870196342468, "learning_rate": 1.4202488406962775e-06, "loss": 0.0171, "step": 731325 }, { "epoch": 7.19, "grad_norm": 2.986213207244873, "learning_rate": 1.420124718242029e-06, "loss": 0.0604, "step": 731350 }, { "epoch": 7.19, "grad_norm": 4.905633926391602, "learning_rate": 1.4200005957877806e-06, "loss": 0.0108, "step": 731375 }, { "epoch": 7.19, "grad_norm": 4.083792686462402, "learning_rate": 1.4198764733335322e-06, "loss": 0.0582, "step": 731400 }, { "epoch": 7.19, "grad_norm": 3.5409767627716064, "learning_rate": 1.4197523508792834e-06, "loss": 0.0254, "step": 731425 }, { "epoch": 7.19, "grad_norm": 2.596328020095825, "learning_rate": 1.419628228425035e-06, "loss": 0.0776, "step": 731450 }, { "epoch": 7.19, "grad_norm": 4.758947849273682, "learning_rate": 1.4195041059707867e-06, "loss": 0.0153, "step": 731475 }, { "epoch": 7.19, "grad_norm": 3.5058586597442627, "learning_rate": 1.419379983516538e-06, "loss": 0.0388, "step": 731500 }, { "epoch": 7.19, "grad_norm": 0.43055105209350586, "learning_rate": 1.4192558610622897e-06, "loss": 0.012, "step": 731525 }, { "epoch": 7.19, "grad_norm": 2.7649431228637695, "learning_rate": 1.4191317386080414e-06, "loss": 0.0442, "step": 731550 }, { "epoch": 7.19, "grad_norm": 0.41780582070350647, "learning_rate": 1.4190076161537928e-06, "loss": 0.0138, "step": 731575 }, { "epoch": 7.19, "grad_norm": 2.780693769454956, "learning_rate": 1.4188834936995444e-06, "loss": 0.0639, "step": 731600 }, { "epoch": 7.19, "grad_norm": 0.12923741340637207, "learning_rate": 1.418759371245296e-06, "loss": 0.0166, "step": 731625 }, { "epoch": 7.19, "grad_norm": 3.3815431594848633, "learning_rate": 1.4186352487910473e-06, "loss": 0.0514, "step": 731650 }, { "epoch": 7.19, "grad_norm": 6.159157752990723, "learning_rate": 1.418511126336799e-06, "loss": 0.0177, "step": 731675 }, { "epoch": 7.19, "grad_norm": 6.755041599273682, "learning_rate": 1.4183870038825503e-06, "loss": 0.0508, "step": 731700 }, { "epoch": 7.19, "grad_norm": 0.317158579826355, "learning_rate": 1.418262881428302e-06, "loss": 0.0294, "step": 731725 }, { "epoch": 7.19, "grad_norm": 1.6004947423934937, "learning_rate": 1.4181387589740536e-06, "loss": 0.0664, "step": 731750 }, { "epoch": 7.19, "grad_norm": 6.876605987548828, "learning_rate": 1.418014636519805e-06, "loss": 0.0221, "step": 731775 }, { "epoch": 7.2, "grad_norm": 2.270867109298706, "learning_rate": 1.4178905140655567e-06, "loss": 0.063, "step": 731800 }, { "epoch": 7.2, "grad_norm": 3.1983556747436523, "learning_rate": 1.4177663916113083e-06, "loss": 0.0154, "step": 731825 }, { "epoch": 7.2, "grad_norm": 5.51377010345459, "learning_rate": 1.4176422691570595e-06, "loss": 0.0481, "step": 731850 }, { "epoch": 7.2, "grad_norm": 0.389544278383255, "learning_rate": 1.4175181467028111e-06, "loss": 0.0151, "step": 731875 }, { "epoch": 7.2, "grad_norm": 3.7185490131378174, "learning_rate": 1.4173940242485628e-06, "loss": 0.0393, "step": 731900 }, { "epoch": 7.2, "grad_norm": 0.3727491497993469, "learning_rate": 1.4172699017943142e-06, "loss": 0.0185, "step": 731925 }, { "epoch": 7.2, "grad_norm": 2.5759763717651367, "learning_rate": 1.4171457793400658e-06, "loss": 0.0593, "step": 731950 }, { "epoch": 7.2, "grad_norm": 1.078850507736206, "learning_rate": 1.4170216568858175e-06, "loss": 0.0209, "step": 731975 }, { "epoch": 7.2, "grad_norm": 3.1236329078674316, "learning_rate": 1.4168975344315689e-06, "loss": 0.0554, "step": 732000 }, { "epoch": 7.2, "grad_norm": 4.073817253112793, "learning_rate": 1.4167734119773205e-06, "loss": 0.0189, "step": 732025 }, { "epoch": 7.2, "grad_norm": 4.733691215515137, "learning_rate": 1.4166492895230722e-06, "loss": 0.0556, "step": 732050 }, { "epoch": 7.2, "grad_norm": 6.348066329956055, "learning_rate": 1.4165251670688236e-06, "loss": 0.0133, "step": 732075 }, { "epoch": 7.2, "grad_norm": 3.7537944316864014, "learning_rate": 1.4164010446145752e-06, "loss": 0.0706, "step": 732100 }, { "epoch": 7.2, "grad_norm": 0.8577224016189575, "learning_rate": 1.4162769221603264e-06, "loss": 0.0178, "step": 732125 }, { "epoch": 7.2, "grad_norm": 3.232609987258911, "learning_rate": 1.416152799706078e-06, "loss": 0.0424, "step": 732150 }, { "epoch": 7.2, "grad_norm": 0.12243425846099854, "learning_rate": 1.4160286772518297e-06, "loss": 0.0148, "step": 732175 }, { "epoch": 7.2, "grad_norm": 1.5950130224227905, "learning_rate": 1.4159045547975811e-06, "loss": 0.0754, "step": 732200 }, { "epoch": 7.2, "grad_norm": 1.9600698947906494, "learning_rate": 1.4157804323433328e-06, "loss": 0.0102, "step": 732225 }, { "epoch": 7.2, "grad_norm": 3.3747148513793945, "learning_rate": 1.4156563098890844e-06, "loss": 0.0441, "step": 732250 }, { "epoch": 7.2, "grad_norm": 0.214623361825943, "learning_rate": 1.4155321874348358e-06, "loss": 0.0206, "step": 732275 }, { "epoch": 7.2, "grad_norm": 2.8969058990478516, "learning_rate": 1.4154080649805874e-06, "loss": 0.0515, "step": 732300 }, { "epoch": 7.2, "grad_norm": 0.21596485376358032, "learning_rate": 1.415283942526339e-06, "loss": 0.0151, "step": 732325 }, { "epoch": 7.2, "grad_norm": 4.384426593780518, "learning_rate": 1.4151598200720903e-06, "loss": 0.0571, "step": 732350 }, { "epoch": 7.2, "grad_norm": 0.022550592198967934, "learning_rate": 1.415035697617842e-06, "loss": 0.0175, "step": 732375 }, { "epoch": 7.2, "grad_norm": 3.485369920730591, "learning_rate": 1.4149115751635936e-06, "loss": 0.0554, "step": 732400 }, { "epoch": 7.2, "grad_norm": 2.1205780506134033, "learning_rate": 1.414787452709345e-06, "loss": 0.0227, "step": 732425 }, { "epoch": 7.2, "grad_norm": 4.821053981781006, "learning_rate": 1.4146633302550966e-06, "loss": 0.0752, "step": 732450 }, { "epoch": 7.2, "grad_norm": 2.8835277557373047, "learning_rate": 1.4145392078008483e-06, "loss": 0.0171, "step": 732475 }, { "epoch": 7.2, "grad_norm": 2.8219034671783447, "learning_rate": 1.4144150853465997e-06, "loss": 0.0637, "step": 732500 }, { "epoch": 7.2, "grad_norm": 1.4862397909164429, "learning_rate": 1.4142909628923513e-06, "loss": 0.0179, "step": 732525 }, { "epoch": 7.2, "grad_norm": 3.61234450340271, "learning_rate": 1.4141668404381025e-06, "loss": 0.0456, "step": 732550 }, { "epoch": 7.2, "grad_norm": 0.4904012978076935, "learning_rate": 1.4140427179838542e-06, "loss": 0.0165, "step": 732575 }, { "epoch": 7.2, "grad_norm": 4.884749889373779, "learning_rate": 1.4139185955296058e-06, "loss": 0.0835, "step": 732600 }, { "epoch": 7.2, "grad_norm": 0.39788705110549927, "learning_rate": 1.4137944730753572e-06, "loss": 0.0202, "step": 732625 }, { "epoch": 7.2, "grad_norm": 5.703648567199707, "learning_rate": 1.4136703506211089e-06, "loss": 0.0501, "step": 732650 }, { "epoch": 7.2, "grad_norm": 0.06820830702781677, "learning_rate": 1.4135462281668605e-06, "loss": 0.016, "step": 732675 }, { "epoch": 7.2, "grad_norm": 3.450117349624634, "learning_rate": 1.413422105712612e-06, "loss": 0.0629, "step": 732700 }, { "epoch": 7.2, "grad_norm": 0.06972251087427139, "learning_rate": 1.4132979832583635e-06, "loss": 0.0205, "step": 732725 }, { "epoch": 7.2, "grad_norm": 3.182371139526367, "learning_rate": 1.4131738608041152e-06, "loss": 0.0516, "step": 732750 }, { "epoch": 7.2, "grad_norm": 0.9377787113189697, "learning_rate": 1.4130497383498664e-06, "loss": 0.0188, "step": 732775 }, { "epoch": 7.21, "grad_norm": 3.5198800563812256, "learning_rate": 1.412925615895618e-06, "loss": 0.0457, "step": 732800 }, { "epoch": 7.21, "grad_norm": 6.054130554199219, "learning_rate": 1.4128014934413697e-06, "loss": 0.0116, "step": 732825 }, { "epoch": 7.21, "grad_norm": 3.509490489959717, "learning_rate": 1.412677370987121e-06, "loss": 0.062, "step": 732850 }, { "epoch": 7.21, "grad_norm": 0.9469690322875977, "learning_rate": 1.4125532485328727e-06, "loss": 0.0145, "step": 732875 }, { "epoch": 7.21, "grad_norm": 4.178328514099121, "learning_rate": 1.4124291260786244e-06, "loss": 0.0628, "step": 732900 }, { "epoch": 7.21, "grad_norm": 9.63911247253418, "learning_rate": 1.4123050036243758e-06, "loss": 0.0225, "step": 732925 }, { "epoch": 7.21, "grad_norm": 3.70991587638855, "learning_rate": 1.4121808811701274e-06, "loss": 0.0526, "step": 732950 }, { "epoch": 7.21, "grad_norm": 5.704389572143555, "learning_rate": 1.4120567587158786e-06, "loss": 0.0302, "step": 732975 }, { "epoch": 7.21, "grad_norm": 2.6228320598602295, "learning_rate": 1.4119326362616303e-06, "loss": 0.05, "step": 733000 }, { "epoch": 7.21, "grad_norm": 0.5190934538841248, "learning_rate": 1.411808513807382e-06, "loss": 0.0246, "step": 733025 }, { "epoch": 7.21, "grad_norm": 4.248258113861084, "learning_rate": 1.4116843913531333e-06, "loss": 0.0775, "step": 733050 }, { "epoch": 7.21, "grad_norm": 0.7156853079795837, "learning_rate": 1.411560268898885e-06, "loss": 0.0168, "step": 733075 }, { "epoch": 7.21, "grad_norm": 2.3580069541931152, "learning_rate": 1.4114361464446366e-06, "loss": 0.068, "step": 733100 }, { "epoch": 7.21, "grad_norm": 0.6114773750305176, "learning_rate": 1.411312023990388e-06, "loss": 0.0097, "step": 733125 }, { "epoch": 7.21, "grad_norm": 2.720424175262451, "learning_rate": 1.4111879015361397e-06, "loss": 0.0419, "step": 733150 }, { "epoch": 7.21, "grad_norm": 1.4919224977493286, "learning_rate": 1.4110637790818913e-06, "loss": 0.0225, "step": 733175 }, { "epoch": 7.21, "grad_norm": 4.802855968475342, "learning_rate": 1.4109396566276425e-06, "loss": 0.0476, "step": 733200 }, { "epoch": 7.21, "grad_norm": 0.39817553758621216, "learning_rate": 1.4108155341733941e-06, "loss": 0.013, "step": 733225 }, { "epoch": 7.21, "grad_norm": 5.225204944610596, "learning_rate": 1.4106914117191458e-06, "loss": 0.0766, "step": 733250 }, { "epoch": 7.21, "grad_norm": 13.844679832458496, "learning_rate": 1.4105672892648972e-06, "loss": 0.0158, "step": 733275 }, { "epoch": 7.21, "grad_norm": 5.832890033721924, "learning_rate": 1.4104431668106488e-06, "loss": 0.0557, "step": 733300 }, { "epoch": 7.21, "grad_norm": 0.3082104027271271, "learning_rate": 1.4103190443564005e-06, "loss": 0.0078, "step": 733325 }, { "epoch": 7.21, "grad_norm": 1.721551775932312, "learning_rate": 1.4101949219021519e-06, "loss": 0.061, "step": 733350 }, { "epoch": 7.21, "grad_norm": 0.9095489978790283, "learning_rate": 1.4100707994479035e-06, "loss": 0.016, "step": 733375 }, { "epoch": 7.21, "grad_norm": 6.6296916007995605, "learning_rate": 1.409946676993655e-06, "loss": 0.0545, "step": 733400 }, { "epoch": 7.21, "grad_norm": 1.583625078201294, "learning_rate": 1.4098225545394066e-06, "loss": 0.0195, "step": 733425 }, { "epoch": 7.21, "grad_norm": 2.8801496028900146, "learning_rate": 1.4096984320851582e-06, "loss": 0.055, "step": 733450 }, { "epoch": 7.21, "grad_norm": 0.28939059376716614, "learning_rate": 1.4095743096309094e-06, "loss": 0.0204, "step": 733475 }, { "epoch": 7.21, "grad_norm": 2.0563344955444336, "learning_rate": 1.409450187176661e-06, "loss": 0.0467, "step": 733500 }, { "epoch": 7.21, "grad_norm": 7.5162272453308105, "learning_rate": 1.4093260647224127e-06, "loss": 0.0277, "step": 733525 }, { "epoch": 7.21, "grad_norm": 2.4780545234680176, "learning_rate": 1.4092019422681641e-06, "loss": 0.0599, "step": 733550 }, { "epoch": 7.21, "grad_norm": 1.2325031757354736, "learning_rate": 1.4090778198139158e-06, "loss": 0.031, "step": 733575 }, { "epoch": 7.21, "grad_norm": 2.4994895458221436, "learning_rate": 1.4089536973596674e-06, "loss": 0.0301, "step": 733600 }, { "epoch": 7.21, "grad_norm": 0.1375417709350586, "learning_rate": 1.4088295749054188e-06, "loss": 0.0227, "step": 733625 }, { "epoch": 7.21, "grad_norm": 3.8897759914398193, "learning_rate": 1.4087054524511704e-06, "loss": 0.0509, "step": 733650 }, { "epoch": 7.21, "grad_norm": 0.3938618004322052, "learning_rate": 1.408581329996922e-06, "loss": 0.014, "step": 733675 }, { "epoch": 7.21, "grad_norm": 3.7737560272216797, "learning_rate": 1.4084572075426733e-06, "loss": 0.0439, "step": 733700 }, { "epoch": 7.21, "grad_norm": 4.210147857666016, "learning_rate": 1.408333085088425e-06, "loss": 0.0179, "step": 733725 }, { "epoch": 7.21, "grad_norm": 3.7661831378936768, "learning_rate": 1.4082089626341766e-06, "loss": 0.0557, "step": 733750 }, { "epoch": 7.21, "grad_norm": 4.594388961791992, "learning_rate": 1.408084840179928e-06, "loss": 0.0191, "step": 733775 }, { "epoch": 7.21, "grad_norm": 3.105710983276367, "learning_rate": 1.4079607177256796e-06, "loss": 0.0741, "step": 733800 }, { "epoch": 7.22, "grad_norm": 0.47919178009033203, "learning_rate": 1.407836595271431e-06, "loss": 0.0158, "step": 733825 }, { "epoch": 7.22, "grad_norm": 3.0036096572875977, "learning_rate": 1.4077124728171827e-06, "loss": 0.092, "step": 733850 }, { "epoch": 7.22, "grad_norm": 0.3937666118144989, "learning_rate": 1.4075883503629343e-06, "loss": 0.0182, "step": 733875 }, { "epoch": 7.22, "grad_norm": 2.4859282970428467, "learning_rate": 1.4074642279086855e-06, "loss": 0.0632, "step": 733900 }, { "epoch": 7.22, "grad_norm": 0.12632110714912415, "learning_rate": 1.4073401054544372e-06, "loss": 0.0128, "step": 733925 }, { "epoch": 7.22, "grad_norm": 4.354715824127197, "learning_rate": 1.4072159830001888e-06, "loss": 0.0523, "step": 733950 }, { "epoch": 7.22, "grad_norm": 1.2083271741867065, "learning_rate": 1.4070918605459402e-06, "loss": 0.0235, "step": 733975 }, { "epoch": 7.22, "grad_norm": 2.124802350997925, "learning_rate": 1.4069677380916919e-06, "loss": 0.0447, "step": 734000 }, { "epoch": 7.22, "grad_norm": 0.05412158742547035, "learning_rate": 1.4068436156374435e-06, "loss": 0.0154, "step": 734025 }, { "epoch": 7.22, "grad_norm": 3.9522180557250977, "learning_rate": 1.406724458081365e-06, "loss": 0.0701, "step": 734050 }, { "epoch": 7.22, "grad_norm": 6.635473251342773, "learning_rate": 1.4066003356271164e-06, "loss": 0.0213, "step": 734075 }, { "epoch": 7.22, "grad_norm": 1.9381482601165771, "learning_rate": 1.406476213172868e-06, "loss": 0.0552, "step": 734100 }, { "epoch": 7.22, "grad_norm": 3.6602933406829834, "learning_rate": 1.4063520907186196e-06, "loss": 0.0243, "step": 734125 }, { "epoch": 7.22, "grad_norm": 3.6175360679626465, "learning_rate": 1.4062279682643708e-06, "loss": 0.0809, "step": 734150 }, { "epoch": 7.22, "grad_norm": 0.18087051808834076, "learning_rate": 1.4061038458101225e-06, "loss": 0.0188, "step": 734175 }, { "epoch": 7.22, "grad_norm": 7.269321441650391, "learning_rate": 1.4059797233558741e-06, "loss": 0.0623, "step": 734200 }, { "epoch": 7.22, "grad_norm": 2.8299853801727295, "learning_rate": 1.4058556009016255e-06, "loss": 0.0196, "step": 734225 }, { "epoch": 7.22, "grad_norm": 3.021620273590088, "learning_rate": 1.4057314784473772e-06, "loss": 0.0505, "step": 734250 }, { "epoch": 7.22, "grad_norm": 0.5824314951896667, "learning_rate": 1.4056073559931288e-06, "loss": 0.0199, "step": 734275 }, { "epoch": 7.22, "grad_norm": 3.1777613162994385, "learning_rate": 1.4054832335388802e-06, "loss": 0.0523, "step": 734300 }, { "epoch": 7.22, "grad_norm": 9.079826354980469, "learning_rate": 1.4053591110846319e-06, "loss": 0.0138, "step": 734325 }, { "epoch": 7.22, "grad_norm": 1.8663989305496216, "learning_rate": 1.4052349886303835e-06, "loss": 0.0373, "step": 734350 }, { "epoch": 7.22, "grad_norm": 1.3396978378295898, "learning_rate": 1.4051108661761347e-06, "loss": 0.0161, "step": 734375 }, { "epoch": 7.22, "grad_norm": 3.134439706802368, "learning_rate": 1.4049867437218863e-06, "loss": 0.063, "step": 734400 }, { "epoch": 7.22, "grad_norm": 0.850100576877594, "learning_rate": 1.4048626212676378e-06, "loss": 0.0184, "step": 734425 }, { "epoch": 7.22, "grad_norm": 2.007106065750122, "learning_rate": 1.4047384988133894e-06, "loss": 0.0534, "step": 734450 }, { "epoch": 7.22, "grad_norm": 0.18048100173473358, "learning_rate": 1.404614376359141e-06, "loss": 0.0351, "step": 734475 }, { "epoch": 7.22, "grad_norm": 2.9600942134857178, "learning_rate": 1.4044902539048925e-06, "loss": 0.0576, "step": 734500 }, { "epoch": 7.22, "grad_norm": 6.156052112579346, "learning_rate": 1.404366131450644e-06, "loss": 0.0114, "step": 734525 }, { "epoch": 7.22, "grad_norm": 2.89975905418396, "learning_rate": 1.4042420089963957e-06, "loss": 0.0513, "step": 734550 }, { "epoch": 7.22, "grad_norm": 0.7585601806640625, "learning_rate": 1.404117886542147e-06, "loss": 0.0241, "step": 734575 }, { "epoch": 7.22, "grad_norm": 4.736211776733398, "learning_rate": 1.4039937640878986e-06, "loss": 0.0746, "step": 734600 }, { "epoch": 7.22, "grad_norm": 0.1209326684474945, "learning_rate": 1.4038696416336502e-06, "loss": 0.0221, "step": 734625 }, { "epoch": 7.22, "grad_norm": 4.537081718444824, "learning_rate": 1.4037455191794016e-06, "loss": 0.0344, "step": 734650 }, { "epoch": 7.22, "grad_norm": 2.3371503353118896, "learning_rate": 1.4036213967251533e-06, "loss": 0.0082, "step": 734675 }, { "epoch": 7.22, "grad_norm": 2.6664254665374756, "learning_rate": 1.403497274270905e-06, "loss": 0.0537, "step": 734700 }, { "epoch": 7.22, "grad_norm": 3.684476375579834, "learning_rate": 1.4033731518166563e-06, "loss": 0.0173, "step": 734725 }, { "epoch": 7.22, "grad_norm": 3.206035852432251, "learning_rate": 1.403249029362408e-06, "loss": 0.0338, "step": 734750 }, { "epoch": 7.22, "grad_norm": 2.0337655544281006, "learning_rate": 1.4031249069081596e-06, "loss": 0.0253, "step": 734775 }, { "epoch": 7.22, "grad_norm": 2.9550468921661377, "learning_rate": 1.4030007844539108e-06, "loss": 0.041, "step": 734800 }, { "epoch": 7.22, "grad_norm": 6.76200008392334, "learning_rate": 1.4028766619996624e-06, "loss": 0.0236, "step": 734825 }, { "epoch": 7.23, "grad_norm": 4.544464588165283, "learning_rate": 1.4027525395454139e-06, "loss": 0.0962, "step": 734850 }, { "epoch": 7.23, "grad_norm": 0.1507282257080078, "learning_rate": 1.4026284170911655e-06, "loss": 0.017, "step": 734875 }, { "epoch": 7.23, "grad_norm": 4.082666873931885, "learning_rate": 1.4025042946369171e-06, "loss": 0.0392, "step": 734900 }, { "epoch": 7.23, "grad_norm": 7.587663173675537, "learning_rate": 1.4023801721826686e-06, "loss": 0.0185, "step": 734925 }, { "epoch": 7.23, "grad_norm": 3.296448230743408, "learning_rate": 1.4022560497284202e-06, "loss": 0.0408, "step": 734950 }, { "epoch": 7.23, "grad_norm": 4.6458306312561035, "learning_rate": 1.4021319272741718e-06, "loss": 0.0241, "step": 734975 }, { "epoch": 7.23, "grad_norm": 2.9430410861968994, "learning_rate": 1.4020078048199233e-06, "loss": 0.0546, "step": 735000 }, { "epoch": 7.23, "grad_norm": 0.4954798221588135, "learning_rate": 1.4018836823656749e-06, "loss": 0.017, "step": 735025 }, { "epoch": 7.23, "grad_norm": 4.27379846572876, "learning_rate": 1.4017595599114265e-06, "loss": 0.0435, "step": 735050 }, { "epoch": 7.23, "grad_norm": 0.4980992078781128, "learning_rate": 1.4016354374571777e-06, "loss": 0.0247, "step": 735075 }, { "epoch": 7.23, "grad_norm": 6.421379089355469, "learning_rate": 1.4015113150029294e-06, "loss": 0.0849, "step": 735100 }, { "epoch": 7.23, "grad_norm": 0.5209434032440186, "learning_rate": 1.401387192548681e-06, "loss": 0.0108, "step": 735125 }, { "epoch": 7.23, "grad_norm": 5.230409622192383, "learning_rate": 1.4012630700944324e-06, "loss": 0.0546, "step": 735150 }, { "epoch": 7.23, "grad_norm": 0.07565994560718536, "learning_rate": 1.401138947640184e-06, "loss": 0.0137, "step": 735175 }, { "epoch": 7.23, "grad_norm": 3.5469794273376465, "learning_rate": 1.4010148251859357e-06, "loss": 0.0513, "step": 735200 }, { "epoch": 7.23, "grad_norm": 2.122283458709717, "learning_rate": 1.4008907027316871e-06, "loss": 0.0155, "step": 735225 }, { "epoch": 7.23, "grad_norm": 4.162202835083008, "learning_rate": 1.4007665802774388e-06, "loss": 0.0742, "step": 735250 }, { "epoch": 7.23, "grad_norm": 0.07427096366882324, "learning_rate": 1.40064245782319e-06, "loss": 0.0264, "step": 735275 }, { "epoch": 7.23, "grad_norm": 2.8737730979919434, "learning_rate": 1.4005183353689416e-06, "loss": 0.0704, "step": 735300 }, { "epoch": 7.23, "grad_norm": 8.150193214416504, "learning_rate": 1.4003942129146932e-06, "loss": 0.0141, "step": 735325 }, { "epoch": 7.23, "grad_norm": 1.4535516500473022, "learning_rate": 1.4002700904604447e-06, "loss": 0.0866, "step": 735350 }, { "epoch": 7.23, "grad_norm": 0.7107206583023071, "learning_rate": 1.4001459680061963e-06, "loss": 0.0149, "step": 735375 }, { "epoch": 7.23, "grad_norm": 3.633350372314453, "learning_rate": 1.400021845551948e-06, "loss": 0.0446, "step": 735400 }, { "epoch": 7.23, "grad_norm": 10.530872344970703, "learning_rate": 1.3998977230976994e-06, "loss": 0.0277, "step": 735425 }, { "epoch": 7.23, "grad_norm": 3.622077465057373, "learning_rate": 1.399773600643451e-06, "loss": 0.0662, "step": 735450 }, { "epoch": 7.23, "grad_norm": 9.335238456726074, "learning_rate": 1.3996494781892026e-06, "loss": 0.0249, "step": 735475 }, { "epoch": 7.23, "grad_norm": 4.034637928009033, "learning_rate": 1.3995253557349538e-06, "loss": 0.0401, "step": 735500 }, { "epoch": 7.23, "grad_norm": 3.8642048835754395, "learning_rate": 1.3994012332807055e-06, "loss": 0.0124, "step": 735525 }, { "epoch": 7.23, "grad_norm": 4.423948287963867, "learning_rate": 1.3992771108264571e-06, "loss": 0.0668, "step": 735550 }, { "epoch": 7.23, "grad_norm": 0.16625750064849854, "learning_rate": 1.3991529883722085e-06, "loss": 0.0281, "step": 735575 }, { "epoch": 7.23, "grad_norm": 2.7825779914855957, "learning_rate": 1.3990288659179602e-06, "loss": 0.0368, "step": 735600 }, { "epoch": 7.23, "grad_norm": 7.566277027130127, "learning_rate": 1.3989047434637118e-06, "loss": 0.0113, "step": 735625 }, { "epoch": 7.23, "grad_norm": 2.4302587509155273, "learning_rate": 1.3987806210094632e-06, "loss": 0.0394, "step": 735650 }, { "epoch": 7.23, "grad_norm": 0.3382766544818878, "learning_rate": 1.3986564985552149e-06, "loss": 0.0148, "step": 735675 }, { "epoch": 7.23, "grad_norm": 3.3401896953582764, "learning_rate": 1.398532376100966e-06, "loss": 0.0478, "step": 735700 }, { "epoch": 7.23, "grad_norm": 4.710598468780518, "learning_rate": 1.3984082536467177e-06, "loss": 0.0138, "step": 735725 }, { "epoch": 7.23, "grad_norm": 4.654747486114502, "learning_rate": 1.3982841311924693e-06, "loss": 0.0563, "step": 735750 }, { "epoch": 7.23, "grad_norm": 3.7095749378204346, "learning_rate": 1.3981600087382208e-06, "loss": 0.026, "step": 735775 }, { "epoch": 7.23, "grad_norm": 4.605404853820801, "learning_rate": 1.3980358862839724e-06, "loss": 0.0569, "step": 735800 }, { "epoch": 7.23, "grad_norm": 11.21212100982666, "learning_rate": 1.397911763829724e-06, "loss": 0.0216, "step": 735825 }, { "epoch": 7.23, "grad_norm": 2.502570390701294, "learning_rate": 1.3977876413754755e-06, "loss": 0.0908, "step": 735850 }, { "epoch": 7.24, "grad_norm": 0.5279943943023682, "learning_rate": 1.397663518921227e-06, "loss": 0.024, "step": 735875 }, { "epoch": 7.24, "grad_norm": 2.3296751976013184, "learning_rate": 1.3975393964669787e-06, "loss": 0.0451, "step": 735900 }, { "epoch": 7.24, "grad_norm": 0.1026645377278328, "learning_rate": 1.39741527401273e-06, "loss": 0.0112, "step": 735925 }, { "epoch": 7.24, "grad_norm": 2.6508712768554688, "learning_rate": 1.3972911515584816e-06, "loss": 0.0729, "step": 735950 }, { "epoch": 7.24, "grad_norm": 2.596954584121704, "learning_rate": 1.3971670291042332e-06, "loss": 0.0222, "step": 735975 }, { "epoch": 7.24, "grad_norm": 3.0724093914031982, "learning_rate": 1.3970429066499846e-06, "loss": 0.0666, "step": 736000 }, { "epoch": 7.24, "grad_norm": 7.745326519012451, "learning_rate": 1.3969187841957363e-06, "loss": 0.0239, "step": 736025 }, { "epoch": 7.24, "grad_norm": 3.0914127826690674, "learning_rate": 1.396794661741488e-06, "loss": 0.0502, "step": 736050 }, { "epoch": 7.24, "grad_norm": 0.3901144564151764, "learning_rate": 1.3966705392872393e-06, "loss": 0.0126, "step": 736075 }, { "epoch": 7.24, "grad_norm": 4.957425117492676, "learning_rate": 1.396546416832991e-06, "loss": 0.0568, "step": 736100 }, { "epoch": 7.24, "grad_norm": 0.4629009962081909, "learning_rate": 1.3964222943787422e-06, "loss": 0.012, "step": 736125 }, { "epoch": 7.24, "grad_norm": 3.663494348526001, "learning_rate": 1.3962981719244938e-06, "loss": 0.0595, "step": 736150 }, { "epoch": 7.24, "grad_norm": 2.258059501647949, "learning_rate": 1.3961740494702454e-06, "loss": 0.0204, "step": 736175 }, { "epoch": 7.24, "grad_norm": 2.2386069297790527, "learning_rate": 1.3960499270159969e-06, "loss": 0.0507, "step": 736200 }, { "epoch": 7.24, "grad_norm": 3.5709805488586426, "learning_rate": 1.3959258045617485e-06, "loss": 0.0209, "step": 736225 }, { "epoch": 7.24, "grad_norm": 4.286976337432861, "learning_rate": 1.3958016821075001e-06, "loss": 0.0596, "step": 736250 }, { "epoch": 7.24, "grad_norm": 15.227444648742676, "learning_rate": 1.3956775596532516e-06, "loss": 0.0229, "step": 736275 }, { "epoch": 7.24, "grad_norm": 3.82767391204834, "learning_rate": 1.3955534371990032e-06, "loss": 0.0696, "step": 736300 }, { "epoch": 7.24, "grad_norm": 2.2581498622894287, "learning_rate": 1.3954293147447548e-06, "loss": 0.0175, "step": 736325 }, { "epoch": 7.24, "grad_norm": 4.7325263023376465, "learning_rate": 1.3953051922905062e-06, "loss": 0.0374, "step": 736350 }, { "epoch": 7.24, "grad_norm": 2.870035171508789, "learning_rate": 1.3951810698362579e-06, "loss": 0.0166, "step": 736375 }, { "epoch": 7.24, "grad_norm": 2.6058223247528076, "learning_rate": 1.3950569473820095e-06, "loss": 0.0672, "step": 736400 }, { "epoch": 7.24, "grad_norm": 23.108625411987305, "learning_rate": 1.3949328249277607e-06, "loss": 0.0188, "step": 736425 }, { "epoch": 7.24, "grad_norm": 2.01995587348938, "learning_rate": 1.3948087024735124e-06, "loss": 0.0387, "step": 736450 }, { "epoch": 7.24, "grad_norm": 1.3145016431808472, "learning_rate": 1.394684580019264e-06, "loss": 0.0194, "step": 736475 }, { "epoch": 7.24, "grad_norm": 2.5797367095947266, "learning_rate": 1.3945604575650154e-06, "loss": 0.0416, "step": 736500 }, { "epoch": 7.24, "grad_norm": 1.744104266166687, "learning_rate": 1.394436335110767e-06, "loss": 0.0263, "step": 736525 }, { "epoch": 7.24, "grad_norm": 4.195789813995361, "learning_rate": 1.3943122126565185e-06, "loss": 0.0577, "step": 736550 }, { "epoch": 7.24, "grad_norm": 7.046400547027588, "learning_rate": 1.3941880902022701e-06, "loss": 0.025, "step": 736575 }, { "epoch": 7.24, "grad_norm": 6.155950546264648, "learning_rate": 1.3940639677480218e-06, "loss": 0.044, "step": 736600 }, { "epoch": 7.24, "grad_norm": 2.361583709716797, "learning_rate": 1.393939845293773e-06, "loss": 0.0128, "step": 736625 }, { "epoch": 7.24, "grad_norm": 2.976315498352051, "learning_rate": 1.3938157228395246e-06, "loss": 0.0421, "step": 736650 }, { "epoch": 7.24, "grad_norm": 0.7759056091308594, "learning_rate": 1.3936916003852762e-06, "loss": 0.0174, "step": 736675 }, { "epoch": 7.24, "grad_norm": 3.133085250854492, "learning_rate": 1.3935674779310277e-06, "loss": 0.0569, "step": 736700 }, { "epoch": 7.24, "grad_norm": 5.004436492919922, "learning_rate": 1.3934433554767793e-06, "loss": 0.0367, "step": 736725 }, { "epoch": 7.24, "grad_norm": 2.2100086212158203, "learning_rate": 1.393319233022531e-06, "loss": 0.0447, "step": 736750 }, { "epoch": 7.24, "grad_norm": 11.913846015930176, "learning_rate": 1.3931951105682823e-06, "loss": 0.0196, "step": 736775 }, { "epoch": 7.24, "grad_norm": 2.691420316696167, "learning_rate": 1.393070988114034e-06, "loss": 0.0751, "step": 736800 }, { "epoch": 7.24, "grad_norm": 0.814132571220398, "learning_rate": 1.3929468656597856e-06, "loss": 0.0155, "step": 736825 }, { "epoch": 7.24, "grad_norm": 3.9302361011505127, "learning_rate": 1.3928227432055368e-06, "loss": 0.0601, "step": 736850 }, { "epoch": 7.25, "grad_norm": 6.018214702606201, "learning_rate": 1.3926986207512885e-06, "loss": 0.0196, "step": 736875 }, { "epoch": 7.25, "grad_norm": 2.8327183723449707, "learning_rate": 1.39257449829704e-06, "loss": 0.0663, "step": 736900 }, { "epoch": 7.25, "grad_norm": 7.136113166809082, "learning_rate": 1.3924503758427915e-06, "loss": 0.0228, "step": 736925 }, { "epoch": 7.25, "grad_norm": 2.8314545154571533, "learning_rate": 1.3923262533885432e-06, "loss": 0.0758, "step": 736950 }, { "epoch": 7.25, "grad_norm": 6.472605228424072, "learning_rate": 1.3922021309342946e-06, "loss": 0.0116, "step": 736975 }, { "epoch": 7.25, "grad_norm": 1.9053696393966675, "learning_rate": 1.3920780084800462e-06, "loss": 0.0492, "step": 737000 }, { "epoch": 7.25, "grad_norm": 0.6809110045433044, "learning_rate": 1.3919538860257979e-06, "loss": 0.0138, "step": 737025 }, { "epoch": 7.25, "grad_norm": 3.4065277576446533, "learning_rate": 1.391829763571549e-06, "loss": 0.0553, "step": 737050 }, { "epoch": 7.25, "grad_norm": 0.9514963030815125, "learning_rate": 1.3917056411173007e-06, "loss": 0.0213, "step": 737075 }, { "epoch": 7.25, "grad_norm": 2.588256597518921, "learning_rate": 1.3915815186630523e-06, "loss": 0.0442, "step": 737100 }, { "epoch": 7.25, "grad_norm": 1.4793193340301514, "learning_rate": 1.3914573962088038e-06, "loss": 0.0348, "step": 737125 }, { "epoch": 7.25, "grad_norm": 2.9196956157684326, "learning_rate": 1.3913332737545554e-06, "loss": 0.0499, "step": 737150 }, { "epoch": 7.25, "grad_norm": 0.588903546333313, "learning_rate": 1.391209151300307e-06, "loss": 0.0183, "step": 737175 }, { "epoch": 7.25, "grad_norm": 3.7193379402160645, "learning_rate": 1.3910850288460585e-06, "loss": 0.0495, "step": 737200 }, { "epoch": 7.25, "grad_norm": 0.541118323802948, "learning_rate": 1.39096090639181e-06, "loss": 0.0192, "step": 737225 }, { "epoch": 7.25, "grad_norm": 1.631544589996338, "learning_rate": 1.3908367839375617e-06, "loss": 0.0492, "step": 737250 }, { "epoch": 7.25, "grad_norm": 10.427412033081055, "learning_rate": 1.390712661483313e-06, "loss": 0.0228, "step": 737275 }, { "epoch": 7.25, "grad_norm": 4.429316520690918, "learning_rate": 1.3905885390290646e-06, "loss": 0.0561, "step": 737300 }, { "epoch": 7.25, "grad_norm": 5.32744836807251, "learning_rate": 1.3904644165748162e-06, "loss": 0.0177, "step": 737325 }, { "epoch": 7.25, "grad_norm": 4.508055686950684, "learning_rate": 1.3903402941205676e-06, "loss": 0.0768, "step": 737350 }, { "epoch": 7.25, "grad_norm": 13.601127624511719, "learning_rate": 1.3902161716663193e-06, "loss": 0.0224, "step": 737375 }, { "epoch": 7.25, "grad_norm": 3.2066683769226074, "learning_rate": 1.3900920492120707e-06, "loss": 0.0586, "step": 737400 }, { "epoch": 7.25, "grad_norm": 0.4482252597808838, "learning_rate": 1.3899679267578223e-06, "loss": 0.0133, "step": 737425 }, { "epoch": 7.25, "grad_norm": 3.518098831176758, "learning_rate": 1.389843804303574e-06, "loss": 0.0419, "step": 737450 }, { "epoch": 7.25, "grad_norm": 3.4693350791931152, "learning_rate": 1.3897196818493252e-06, "loss": 0.0244, "step": 737475 }, { "epoch": 7.25, "grad_norm": 1.917660117149353, "learning_rate": 1.3895955593950768e-06, "loss": 0.0437, "step": 737500 }, { "epoch": 7.25, "grad_norm": 3.476133108139038, "learning_rate": 1.3894714369408284e-06, "loss": 0.0142, "step": 737525 }, { "epoch": 7.25, "grad_norm": 3.0672197341918945, "learning_rate": 1.3893522793847499e-06, "loss": 0.0512, "step": 737550 }, { "epoch": 7.25, "grad_norm": 0.43011701107025146, "learning_rate": 1.3892281569305013e-06, "loss": 0.0238, "step": 737575 }, { "epoch": 7.25, "grad_norm": 3.2398149967193604, "learning_rate": 1.389104034476253e-06, "loss": 0.0674, "step": 737600 }, { "epoch": 7.25, "grad_norm": 0.02340286411345005, "learning_rate": 1.3889799120220046e-06, "loss": 0.013, "step": 737625 }, { "epoch": 7.25, "grad_norm": 3.4152071475982666, "learning_rate": 1.388855789567756e-06, "loss": 0.0453, "step": 737650 }, { "epoch": 7.25, "grad_norm": 10.979750633239746, "learning_rate": 1.3887316671135076e-06, "loss": 0.0241, "step": 737675 }, { "epoch": 7.25, "grad_norm": 5.47196626663208, "learning_rate": 1.3886075446592593e-06, "loss": 0.0475, "step": 737700 }, { "epoch": 7.25, "grad_norm": 1.8376131057739258, "learning_rate": 1.3884834222050107e-06, "loss": 0.0188, "step": 737725 }, { "epoch": 7.25, "grad_norm": 3.7614951133728027, "learning_rate": 1.3883592997507621e-06, "loss": 0.0538, "step": 737750 }, { "epoch": 7.25, "grad_norm": 0.5630186200141907, "learning_rate": 1.3882351772965138e-06, "loss": 0.0186, "step": 737775 }, { "epoch": 7.25, "grad_norm": 2.2817299365997314, "learning_rate": 1.3881110548422652e-06, "loss": 0.0354, "step": 737800 }, { "epoch": 7.25, "grad_norm": 0.2825388014316559, "learning_rate": 1.3879869323880168e-06, "loss": 0.0091, "step": 737825 }, { "epoch": 7.25, "grad_norm": 1.3478338718414307, "learning_rate": 1.3878628099337684e-06, "loss": 0.0629, "step": 737850 }, { "epoch": 7.25, "grad_norm": 0.4705074727535248, "learning_rate": 1.3877386874795199e-06, "loss": 0.0099, "step": 737875 }, { "epoch": 7.26, "grad_norm": 2.718282461166382, "learning_rate": 1.3876145650252715e-06, "loss": 0.047, "step": 737900 }, { "epoch": 7.26, "grad_norm": 5.550058364868164, "learning_rate": 1.3874904425710231e-06, "loss": 0.0187, "step": 737925 }, { "epoch": 7.26, "grad_norm": 1.5998892784118652, "learning_rate": 1.3873663201167746e-06, "loss": 0.0603, "step": 737950 }, { "epoch": 7.26, "grad_norm": 0.03445800021290779, "learning_rate": 1.3872421976625262e-06, "loss": 0.0188, "step": 737975 }, { "epoch": 7.26, "grad_norm": 2.6619999408721924, "learning_rate": 1.3871180752082774e-06, "loss": 0.0677, "step": 738000 }, { "epoch": 7.26, "grad_norm": 11.233001708984375, "learning_rate": 1.386993952754029e-06, "loss": 0.0298, "step": 738025 }, { "epoch": 7.26, "grad_norm": 3.553269147872925, "learning_rate": 1.3868698302997807e-06, "loss": 0.0431, "step": 738050 }, { "epoch": 7.26, "grad_norm": 2.801576614379883, "learning_rate": 1.386745707845532e-06, "loss": 0.0166, "step": 738075 }, { "epoch": 7.26, "grad_norm": 3.5964643955230713, "learning_rate": 1.3866215853912837e-06, "loss": 0.0554, "step": 738100 }, { "epoch": 7.26, "grad_norm": 0.10001487284898758, "learning_rate": 1.3864974629370354e-06, "loss": 0.0139, "step": 738125 }, { "epoch": 7.26, "grad_norm": 2.659437894821167, "learning_rate": 1.3863733404827868e-06, "loss": 0.0583, "step": 738150 }, { "epoch": 7.26, "grad_norm": 9.763167381286621, "learning_rate": 1.3862492180285384e-06, "loss": 0.022, "step": 738175 }, { "epoch": 7.26, "grad_norm": 2.7388217449188232, "learning_rate": 1.38612509557429e-06, "loss": 0.0535, "step": 738200 }, { "epoch": 7.26, "grad_norm": 2.731070041656494, "learning_rate": 1.3860009731200413e-06, "loss": 0.0158, "step": 738225 }, { "epoch": 7.26, "grad_norm": 2.9310221672058105, "learning_rate": 1.385876850665793e-06, "loss": 0.0497, "step": 738250 }, { "epoch": 7.26, "grad_norm": 0.04664827138185501, "learning_rate": 1.3857527282115445e-06, "loss": 0.0222, "step": 738275 }, { "epoch": 7.26, "grad_norm": 3.444575548171997, "learning_rate": 1.385628605757296e-06, "loss": 0.0459, "step": 738300 }, { "epoch": 7.26, "grad_norm": 0.13893505930900574, "learning_rate": 1.3855044833030476e-06, "loss": 0.011, "step": 738325 }, { "epoch": 7.26, "grad_norm": 3.0151584148406982, "learning_rate": 1.3853803608487992e-06, "loss": 0.061, "step": 738350 }, { "epoch": 7.26, "grad_norm": 0.019467461854219437, "learning_rate": 1.3852562383945507e-06, "loss": 0.006, "step": 738375 }, { "epoch": 7.26, "grad_norm": 3.5338008403778076, "learning_rate": 1.3851321159403023e-06, "loss": 0.0709, "step": 738400 }, { "epoch": 7.26, "grad_norm": 0.0699620172381401, "learning_rate": 1.3850079934860535e-06, "loss": 0.0207, "step": 738425 }, { "epoch": 7.26, "grad_norm": 3.788614273071289, "learning_rate": 1.3848838710318051e-06, "loss": 0.0621, "step": 738450 }, { "epoch": 7.26, "grad_norm": 1.8042134046554565, "learning_rate": 1.3847597485775568e-06, "loss": 0.017, "step": 738475 }, { "epoch": 7.26, "grad_norm": 2.2829811573028564, "learning_rate": 1.3846356261233082e-06, "loss": 0.0554, "step": 738500 }, { "epoch": 7.26, "grad_norm": 0.30198270082473755, "learning_rate": 1.3845115036690598e-06, "loss": 0.0255, "step": 738525 }, { "epoch": 7.26, "grad_norm": 2.981170892715454, "learning_rate": 1.3843873812148115e-06, "loss": 0.0587, "step": 738550 }, { "epoch": 7.26, "grad_norm": 1.0484017133712769, "learning_rate": 1.384263258760563e-06, "loss": 0.0158, "step": 738575 }, { "epoch": 7.26, "grad_norm": 3.3411245346069336, "learning_rate": 1.3841391363063145e-06, "loss": 0.0533, "step": 738600 }, { "epoch": 7.26, "grad_norm": 3.131857395172119, "learning_rate": 1.3840150138520662e-06, "loss": 0.0168, "step": 738625 }, { "epoch": 7.26, "grad_norm": 3.186673879623413, "learning_rate": 1.3838908913978174e-06, "loss": 0.0507, "step": 738650 }, { "epoch": 7.26, "grad_norm": 1.757179617881775, "learning_rate": 1.383766768943569e-06, "loss": 0.0168, "step": 738675 }, { "epoch": 7.26, "grad_norm": 4.242772579193115, "learning_rate": 1.3836426464893206e-06, "loss": 0.088, "step": 738700 }, { "epoch": 7.26, "grad_norm": 2.452179431915283, "learning_rate": 1.383518524035072e-06, "loss": 0.0239, "step": 738725 }, { "epoch": 7.26, "grad_norm": 4.325706481933594, "learning_rate": 1.3833944015808237e-06, "loss": 0.0577, "step": 738750 }, { "epoch": 7.26, "grad_norm": 0.26437196135520935, "learning_rate": 1.3832702791265753e-06, "loss": 0.0168, "step": 738775 }, { "epoch": 7.26, "grad_norm": 3.8584580421447754, "learning_rate": 1.3831461566723268e-06, "loss": 0.0689, "step": 738800 }, { "epoch": 7.26, "grad_norm": 1.4833546876907349, "learning_rate": 1.3830220342180784e-06, "loss": 0.0117, "step": 738825 }, { "epoch": 7.26, "grad_norm": 2.5851845741271973, "learning_rate": 1.3828979117638296e-06, "loss": 0.0696, "step": 738850 }, { "epoch": 7.26, "grad_norm": 0.5148006081581116, "learning_rate": 1.3827737893095812e-06, "loss": 0.0239, "step": 738875 }, { "epoch": 7.26, "grad_norm": 2.8398690223693848, "learning_rate": 1.3826496668553329e-06, "loss": 0.0731, "step": 738900 }, { "epoch": 7.27, "grad_norm": 0.14773017168045044, "learning_rate": 1.3825255444010843e-06, "loss": 0.0135, "step": 738925 }, { "epoch": 7.27, "grad_norm": 2.5316481590270996, "learning_rate": 1.382401421946836e-06, "loss": 0.0563, "step": 738950 }, { "epoch": 7.27, "grad_norm": 3.3081204891204834, "learning_rate": 1.3822772994925876e-06, "loss": 0.018, "step": 738975 }, { "epoch": 7.27, "grad_norm": 2.6358039379119873, "learning_rate": 1.382153177038339e-06, "loss": 0.035, "step": 739000 }, { "epoch": 7.27, "grad_norm": 1.621212363243103, "learning_rate": 1.3820290545840906e-06, "loss": 0.0115, "step": 739025 }, { "epoch": 7.27, "grad_norm": 3.4109277725219727, "learning_rate": 1.3819049321298423e-06, "loss": 0.0755, "step": 739050 }, { "epoch": 7.27, "grad_norm": 1.573776125907898, "learning_rate": 1.3817808096755935e-06, "loss": 0.0156, "step": 739075 }, { "epoch": 7.27, "grad_norm": 2.604951858520508, "learning_rate": 1.3816566872213451e-06, "loss": 0.0618, "step": 739100 }, { "epoch": 7.27, "grad_norm": 11.169319152832031, "learning_rate": 1.3815325647670967e-06, "loss": 0.0186, "step": 739125 }, { "epoch": 7.27, "grad_norm": 1.7769551277160645, "learning_rate": 1.3814084423128482e-06, "loss": 0.0549, "step": 739150 }, { "epoch": 7.27, "grad_norm": 0.501845121383667, "learning_rate": 1.3812843198585998e-06, "loss": 0.0214, "step": 739175 }, { "epoch": 7.27, "grad_norm": 3.0842130184173584, "learning_rate": 1.3811601974043514e-06, "loss": 0.0521, "step": 739200 }, { "epoch": 7.27, "grad_norm": 0.37183722853660583, "learning_rate": 1.3810360749501029e-06, "loss": 0.0217, "step": 739225 }, { "epoch": 7.27, "grad_norm": 3.267915964126587, "learning_rate": 1.3809119524958545e-06, "loss": 0.061, "step": 739250 }, { "epoch": 7.27, "grad_norm": 3.035038948059082, "learning_rate": 1.380787830041606e-06, "loss": 0.0192, "step": 739275 }, { "epoch": 7.27, "grad_norm": 3.0806729793548584, "learning_rate": 1.3806637075873576e-06, "loss": 0.0538, "step": 739300 }, { "epoch": 7.27, "grad_norm": 0.04610178992152214, "learning_rate": 1.3805395851331092e-06, "loss": 0.0115, "step": 739325 }, { "epoch": 7.27, "grad_norm": 2.2412750720977783, "learning_rate": 1.3804154626788604e-06, "loss": 0.0564, "step": 739350 }, { "epoch": 7.27, "grad_norm": 2.7083706855773926, "learning_rate": 1.380291340224612e-06, "loss": 0.0111, "step": 739375 }, { "epoch": 7.27, "grad_norm": 2.4471120834350586, "learning_rate": 1.3801672177703637e-06, "loss": 0.0445, "step": 739400 }, { "epoch": 7.27, "grad_norm": 11.045971870422363, "learning_rate": 1.380043095316115e-06, "loss": 0.0215, "step": 739425 }, { "epoch": 7.27, "grad_norm": 3.92741322517395, "learning_rate": 1.3799189728618667e-06, "loss": 0.0297, "step": 739450 }, { "epoch": 7.27, "grad_norm": 0.2467092126607895, "learning_rate": 1.3797948504076184e-06, "loss": 0.0161, "step": 739475 }, { "epoch": 7.27, "grad_norm": 4.420871257781982, "learning_rate": 1.3796707279533698e-06, "loss": 0.0569, "step": 739500 }, { "epoch": 7.27, "grad_norm": 0.23262107372283936, "learning_rate": 1.3795466054991214e-06, "loss": 0.0212, "step": 739525 }, { "epoch": 7.27, "grad_norm": 3.1104068756103516, "learning_rate": 1.379422483044873e-06, "loss": 0.072, "step": 739550 }, { "epoch": 7.27, "grad_norm": 6.638644695281982, "learning_rate": 1.3792983605906243e-06, "loss": 0.0265, "step": 739575 }, { "epoch": 7.27, "grad_norm": 5.199542999267578, "learning_rate": 1.379174238136376e-06, "loss": 0.0483, "step": 739600 }, { "epoch": 7.27, "grad_norm": 0.1962272822856903, "learning_rate": 1.3790501156821275e-06, "loss": 0.0212, "step": 739625 }, { "epoch": 7.27, "grad_norm": 4.032607078552246, "learning_rate": 1.378925993227879e-06, "loss": 0.0604, "step": 739650 }, { "epoch": 7.27, "grad_norm": 6.791686058044434, "learning_rate": 1.3788018707736306e-06, "loss": 0.0315, "step": 739675 }, { "epoch": 7.27, "grad_norm": 3.5948965549468994, "learning_rate": 1.378677748319382e-06, "loss": 0.0681, "step": 739700 }, { "epoch": 7.27, "grad_norm": 0.4084629714488983, "learning_rate": 1.3785536258651337e-06, "loss": 0.0197, "step": 739725 }, { "epoch": 7.27, "grad_norm": 3.7645068168640137, "learning_rate": 1.3784295034108853e-06, "loss": 0.0444, "step": 739750 }, { "epoch": 7.27, "grad_norm": 6.587662220001221, "learning_rate": 1.3783053809566365e-06, "loss": 0.0227, "step": 739775 }, { "epoch": 7.27, "grad_norm": 2.8935327529907227, "learning_rate": 1.3781812585023881e-06, "loss": 0.0533, "step": 739800 }, { "epoch": 7.27, "grad_norm": 1.9001166820526123, "learning_rate": 1.3780571360481398e-06, "loss": 0.0288, "step": 739825 }, { "epoch": 7.27, "grad_norm": 3.2153549194335938, "learning_rate": 1.3779330135938912e-06, "loss": 0.0443, "step": 739850 }, { "epoch": 7.27, "grad_norm": 2.244957447052002, "learning_rate": 1.3778088911396428e-06, "loss": 0.0162, "step": 739875 }, { "epoch": 7.27, "grad_norm": 4.99764347076416, "learning_rate": 1.3776847686853945e-06, "loss": 0.0754, "step": 739900 }, { "epoch": 7.28, "grad_norm": 2.0708439350128174, "learning_rate": 1.3775606462311459e-06, "loss": 0.0238, "step": 739925 }, { "epoch": 7.28, "grad_norm": 3.8301665782928467, "learning_rate": 1.3774365237768975e-06, "loss": 0.0366, "step": 739950 }, { "epoch": 7.28, "grad_norm": 2.6038413047790527, "learning_rate": 1.3773124013226492e-06, "loss": 0.0176, "step": 739975 }, { "epoch": 7.28, "grad_norm": 2.8077049255371094, "learning_rate": 1.3771882788684004e-06, "loss": 0.0461, "step": 740000 }, { "epoch": 7.28, "eval_loss": 0.8640064597129822, "eval_runtime": 6114.1044, "eval_samples_per_second": 1.548, "eval_steps_per_second": 0.194, "eval_wer": 0.1130174467263246, "step": 740000 }, { "epoch": 7.28, "grad_norm": 0.7979934215545654, "learning_rate": 1.377064156414152e-06, "loss": 0.0138, "step": 740025 }, { "epoch": 7.28, "grad_norm": 3.5256545543670654, "learning_rate": 1.3769400339599036e-06, "loss": 0.0581, "step": 740050 }, { "epoch": 7.28, "grad_norm": 5.2745161056518555, "learning_rate": 1.376815911505655e-06, "loss": 0.0205, "step": 740075 }, { "epoch": 7.28, "grad_norm": 2.244373083114624, "learning_rate": 1.3766917890514067e-06, "loss": 0.0506, "step": 740100 }, { "epoch": 7.28, "grad_norm": 11.235641479492188, "learning_rate": 1.3765676665971581e-06, "loss": 0.0172, "step": 740125 }, { "epoch": 7.28, "grad_norm": 2.7655584812164307, "learning_rate": 1.3764435441429098e-06, "loss": 0.0792, "step": 740150 }, { "epoch": 7.28, "grad_norm": 0.04790419712662697, "learning_rate": 1.3763194216886614e-06, "loss": 0.0186, "step": 740175 }, { "epoch": 7.28, "grad_norm": 2.3852784633636475, "learning_rate": 1.3761952992344126e-06, "loss": 0.0634, "step": 740200 }, { "epoch": 7.28, "grad_norm": 0.4215558171272278, "learning_rate": 1.3760711767801642e-06, "loss": 0.0208, "step": 740225 }, { "epoch": 7.28, "grad_norm": 2.6353509426116943, "learning_rate": 1.3759470543259159e-06, "loss": 0.0715, "step": 740250 }, { "epoch": 7.28, "grad_norm": 7.6352386474609375, "learning_rate": 1.3758229318716673e-06, "loss": 0.0196, "step": 740275 }, { "epoch": 7.28, "grad_norm": 3.9900331497192383, "learning_rate": 1.375698809417419e-06, "loss": 0.0536, "step": 740300 }, { "epoch": 7.28, "grad_norm": 0.46362295746803284, "learning_rate": 1.3755746869631706e-06, "loss": 0.0231, "step": 740325 }, { "epoch": 7.28, "grad_norm": 2.0538854598999023, "learning_rate": 1.375450564508922e-06, "loss": 0.0649, "step": 740350 }, { "epoch": 7.28, "grad_norm": 1.3519704341888428, "learning_rate": 1.3753264420546736e-06, "loss": 0.0122, "step": 740375 }, { "epoch": 7.28, "grad_norm": 2.986842632293701, "learning_rate": 1.3752023196004253e-06, "loss": 0.048, "step": 740400 }, { "epoch": 7.28, "grad_norm": 0.9775909185409546, "learning_rate": 1.3750781971461765e-06, "loss": 0.0162, "step": 740425 }, { "epoch": 7.28, "grad_norm": 2.7613584995269775, "learning_rate": 1.3749540746919281e-06, "loss": 0.0644, "step": 740450 }, { "epoch": 7.28, "grad_norm": 6.517563343048096, "learning_rate": 1.3748299522376797e-06, "loss": 0.0168, "step": 740475 }, { "epoch": 7.28, "grad_norm": 4.200671672821045, "learning_rate": 1.3747058297834312e-06, "loss": 0.0537, "step": 740500 }, { "epoch": 7.28, "grad_norm": 0.391408771276474, "learning_rate": 1.3745817073291828e-06, "loss": 0.0154, "step": 740525 }, { "epoch": 7.28, "grad_norm": 4.302466869354248, "learning_rate": 1.3744575848749342e-06, "loss": 0.0565, "step": 740550 }, { "epoch": 7.28, "grad_norm": 1.235530138015747, "learning_rate": 1.3743334624206859e-06, "loss": 0.0125, "step": 740575 }, { "epoch": 7.28, "grad_norm": 3.978667974472046, "learning_rate": 1.3742093399664375e-06, "loss": 0.0403, "step": 740600 }, { "epoch": 7.28, "grad_norm": 5.241034507751465, "learning_rate": 1.374085217512189e-06, "loss": 0.0165, "step": 740625 }, { "epoch": 7.28, "grad_norm": 2.9290807247161865, "learning_rate": 1.3739610950579406e-06, "loss": 0.052, "step": 740650 }, { "epoch": 7.28, "grad_norm": 0.8024770617485046, "learning_rate": 1.3738369726036922e-06, "loss": 0.0178, "step": 740675 }, { "epoch": 7.28, "grad_norm": 3.9777660369873047, "learning_rate": 1.3737128501494434e-06, "loss": 0.0446, "step": 740700 }, { "epoch": 7.28, "grad_norm": 3.5867607593536377, "learning_rate": 1.373588727695195e-06, "loss": 0.0158, "step": 740725 }, { "epoch": 7.28, "grad_norm": 1.9702025651931763, "learning_rate": 1.3734646052409467e-06, "loss": 0.048, "step": 740750 }, { "epoch": 7.28, "grad_norm": 0.26803573966026306, "learning_rate": 1.373340482786698e-06, "loss": 0.0207, "step": 740775 }, { "epoch": 7.28, "grad_norm": 2.519103527069092, "learning_rate": 1.3732213252306195e-06, "loss": 0.0481, "step": 740800 }, { "epoch": 7.28, "grad_norm": 0.042546652257442474, "learning_rate": 1.3730972027763712e-06, "loss": 0.0234, "step": 740825 }, { "epoch": 7.28, "grad_norm": 4.556230068206787, "learning_rate": 1.3729730803221228e-06, "loss": 0.0608, "step": 740850 }, { "epoch": 7.28, "grad_norm": 1.1423399448394775, "learning_rate": 1.3728489578678742e-06, "loss": 0.0123, "step": 740875 }, { "epoch": 7.28, "grad_norm": 4.469028472900391, "learning_rate": 1.3727248354136259e-06, "loss": 0.0414, "step": 740900 }, { "epoch": 7.28, "grad_norm": 3.502302646636963, "learning_rate": 1.3726007129593775e-06, "loss": 0.0332, "step": 740925 }, { "epoch": 7.29, "grad_norm": 2.1496644020080566, "learning_rate": 1.3724765905051287e-06, "loss": 0.0697, "step": 740950 }, { "epoch": 7.29, "grad_norm": 6.363694190979004, "learning_rate": 1.3723524680508804e-06, "loss": 0.0281, "step": 740975 }, { "epoch": 7.29, "grad_norm": 2.067838668823242, "learning_rate": 1.372228345596632e-06, "loss": 0.0705, "step": 741000 }, { "epoch": 7.29, "grad_norm": 0.20149710774421692, "learning_rate": 1.3721042231423834e-06, "loss": 0.0126, "step": 741025 }, { "epoch": 7.29, "grad_norm": 2.919664144515991, "learning_rate": 1.371980100688135e-06, "loss": 0.0577, "step": 741050 }, { "epoch": 7.29, "grad_norm": 0.23964841663837433, "learning_rate": 1.3718559782338867e-06, "loss": 0.0168, "step": 741075 }, { "epoch": 7.29, "grad_norm": 3.562696933746338, "learning_rate": 1.371731855779638e-06, "loss": 0.0464, "step": 741100 }, { "epoch": 7.29, "grad_norm": 0.08837893605232239, "learning_rate": 1.3716077333253897e-06, "loss": 0.014, "step": 741125 }, { "epoch": 7.29, "grad_norm": 2.6166484355926514, "learning_rate": 1.371483610871141e-06, "loss": 0.0498, "step": 741150 }, { "epoch": 7.29, "grad_norm": 6.6456379890441895, "learning_rate": 1.3713594884168926e-06, "loss": 0.0136, "step": 741175 }, { "epoch": 7.29, "grad_norm": 12.244410514831543, "learning_rate": 1.3712353659626442e-06, "loss": 0.0826, "step": 741200 }, { "epoch": 7.29, "grad_norm": 0.08095160126686096, "learning_rate": 1.3711112435083956e-06, "loss": 0.0231, "step": 741225 }, { "epoch": 7.29, "grad_norm": 3.2255282402038574, "learning_rate": 1.3709871210541473e-06, "loss": 0.0591, "step": 741250 }, { "epoch": 7.29, "grad_norm": 1.3600090742111206, "learning_rate": 1.370862998599899e-06, "loss": 0.0199, "step": 741275 }, { "epoch": 7.29, "grad_norm": 2.648059368133545, "learning_rate": 1.3707388761456503e-06, "loss": 0.0515, "step": 741300 }, { "epoch": 7.29, "grad_norm": 0.6904423832893372, "learning_rate": 1.370614753691402e-06, "loss": 0.0155, "step": 741325 }, { "epoch": 7.29, "grad_norm": 3.232187032699585, "learning_rate": 1.3704906312371536e-06, "loss": 0.0496, "step": 741350 }, { "epoch": 7.29, "grad_norm": 0.14626283943653107, "learning_rate": 1.3703665087829048e-06, "loss": 0.0116, "step": 741375 }, { "epoch": 7.29, "grad_norm": 1.8615823984146118, "learning_rate": 1.3702423863286565e-06, "loss": 0.0534, "step": 741400 }, { "epoch": 7.29, "grad_norm": 8.30064868927002, "learning_rate": 1.370118263874408e-06, "loss": 0.0219, "step": 741425 }, { "epoch": 7.29, "grad_norm": 1.7493059635162354, "learning_rate": 1.3699941414201595e-06, "loss": 0.0704, "step": 741450 }, { "epoch": 7.29, "grad_norm": 10.616743087768555, "learning_rate": 1.3698700189659111e-06, "loss": 0.0265, "step": 741475 }, { "epoch": 7.29, "grad_norm": 2.5886545181274414, "learning_rate": 1.3697458965116628e-06, "loss": 0.0499, "step": 741500 }, { "epoch": 7.29, "grad_norm": 9.492199897766113, "learning_rate": 1.3696217740574142e-06, "loss": 0.0272, "step": 741525 }, { "epoch": 7.29, "grad_norm": 2.9805681705474854, "learning_rate": 1.3694976516031658e-06, "loss": 0.0356, "step": 741550 }, { "epoch": 7.29, "grad_norm": 0.08596663177013397, "learning_rate": 1.369373529148917e-06, "loss": 0.0195, "step": 741575 }, { "epoch": 7.29, "grad_norm": 1.9986858367919922, "learning_rate": 1.3692494066946687e-06, "loss": 0.0816, "step": 741600 }, { "epoch": 7.29, "grad_norm": 0.20713303983211517, "learning_rate": 1.3691252842404203e-06, "loss": 0.0155, "step": 741625 }, { "epoch": 7.29, "grad_norm": 4.00063943862915, "learning_rate": 1.3690011617861717e-06, "loss": 0.0729, "step": 741650 }, { "epoch": 7.29, "grad_norm": 0.0967787429690361, "learning_rate": 1.3688770393319234e-06, "loss": 0.0195, "step": 741675 }, { "epoch": 7.29, "grad_norm": 4.236558437347412, "learning_rate": 1.368752916877675e-06, "loss": 0.0728, "step": 741700 }, { "epoch": 7.29, "grad_norm": 3.1147525310516357, "learning_rate": 1.3686287944234264e-06, "loss": 0.0178, "step": 741725 }, { "epoch": 7.29, "grad_norm": 5.057094573974609, "learning_rate": 1.368504671969178e-06, "loss": 0.0619, "step": 741750 }, { "epoch": 7.29, "grad_norm": 2.3476502895355225, "learning_rate": 1.3683805495149297e-06, "loss": 0.0253, "step": 741775 }, { "epoch": 7.29, "grad_norm": 4.183058261871338, "learning_rate": 1.368256427060681e-06, "loss": 0.0601, "step": 741800 }, { "epoch": 7.29, "grad_norm": 3.61112117767334, "learning_rate": 1.3681323046064326e-06, "loss": 0.0151, "step": 741825 }, { "epoch": 7.29, "grad_norm": 3.5897819995880127, "learning_rate": 1.3680081821521842e-06, "loss": 0.0565, "step": 741850 }, { "epoch": 7.29, "grad_norm": 5.55668306350708, "learning_rate": 1.3678840596979356e-06, "loss": 0.0197, "step": 741875 }, { "epoch": 7.29, "grad_norm": 2.7168784141540527, "learning_rate": 1.3677599372436872e-06, "loss": 0.0742, "step": 741900 }, { "epoch": 7.29, "grad_norm": 2.542235851287842, "learning_rate": 1.3676358147894389e-06, "loss": 0.024, "step": 741925 }, { "epoch": 7.29, "grad_norm": 2.1236565113067627, "learning_rate": 1.3675116923351903e-06, "loss": 0.0738, "step": 741950 }, { "epoch": 7.3, "grad_norm": 2.344806671142578, "learning_rate": 1.367387569880942e-06, "loss": 0.0169, "step": 741975 }, { "epoch": 7.3, "grad_norm": 3.009829521179199, "learning_rate": 1.3672634474266934e-06, "loss": 0.0694, "step": 742000 }, { "epoch": 7.3, "grad_norm": 1.8600988388061523, "learning_rate": 1.367139324972445e-06, "loss": 0.0186, "step": 742025 }, { "epoch": 7.3, "grad_norm": 1.6627781391143799, "learning_rate": 1.3670152025181966e-06, "loss": 0.0539, "step": 742050 }, { "epoch": 7.3, "grad_norm": 1.8279976844787598, "learning_rate": 1.3668910800639478e-06, "loss": 0.0111, "step": 742075 }, { "epoch": 7.3, "grad_norm": 2.5541486740112305, "learning_rate": 1.3667669576096995e-06, "loss": 0.0492, "step": 742100 }, { "epoch": 7.3, "grad_norm": 0.32737836241722107, "learning_rate": 1.3666428351554511e-06, "loss": 0.023, "step": 742125 }, { "epoch": 7.3, "grad_norm": 1.5052473545074463, "learning_rate": 1.3665187127012025e-06, "loss": 0.0458, "step": 742150 }, { "epoch": 7.3, "grad_norm": 0.9020377397537231, "learning_rate": 1.3663945902469542e-06, "loss": 0.0318, "step": 742175 }, { "epoch": 7.3, "grad_norm": 4.722795009613037, "learning_rate": 1.3662704677927058e-06, "loss": 0.0472, "step": 742200 }, { "epoch": 7.3, "grad_norm": 0.28847578167915344, "learning_rate": 1.3661463453384572e-06, "loss": 0.0191, "step": 742225 }, { "epoch": 7.3, "grad_norm": 2.142857551574707, "learning_rate": 1.3660222228842089e-06, "loss": 0.0708, "step": 742250 }, { "epoch": 7.3, "grad_norm": 2.011981964111328, "learning_rate": 1.3658981004299605e-06, "loss": 0.0178, "step": 742275 }, { "epoch": 7.3, "grad_norm": 4.022722244262695, "learning_rate": 1.3657739779757117e-06, "loss": 0.0546, "step": 742300 }, { "epoch": 7.3, "grad_norm": 0.11748038232326508, "learning_rate": 1.3656498555214633e-06, "loss": 0.0206, "step": 742325 }, { "epoch": 7.3, "grad_norm": 2.538694143295288, "learning_rate": 1.365525733067215e-06, "loss": 0.0428, "step": 742350 }, { "epoch": 7.3, "grad_norm": 15.044466018676758, "learning_rate": 1.3654016106129664e-06, "loss": 0.0176, "step": 742375 }, { "epoch": 7.3, "grad_norm": 2.624211311340332, "learning_rate": 1.365277488158718e-06, "loss": 0.0475, "step": 742400 }, { "epoch": 7.3, "grad_norm": 2.5663089752197266, "learning_rate": 1.3651533657044695e-06, "loss": 0.0103, "step": 742425 }, { "epoch": 7.3, "grad_norm": 2.9158122539520264, "learning_rate": 1.365029243250221e-06, "loss": 0.0541, "step": 742450 }, { "epoch": 7.3, "grad_norm": 0.3644790053367615, "learning_rate": 1.3649051207959727e-06, "loss": 0.0254, "step": 742475 }, { "epoch": 7.3, "grad_norm": 3.6199162006378174, "learning_rate": 1.364780998341724e-06, "loss": 0.0482, "step": 742500 }, { "epoch": 7.3, "grad_norm": 4.981985569000244, "learning_rate": 1.3646568758874756e-06, "loss": 0.0173, "step": 742525 }, { "epoch": 7.3, "grad_norm": 4.2589569091796875, "learning_rate": 1.3645327534332272e-06, "loss": 0.0705, "step": 742550 }, { "epoch": 7.3, "grad_norm": 3.26114821434021, "learning_rate": 1.3644086309789786e-06, "loss": 0.0189, "step": 742575 }, { "epoch": 7.3, "grad_norm": 2.8026652336120605, "learning_rate": 1.3642845085247303e-06, "loss": 0.0513, "step": 742600 }, { "epoch": 7.3, "grad_norm": 0.23848208785057068, "learning_rate": 1.364160386070482e-06, "loss": 0.0201, "step": 742625 }, { "epoch": 7.3, "grad_norm": 3.6470866203308105, "learning_rate": 1.3640362636162333e-06, "loss": 0.064, "step": 742650 }, { "epoch": 7.3, "grad_norm": 11.986994743347168, "learning_rate": 1.363912141161985e-06, "loss": 0.0245, "step": 742675 }, { "epoch": 7.3, "grad_norm": 3.191542625427246, "learning_rate": 1.3637880187077366e-06, "loss": 0.0639, "step": 742700 }, { "epoch": 7.3, "grad_norm": 1.5480589866638184, "learning_rate": 1.3636638962534878e-06, "loss": 0.0078, "step": 742725 }, { "epoch": 7.3, "grad_norm": 3.0189523696899414, "learning_rate": 1.3635397737992394e-06, "loss": 0.0658, "step": 742750 }, { "epoch": 7.3, "grad_norm": 7.548810958862305, "learning_rate": 1.363415651344991e-06, "loss": 0.0167, "step": 742775 }, { "epoch": 7.3, "grad_norm": 4.1821136474609375, "learning_rate": 1.3632915288907425e-06, "loss": 0.0634, "step": 742800 }, { "epoch": 7.3, "grad_norm": 4.8105268478393555, "learning_rate": 1.3631674064364941e-06, "loss": 0.0209, "step": 742825 }, { "epoch": 7.3, "grad_norm": 3.2303779125213623, "learning_rate": 1.3630482488804156e-06, "loss": 0.0691, "step": 742850 }, { "epoch": 7.3, "grad_norm": 2.0558128356933594, "learning_rate": 1.3629241264261672e-06, "loss": 0.0203, "step": 742875 }, { "epoch": 7.3, "grad_norm": 3.747849225997925, "learning_rate": 1.3628000039719186e-06, "loss": 0.0466, "step": 742900 }, { "epoch": 7.3, "grad_norm": 7.941615104675293, "learning_rate": 1.3626758815176703e-06, "loss": 0.0286, "step": 742925 }, { "epoch": 7.3, "grad_norm": 3.697519302368164, "learning_rate": 1.362551759063422e-06, "loss": 0.0655, "step": 742950 }, { "epoch": 7.31, "grad_norm": 2.4305288791656494, "learning_rate": 1.3624276366091731e-06, "loss": 0.0315, "step": 742975 }, { "epoch": 7.31, "grad_norm": 3.099811553955078, "learning_rate": 1.3623035141549248e-06, "loss": 0.0603, "step": 743000 }, { "epoch": 7.31, "grad_norm": 2.7044241428375244, "learning_rate": 1.3621793917006762e-06, "loss": 0.013, "step": 743025 }, { "epoch": 7.31, "grad_norm": 3.9605095386505127, "learning_rate": 1.3620552692464278e-06, "loss": 0.0544, "step": 743050 }, { "epoch": 7.31, "grad_norm": 1.689751148223877, "learning_rate": 1.3619311467921795e-06, "loss": 0.0299, "step": 743075 }, { "epoch": 7.31, "grad_norm": 4.149684429168701, "learning_rate": 1.3618070243379309e-06, "loss": 0.0855, "step": 743100 }, { "epoch": 7.31, "grad_norm": 24.269973754882812, "learning_rate": 1.3616829018836825e-06, "loss": 0.0213, "step": 743125 }, { "epoch": 7.31, "grad_norm": 3.2837092876434326, "learning_rate": 1.3615587794294342e-06, "loss": 0.0614, "step": 743150 }, { "epoch": 7.31, "grad_norm": 6.982760906219482, "learning_rate": 1.3614346569751854e-06, "loss": 0.0196, "step": 743175 }, { "epoch": 7.31, "grad_norm": 3.9672861099243164, "learning_rate": 1.361310534520937e-06, "loss": 0.0835, "step": 743200 }, { "epoch": 7.31, "grad_norm": 3.9079976081848145, "learning_rate": 1.3611864120666886e-06, "loss": 0.0127, "step": 743225 }, { "epoch": 7.31, "grad_norm": 2.767380475997925, "learning_rate": 1.36106228961244e-06, "loss": 0.0605, "step": 743250 }, { "epoch": 7.31, "grad_norm": 3.4444634914398193, "learning_rate": 1.3609381671581917e-06, "loss": 0.0225, "step": 743275 }, { "epoch": 7.31, "grad_norm": 8.635196685791016, "learning_rate": 1.3608140447039433e-06, "loss": 0.0418, "step": 743300 }, { "epoch": 7.31, "grad_norm": 3.26638126373291, "learning_rate": 1.3606899222496947e-06, "loss": 0.0273, "step": 743325 }, { "epoch": 7.31, "grad_norm": 2.774343252182007, "learning_rate": 1.3605657997954464e-06, "loss": 0.05, "step": 743350 }, { "epoch": 7.31, "grad_norm": 0.15461526811122894, "learning_rate": 1.360441677341198e-06, "loss": 0.0267, "step": 743375 }, { "epoch": 7.31, "grad_norm": 4.313994884490967, "learning_rate": 1.3603175548869492e-06, "loss": 0.0457, "step": 743400 }, { "epoch": 7.31, "grad_norm": 0.1686093956232071, "learning_rate": 1.3601934324327009e-06, "loss": 0.0109, "step": 743425 }, { "epoch": 7.31, "grad_norm": 2.328623056411743, "learning_rate": 1.3600693099784523e-06, "loss": 0.0434, "step": 743450 }, { "epoch": 7.31, "grad_norm": 0.9418385624885559, "learning_rate": 1.359945187524204e-06, "loss": 0.0282, "step": 743475 }, { "epoch": 7.31, "grad_norm": 3.9918394088745117, "learning_rate": 1.3598210650699556e-06, "loss": 0.0558, "step": 743500 }, { "epoch": 7.31, "grad_norm": 1.8858790397644043, "learning_rate": 1.359696942615707e-06, "loss": 0.029, "step": 743525 }, { "epoch": 7.31, "grad_norm": 2.609116792678833, "learning_rate": 1.3595728201614586e-06, "loss": 0.0824, "step": 743550 }, { "epoch": 7.31, "grad_norm": 5.410900592803955, "learning_rate": 1.3594486977072103e-06, "loss": 0.0202, "step": 743575 }, { "epoch": 7.31, "grad_norm": 3.1107699871063232, "learning_rate": 1.3593245752529617e-06, "loss": 0.0675, "step": 743600 }, { "epoch": 7.31, "grad_norm": 3.3030714988708496, "learning_rate": 1.3592004527987133e-06, "loss": 0.0266, "step": 743625 }, { "epoch": 7.31, "grad_norm": 2.865290880203247, "learning_rate": 1.359076330344465e-06, "loss": 0.0573, "step": 743650 }, { "epoch": 7.31, "grad_norm": 0.3701131045818329, "learning_rate": 1.3589522078902162e-06, "loss": 0.0256, "step": 743675 }, { "epoch": 7.31, "grad_norm": 3.5571553707122803, "learning_rate": 1.3588280854359678e-06, "loss": 0.0566, "step": 743700 }, { "epoch": 7.31, "grad_norm": 2.9537923336029053, "learning_rate": 1.3587039629817194e-06, "loss": 0.0174, "step": 743725 }, { "epoch": 7.31, "grad_norm": 30.562681198120117, "learning_rate": 1.3585798405274709e-06, "loss": 0.032, "step": 743750 }, { "epoch": 7.31, "grad_norm": 6.891330718994141, "learning_rate": 1.3584557180732225e-06, "loss": 0.0282, "step": 743775 }, { "epoch": 7.31, "grad_norm": 2.9564454555511475, "learning_rate": 1.3583315956189741e-06, "loss": 0.0648, "step": 743800 }, { "epoch": 7.31, "grad_norm": 1.0512722730636597, "learning_rate": 1.3582074731647255e-06, "loss": 0.0144, "step": 743825 }, { "epoch": 7.31, "grad_norm": 2.88464093208313, "learning_rate": 1.3580833507104772e-06, "loss": 0.0512, "step": 743850 }, { "epoch": 7.31, "grad_norm": 0.0864543616771698, "learning_rate": 1.3579592282562284e-06, "loss": 0.0099, "step": 743875 }, { "epoch": 7.31, "grad_norm": 2.142832040786743, "learning_rate": 1.35783510580198e-06, "loss": 0.0739, "step": 743900 }, { "epoch": 7.31, "grad_norm": 4.483187675476074, "learning_rate": 1.3577109833477317e-06, "loss": 0.0215, "step": 743925 }, { "epoch": 7.31, "grad_norm": 1.9589366912841797, "learning_rate": 1.357586860893483e-06, "loss": 0.0513, "step": 743950 }, { "epoch": 7.31, "grad_norm": 0.21262192726135254, "learning_rate": 1.3574627384392347e-06, "loss": 0.0138, "step": 743975 }, { "epoch": 7.32, "grad_norm": 2.653197765350342, "learning_rate": 1.3573386159849864e-06, "loss": 0.0587, "step": 744000 }, { "epoch": 7.32, "grad_norm": 6.2628984451293945, "learning_rate": 1.3572144935307378e-06, "loss": 0.0183, "step": 744025 }, { "epoch": 7.32, "grad_norm": 3.759561777114868, "learning_rate": 1.3570903710764894e-06, "loss": 0.0451, "step": 744050 }, { "epoch": 7.32, "grad_norm": 5.809438228607178, "learning_rate": 1.356966248622241e-06, "loss": 0.0206, "step": 744075 }, { "epoch": 7.32, "grad_norm": 3.4773621559143066, "learning_rate": 1.3568421261679923e-06, "loss": 0.0748, "step": 744100 }, { "epoch": 7.32, "grad_norm": 10.29971694946289, "learning_rate": 1.3567180037137439e-06, "loss": 0.0348, "step": 744125 }, { "epoch": 7.32, "grad_norm": 3.549764633178711, "learning_rate": 1.3565938812594955e-06, "loss": 0.0628, "step": 744150 }, { "epoch": 7.32, "grad_norm": 10.057442665100098, "learning_rate": 1.356469758805247e-06, "loss": 0.0167, "step": 744175 }, { "epoch": 7.32, "grad_norm": 2.7821478843688965, "learning_rate": 1.3563456363509986e-06, "loss": 0.0609, "step": 744200 }, { "epoch": 7.32, "grad_norm": 0.19395111501216888, "learning_rate": 1.3562215138967502e-06, "loss": 0.019, "step": 744225 }, { "epoch": 7.32, "grad_norm": 5.2212934494018555, "learning_rate": 1.3560973914425016e-06, "loss": 0.0567, "step": 744250 }, { "epoch": 7.32, "grad_norm": 11.141135215759277, "learning_rate": 1.3559732689882533e-06, "loss": 0.0236, "step": 744275 }, { "epoch": 7.32, "grad_norm": 2.154153347015381, "learning_rate": 1.3558491465340045e-06, "loss": 0.0502, "step": 744300 }, { "epoch": 7.32, "grad_norm": 5.764591217041016, "learning_rate": 1.3557250240797561e-06, "loss": 0.0299, "step": 744325 }, { "epoch": 7.32, "grad_norm": 4.381890773773193, "learning_rate": 1.3556009016255078e-06, "loss": 0.0703, "step": 744350 }, { "epoch": 7.32, "grad_norm": 0.07331311702728271, "learning_rate": 1.3554767791712592e-06, "loss": 0.015, "step": 744375 }, { "epoch": 7.32, "grad_norm": 2.2695608139038086, "learning_rate": 1.3553526567170108e-06, "loss": 0.0465, "step": 744400 }, { "epoch": 7.32, "grad_norm": 1.9422671794891357, "learning_rate": 1.3552285342627625e-06, "loss": 0.0232, "step": 744425 }, { "epoch": 7.32, "grad_norm": 3.134028673171997, "learning_rate": 1.3551044118085139e-06, "loss": 0.0543, "step": 744450 }, { "epoch": 7.32, "grad_norm": 0.627178966999054, "learning_rate": 1.3549802893542655e-06, "loss": 0.0161, "step": 744475 }, { "epoch": 7.32, "grad_norm": 2.471761465072632, "learning_rate": 1.3548561669000171e-06, "loss": 0.0758, "step": 744500 }, { "epoch": 7.32, "grad_norm": 5.374696731567383, "learning_rate": 1.3547320444457684e-06, "loss": 0.0181, "step": 744525 }, { "epoch": 7.32, "grad_norm": 1.1854196786880493, "learning_rate": 1.35460792199152e-06, "loss": 0.0396, "step": 744550 }, { "epoch": 7.32, "grad_norm": 0.09273849427700043, "learning_rate": 1.3544837995372716e-06, "loss": 0.0167, "step": 744575 }, { "epoch": 7.32, "grad_norm": 2.3009300231933594, "learning_rate": 1.354359677083023e-06, "loss": 0.0597, "step": 744600 }, { "epoch": 7.32, "grad_norm": 4.683050155639648, "learning_rate": 1.3542355546287747e-06, "loss": 0.0235, "step": 744625 }, { "epoch": 7.32, "grad_norm": 2.7420570850372314, "learning_rate": 1.3541114321745263e-06, "loss": 0.0613, "step": 744650 }, { "epoch": 7.32, "grad_norm": 0.7368829250335693, "learning_rate": 1.3539873097202777e-06, "loss": 0.0244, "step": 744675 }, { "epoch": 7.32, "grad_norm": 3.837301254272461, "learning_rate": 1.3538631872660294e-06, "loss": 0.0614, "step": 744700 }, { "epoch": 7.32, "grad_norm": 0.2338525503873825, "learning_rate": 1.3537390648117806e-06, "loss": 0.0155, "step": 744725 }, { "epoch": 7.32, "grad_norm": 3.5336875915527344, "learning_rate": 1.3536149423575322e-06, "loss": 0.0471, "step": 744750 }, { "epoch": 7.32, "grad_norm": 1.9907397031784058, "learning_rate": 1.3534908199032839e-06, "loss": 0.0315, "step": 744775 }, { "epoch": 7.32, "grad_norm": 2.0144660472869873, "learning_rate": 1.3533666974490353e-06, "loss": 0.0458, "step": 744800 }, { "epoch": 7.32, "grad_norm": 0.7838648557662964, "learning_rate": 1.353242574994787e-06, "loss": 0.0183, "step": 744825 }, { "epoch": 7.32, "grad_norm": 3.997267007827759, "learning_rate": 1.3531184525405386e-06, "loss": 0.0344, "step": 744850 }, { "epoch": 7.32, "grad_norm": 0.5112001299858093, "learning_rate": 1.35299433008629e-06, "loss": 0.0217, "step": 744875 }, { "epoch": 7.32, "grad_norm": 1.522479772567749, "learning_rate": 1.3528702076320416e-06, "loss": 0.05, "step": 744900 }, { "epoch": 7.32, "grad_norm": 0.1674254685640335, "learning_rate": 1.3527460851777932e-06, "loss": 0.017, "step": 744925 }, { "epoch": 7.32, "grad_norm": 3.9222872257232666, "learning_rate": 1.3526219627235447e-06, "loss": 0.0726, "step": 744950 }, { "epoch": 7.32, "grad_norm": 0.015137615613639355, "learning_rate": 1.3524978402692963e-06, "loss": 0.016, "step": 744975 }, { "epoch": 7.32, "grad_norm": 3.3716025352478027, "learning_rate": 1.352373717815048e-06, "loss": 0.0639, "step": 745000 }, { "epoch": 7.33, "grad_norm": 0.1331941783428192, "learning_rate": 1.3522495953607992e-06, "loss": 0.011, "step": 745025 }, { "epoch": 7.33, "grad_norm": 3.807056427001953, "learning_rate": 1.3521254729065508e-06, "loss": 0.037, "step": 745050 }, { "epoch": 7.33, "grad_norm": 8.278400421142578, "learning_rate": 1.3520013504523024e-06, "loss": 0.0236, "step": 745075 }, { "epoch": 7.33, "grad_norm": 3.54097318649292, "learning_rate": 1.3518772279980538e-06, "loss": 0.0537, "step": 745100 }, { "epoch": 7.33, "grad_norm": 0.12186812609434128, "learning_rate": 1.3517531055438055e-06, "loss": 0.0165, "step": 745125 }, { "epoch": 7.33, "grad_norm": 5.726783752441406, "learning_rate": 1.351633947987727e-06, "loss": 0.0417, "step": 745150 }, { "epoch": 7.33, "grad_norm": 4.930649280548096, "learning_rate": 1.3515098255334786e-06, "loss": 0.0195, "step": 745175 }, { "epoch": 7.33, "grad_norm": 3.28857421875, "learning_rate": 1.35138570307923e-06, "loss": 0.0632, "step": 745200 }, { "epoch": 7.33, "grad_norm": 3.99056077003479, "learning_rate": 1.3512615806249816e-06, "loss": 0.0106, "step": 745225 }, { "epoch": 7.33, "grad_norm": 2.9577670097351074, "learning_rate": 1.3511374581707333e-06, "loss": 0.0716, "step": 745250 }, { "epoch": 7.33, "grad_norm": 8.80903434753418, "learning_rate": 1.3510133357164845e-06, "loss": 0.0207, "step": 745275 }, { "epoch": 7.33, "grad_norm": 1.8867521286010742, "learning_rate": 1.350889213262236e-06, "loss": 0.0518, "step": 745300 }, { "epoch": 7.33, "grad_norm": 6.731518745422363, "learning_rate": 1.3507650908079875e-06, "loss": 0.0355, "step": 745325 }, { "epoch": 7.33, "grad_norm": 3.9335670471191406, "learning_rate": 1.3506409683537392e-06, "loss": 0.0595, "step": 745350 }, { "epoch": 7.33, "grad_norm": 2.282440185546875, "learning_rate": 1.3505168458994908e-06, "loss": 0.0204, "step": 745375 }, { "epoch": 7.33, "grad_norm": 3.734282970428467, "learning_rate": 1.3503927234452422e-06, "loss": 0.0597, "step": 745400 }, { "epoch": 7.33, "grad_norm": 2.551098346710205, "learning_rate": 1.3502686009909939e-06, "loss": 0.0248, "step": 745425 }, { "epoch": 7.33, "grad_norm": 4.061654090881348, "learning_rate": 1.3501444785367455e-06, "loss": 0.0402, "step": 745450 }, { "epoch": 7.33, "grad_norm": 0.46332570910453796, "learning_rate": 1.3500203560824967e-06, "loss": 0.0246, "step": 745475 }, { "epoch": 7.33, "grad_norm": 3.112417221069336, "learning_rate": 1.3498962336282483e-06, "loss": 0.0488, "step": 745500 }, { "epoch": 7.33, "grad_norm": 1.3260955810546875, "learning_rate": 1.349772111174e-06, "loss": 0.023, "step": 745525 }, { "epoch": 7.33, "grad_norm": 9.664581298828125, "learning_rate": 1.3496479887197514e-06, "loss": 0.04, "step": 745550 }, { "epoch": 7.33, "grad_norm": 7.728385925292969, "learning_rate": 1.349523866265503e-06, "loss": 0.0203, "step": 745575 }, { "epoch": 7.33, "grad_norm": 2.7968506813049316, "learning_rate": 1.3493997438112547e-06, "loss": 0.0651, "step": 745600 }, { "epoch": 7.33, "grad_norm": 0.08949783444404602, "learning_rate": 1.349275621357006e-06, "loss": 0.0126, "step": 745625 }, { "epoch": 7.33, "grad_norm": 3.244798183441162, "learning_rate": 1.3491514989027577e-06, "loss": 0.0604, "step": 745650 }, { "epoch": 7.33, "grad_norm": 3.166959524154663, "learning_rate": 1.3490273764485094e-06, "loss": 0.012, "step": 745675 }, { "epoch": 7.33, "grad_norm": 3.5821733474731445, "learning_rate": 1.3489032539942606e-06, "loss": 0.0572, "step": 745700 }, { "epoch": 7.33, "grad_norm": 2.577791213989258, "learning_rate": 1.3487791315400122e-06, "loss": 0.0113, "step": 745725 }, { "epoch": 7.33, "grad_norm": 3.901756763458252, "learning_rate": 1.3486550090857636e-06, "loss": 0.084, "step": 745750 }, { "epoch": 7.33, "grad_norm": 7.545411586761475, "learning_rate": 1.3485308866315153e-06, "loss": 0.0151, "step": 745775 }, { "epoch": 7.33, "grad_norm": 3.101270914077759, "learning_rate": 1.348406764177267e-06, "loss": 0.0772, "step": 745800 }, { "epoch": 7.33, "grad_norm": 2.1782970428466797, "learning_rate": 1.3482826417230183e-06, "loss": 0.0244, "step": 745825 }, { "epoch": 7.33, "grad_norm": 4.6959333419799805, "learning_rate": 1.34815851926877e-06, "loss": 0.0498, "step": 745850 }, { "epoch": 7.33, "grad_norm": 1.7142555713653564, "learning_rate": 1.3480343968145216e-06, "loss": 0.0176, "step": 745875 }, { "epoch": 7.33, "grad_norm": 2.836258888244629, "learning_rate": 1.3479102743602728e-06, "loss": 0.0584, "step": 745900 }, { "epoch": 7.33, "grad_norm": 2.187591075897217, "learning_rate": 1.3477861519060244e-06, "loss": 0.0135, "step": 745925 }, { "epoch": 7.33, "grad_norm": 3.2185006141662598, "learning_rate": 1.347662029451776e-06, "loss": 0.0558, "step": 745950 }, { "epoch": 7.33, "grad_norm": 3.4104228019714355, "learning_rate": 1.3475379069975275e-06, "loss": 0.0131, "step": 745975 }, { "epoch": 7.33, "grad_norm": 2.581470489501953, "learning_rate": 1.3474137845432791e-06, "loss": 0.0467, "step": 746000 }, { "epoch": 7.34, "grad_norm": 1.408098578453064, "learning_rate": 1.3472896620890308e-06, "loss": 0.0166, "step": 746025 }, { "epoch": 7.34, "grad_norm": 2.8577024936676025, "learning_rate": 1.3471655396347822e-06, "loss": 0.0375, "step": 746050 }, { "epoch": 7.34, "grad_norm": 1.7661808729171753, "learning_rate": 1.3470414171805338e-06, "loss": 0.0124, "step": 746075 }, { "epoch": 7.34, "grad_norm": 2.6309995651245117, "learning_rate": 1.3469172947262855e-06, "loss": 0.0626, "step": 746100 }, { "epoch": 7.34, "grad_norm": 4.6784138679504395, "learning_rate": 1.3467931722720367e-06, "loss": 0.0155, "step": 746125 }, { "epoch": 7.34, "grad_norm": 2.673642873764038, "learning_rate": 1.3466690498177883e-06, "loss": 0.0425, "step": 746150 }, { "epoch": 7.34, "grad_norm": 0.5361775159835815, "learning_rate": 1.3465449273635397e-06, "loss": 0.0255, "step": 746175 }, { "epoch": 7.34, "grad_norm": 1.6111334562301636, "learning_rate": 1.3464208049092914e-06, "loss": 0.0644, "step": 746200 }, { "epoch": 7.34, "grad_norm": 3.6560823917388916, "learning_rate": 1.346296682455043e-06, "loss": 0.0139, "step": 746225 }, { "epoch": 7.34, "grad_norm": 2.8021819591522217, "learning_rate": 1.3461725600007944e-06, "loss": 0.0505, "step": 746250 }, { "epoch": 7.34, "grad_norm": 3.216521739959717, "learning_rate": 1.346048437546546e-06, "loss": 0.0257, "step": 746275 }, { "epoch": 7.34, "grad_norm": 3.3827481269836426, "learning_rate": 1.3459243150922977e-06, "loss": 0.0275, "step": 746300 }, { "epoch": 7.34, "grad_norm": 0.2220364660024643, "learning_rate": 1.345800192638049e-06, "loss": 0.0242, "step": 746325 }, { "epoch": 7.34, "grad_norm": 4.369443893432617, "learning_rate": 1.3456760701838005e-06, "loss": 0.0702, "step": 746350 }, { "epoch": 7.34, "grad_norm": 4.867763519287109, "learning_rate": 1.3455519477295522e-06, "loss": 0.0108, "step": 746375 }, { "epoch": 7.34, "grad_norm": 2.413283348083496, "learning_rate": 1.3454278252753036e-06, "loss": 0.0569, "step": 746400 }, { "epoch": 7.34, "grad_norm": 0.04445096105337143, "learning_rate": 1.3453037028210552e-06, "loss": 0.0247, "step": 746425 }, { "epoch": 7.34, "grad_norm": 3.0199944972991943, "learning_rate": 1.3451795803668069e-06, "loss": 0.074, "step": 746450 }, { "epoch": 7.34, "grad_norm": 7.414178371429443, "learning_rate": 1.3450554579125583e-06, "loss": 0.02, "step": 746475 }, { "epoch": 7.34, "grad_norm": 4.463293075561523, "learning_rate": 1.34493133545831e-06, "loss": 0.062, "step": 746500 }, { "epoch": 7.34, "grad_norm": 0.3504604995250702, "learning_rate": 1.3448072130040616e-06, "loss": 0.0235, "step": 746525 }, { "epoch": 7.34, "grad_norm": 5.270566463470459, "learning_rate": 1.344683090549813e-06, "loss": 0.0727, "step": 746550 }, { "epoch": 7.34, "grad_norm": 0.04920591413974762, "learning_rate": 1.3445589680955646e-06, "loss": 0.0143, "step": 746575 }, { "epoch": 7.34, "grad_norm": 2.1346096992492676, "learning_rate": 1.3444348456413158e-06, "loss": 0.0665, "step": 746600 }, { "epoch": 7.34, "grad_norm": 0.060503192245960236, "learning_rate": 1.3443107231870675e-06, "loss": 0.0212, "step": 746625 }, { "epoch": 7.34, "grad_norm": 3.6645588874816895, "learning_rate": 1.344186600732819e-06, "loss": 0.0615, "step": 746650 }, { "epoch": 7.34, "grad_norm": 3.987844944000244, "learning_rate": 1.3440624782785705e-06, "loss": 0.0171, "step": 746675 }, { "epoch": 7.34, "grad_norm": 3.058955669403076, "learning_rate": 1.3439383558243222e-06, "loss": 0.0479, "step": 746700 }, { "epoch": 7.34, "grad_norm": 0.14728176593780518, "learning_rate": 1.3438142333700738e-06, "loss": 0.022, "step": 746725 }, { "epoch": 7.34, "grad_norm": 4.204351425170898, "learning_rate": 1.3436901109158252e-06, "loss": 0.0422, "step": 746750 }, { "epoch": 7.34, "grad_norm": 1.103790044784546, "learning_rate": 1.3435659884615769e-06, "loss": 0.0054, "step": 746775 }, { "epoch": 7.34, "grad_norm": 4.056555271148682, "learning_rate": 1.3434418660073285e-06, "loss": 0.0586, "step": 746800 }, { "epoch": 7.34, "grad_norm": 1.4720865488052368, "learning_rate": 1.3433177435530797e-06, "loss": 0.0196, "step": 746825 }, { "epoch": 7.34, "grad_norm": 6.684233665466309, "learning_rate": 1.3431936210988313e-06, "loss": 0.0725, "step": 746850 }, { "epoch": 7.34, "grad_norm": 1.8197994232177734, "learning_rate": 1.343069498644583e-06, "loss": 0.0235, "step": 746875 }, { "epoch": 7.34, "grad_norm": 4.732654094696045, "learning_rate": 1.3429453761903344e-06, "loss": 0.0616, "step": 746900 }, { "epoch": 7.34, "grad_norm": 2.663700580596924, "learning_rate": 1.342821253736086e-06, "loss": 0.0244, "step": 746925 }, { "epoch": 7.34, "grad_norm": 5.39534330368042, "learning_rate": 1.3426971312818377e-06, "loss": 0.0544, "step": 746950 }, { "epoch": 7.34, "grad_norm": 2.3645055294036865, "learning_rate": 1.342573008827589e-06, "loss": 0.0153, "step": 746975 }, { "epoch": 7.34, "grad_norm": 3.527372360229492, "learning_rate": 1.3424488863733407e-06, "loss": 0.0596, "step": 747000 }, { "epoch": 7.34, "grad_norm": 0.4475361406803131, "learning_rate": 1.342324763919092e-06, "loss": 0.02, "step": 747025 }, { "epoch": 7.35, "grad_norm": 2.6356372833251953, "learning_rate": 1.3422006414648436e-06, "loss": 0.0593, "step": 747050 }, { "epoch": 7.35, "grad_norm": 26.24053955078125, "learning_rate": 1.3420765190105952e-06, "loss": 0.0167, "step": 747075 }, { "epoch": 7.35, "grad_norm": 2.9184229373931885, "learning_rate": 1.3419523965563466e-06, "loss": 0.0446, "step": 747100 }, { "epoch": 7.35, "grad_norm": 11.812712669372559, "learning_rate": 1.3418282741020983e-06, "loss": 0.0152, "step": 747125 }, { "epoch": 7.35, "grad_norm": 2.9136126041412354, "learning_rate": 1.3417041516478499e-06, "loss": 0.0496, "step": 747150 }, { "epoch": 7.35, "grad_norm": 29.895334243774414, "learning_rate": 1.3415800291936013e-06, "loss": 0.0184, "step": 747175 }, { "epoch": 7.35, "grad_norm": 2.1168277263641357, "learning_rate": 1.341455906739353e-06, "loss": 0.0414, "step": 747200 }, { "epoch": 7.35, "grad_norm": 4.7940850257873535, "learning_rate": 1.3413317842851046e-06, "loss": 0.0245, "step": 747225 }, { "epoch": 7.35, "grad_norm": 3.7956249713897705, "learning_rate": 1.3412076618308558e-06, "loss": 0.0521, "step": 747250 }, { "epoch": 7.35, "grad_norm": 0.4707043468952179, "learning_rate": 1.3410835393766074e-06, "loss": 0.0343, "step": 747275 }, { "epoch": 7.35, "grad_norm": 4.227505683898926, "learning_rate": 1.340959416922359e-06, "loss": 0.0608, "step": 747300 }, { "epoch": 7.35, "grad_norm": 0.05018968507647514, "learning_rate": 1.3408352944681105e-06, "loss": 0.0204, "step": 747325 }, { "epoch": 7.35, "grad_norm": 5.569882869720459, "learning_rate": 1.3407111720138621e-06, "loss": 0.0374, "step": 747350 }, { "epoch": 7.35, "grad_norm": 0.5927283763885498, "learning_rate": 1.3405870495596138e-06, "loss": 0.0234, "step": 747375 }, { "epoch": 7.35, "grad_norm": 2.7043237686157227, "learning_rate": 1.3404629271053652e-06, "loss": 0.0587, "step": 747400 }, { "epoch": 7.35, "grad_norm": 0.461761474609375, "learning_rate": 1.3403388046511168e-06, "loss": 0.0193, "step": 747425 }, { "epoch": 7.35, "grad_norm": 2.2386999130249023, "learning_rate": 1.340214682196868e-06, "loss": 0.0444, "step": 747450 }, { "epoch": 7.35, "grad_norm": 0.5977610945701599, "learning_rate": 1.3400905597426197e-06, "loss": 0.0157, "step": 747475 }, { "epoch": 7.35, "grad_norm": 2.7271742820739746, "learning_rate": 1.3399664372883713e-06, "loss": 0.0642, "step": 747500 }, { "epoch": 7.35, "grad_norm": 2.976710796356201, "learning_rate": 1.3398423148341227e-06, "loss": 0.0127, "step": 747525 }, { "epoch": 7.35, "grad_norm": 2.8084805011749268, "learning_rate": 1.3397181923798744e-06, "loss": 0.0439, "step": 747550 }, { "epoch": 7.35, "grad_norm": 6.014008522033691, "learning_rate": 1.339594069925626e-06, "loss": 0.0218, "step": 747575 }, { "epoch": 7.35, "grad_norm": 20.09133529663086, "learning_rate": 1.3394699474713774e-06, "loss": 0.0545, "step": 747600 }, { "epoch": 7.35, "grad_norm": 0.08218386024236679, "learning_rate": 1.339345825017129e-06, "loss": 0.0225, "step": 747625 }, { "epoch": 7.35, "grad_norm": 3.7805421352386475, "learning_rate": 1.3392217025628807e-06, "loss": 0.0637, "step": 747650 }, { "epoch": 7.35, "grad_norm": 4.911006927490234, "learning_rate": 1.339097580108632e-06, "loss": 0.0136, "step": 747675 }, { "epoch": 7.35, "grad_norm": 3.3234200477600098, "learning_rate": 1.3389734576543835e-06, "loss": 0.0765, "step": 747700 }, { "epoch": 7.35, "grad_norm": 1.3292732238769531, "learning_rate": 1.3388493352001352e-06, "loss": 0.017, "step": 747725 }, { "epoch": 7.35, "grad_norm": 2.088766098022461, "learning_rate": 1.3387252127458866e-06, "loss": 0.0617, "step": 747750 }, { "epoch": 7.35, "grad_norm": 2.3890268802642822, "learning_rate": 1.3386010902916382e-06, "loss": 0.0184, "step": 747775 }, { "epoch": 7.35, "grad_norm": 3.4187448024749756, "learning_rate": 1.3384769678373899e-06, "loss": 0.0577, "step": 747800 }, { "epoch": 7.35, "grad_norm": 6.271698951721191, "learning_rate": 1.3383528453831413e-06, "loss": 0.0131, "step": 747825 }, { "epoch": 7.35, "grad_norm": 3.576622247695923, "learning_rate": 1.338228722928893e-06, "loss": 0.0665, "step": 747850 }, { "epoch": 7.35, "grad_norm": 0.27960261702537537, "learning_rate": 1.3381046004746443e-06, "loss": 0.0275, "step": 747875 }, { "epoch": 7.35, "grad_norm": 4.740092754364014, "learning_rate": 1.337980478020396e-06, "loss": 0.049, "step": 747900 }, { "epoch": 7.35, "grad_norm": 4.3005900382995605, "learning_rate": 1.3378563555661476e-06, "loss": 0.0223, "step": 747925 }, { "epoch": 7.35, "grad_norm": 8.096970558166504, "learning_rate": 1.3377322331118988e-06, "loss": 0.0475, "step": 747950 }, { "epoch": 7.35, "grad_norm": 3.094172954559326, "learning_rate": 1.3376081106576505e-06, "loss": 0.0201, "step": 747975 }, { "epoch": 7.35, "grad_norm": 2.6908764839172363, "learning_rate": 1.337483988203402e-06, "loss": 0.047, "step": 748000 }, { "epoch": 7.35, "grad_norm": 2.180626153945923, "learning_rate": 1.3373598657491535e-06, "loss": 0.0144, "step": 748025 }, { "epoch": 7.35, "grad_norm": 2.3492629528045654, "learning_rate": 1.3372357432949052e-06, "loss": 0.0697, "step": 748050 }, { "epoch": 7.36, "grad_norm": 0.8741678595542908, "learning_rate": 1.3371116208406568e-06, "loss": 0.018, "step": 748075 }, { "epoch": 7.36, "grad_norm": 3.5431699752807617, "learning_rate": 1.3369874983864082e-06, "loss": 0.0436, "step": 748100 }, { "epoch": 7.36, "grad_norm": 0.22238558530807495, "learning_rate": 1.3368633759321598e-06, "loss": 0.0249, "step": 748125 }, { "epoch": 7.36, "grad_norm": 7.425432205200195, "learning_rate": 1.3367392534779115e-06, "loss": 0.0612, "step": 748150 }, { "epoch": 7.36, "grad_norm": 0.15989361703395844, "learning_rate": 1.3366151310236627e-06, "loss": 0.0161, "step": 748175 }, { "epoch": 7.36, "grad_norm": 3.0549731254577637, "learning_rate": 1.3364910085694143e-06, "loss": 0.0621, "step": 748200 }, { "epoch": 7.36, "grad_norm": 8.820355415344238, "learning_rate": 1.336366886115166e-06, "loss": 0.0221, "step": 748225 }, { "epoch": 7.36, "grad_norm": 2.7233238220214844, "learning_rate": 1.3362427636609174e-06, "loss": 0.0592, "step": 748250 }, { "epoch": 7.36, "grad_norm": 2.731046676635742, "learning_rate": 1.336118641206669e-06, "loss": 0.0122, "step": 748275 }, { "epoch": 7.36, "grad_norm": 3.527224540710449, "learning_rate": 1.3359945187524204e-06, "loss": 0.0614, "step": 748300 }, { "epoch": 7.36, "grad_norm": 0.48576346039772034, "learning_rate": 1.335870396298172e-06, "loss": 0.025, "step": 748325 }, { "epoch": 7.36, "grad_norm": 3.305788516998291, "learning_rate": 1.3357462738439237e-06, "loss": 0.0427, "step": 748350 }, { "epoch": 7.36, "grad_norm": 9.155681610107422, "learning_rate": 1.335622151389675e-06, "loss": 0.0244, "step": 748375 }, { "epoch": 7.36, "grad_norm": 3.950058937072754, "learning_rate": 1.3355029938335968e-06, "loss": 0.0628, "step": 748400 }, { "epoch": 7.36, "grad_norm": 0.09098733961582184, "learning_rate": 1.335378871379348e-06, "loss": 0.0179, "step": 748425 }, { "epoch": 7.36, "grad_norm": 3.419837713241577, "learning_rate": 1.3352547489250996e-06, "loss": 0.0526, "step": 748450 }, { "epoch": 7.36, "grad_norm": 0.5792692303657532, "learning_rate": 1.335130626470851e-06, "loss": 0.0227, "step": 748475 }, { "epoch": 7.36, "grad_norm": 3.0411651134490967, "learning_rate": 1.3350065040166027e-06, "loss": 0.0502, "step": 748500 }, { "epoch": 7.36, "grad_norm": 2.5841853618621826, "learning_rate": 1.3348823815623543e-06, "loss": 0.0229, "step": 748525 }, { "epoch": 7.36, "grad_norm": 3.1260244846343994, "learning_rate": 1.3347582591081058e-06, "loss": 0.0633, "step": 748550 }, { "epoch": 7.36, "grad_norm": 4.078797340393066, "learning_rate": 1.3346341366538574e-06, "loss": 0.0196, "step": 748575 }, { "epoch": 7.36, "grad_norm": 4.656012058258057, "learning_rate": 1.334510014199609e-06, "loss": 0.0513, "step": 748600 }, { "epoch": 7.36, "grad_norm": 5.116921901702881, "learning_rate": 1.3343858917453602e-06, "loss": 0.0159, "step": 748625 }, { "epoch": 7.36, "grad_norm": 2.505216598510742, "learning_rate": 1.3342617692911119e-06, "loss": 0.061, "step": 748650 }, { "epoch": 7.36, "grad_norm": 0.3861069679260254, "learning_rate": 1.3341376468368635e-06, "loss": 0.0093, "step": 748675 }, { "epoch": 7.36, "grad_norm": 2.693361282348633, "learning_rate": 1.334013524382615e-06, "loss": 0.0584, "step": 748700 }, { "epoch": 7.36, "grad_norm": 1.1429463624954224, "learning_rate": 1.3338894019283666e-06, "loss": 0.0212, "step": 748725 }, { "epoch": 7.36, "grad_norm": 1.9264005422592163, "learning_rate": 1.3337652794741182e-06, "loss": 0.0498, "step": 748750 }, { "epoch": 7.36, "grad_norm": 3.951916217803955, "learning_rate": 1.3336411570198696e-06, "loss": 0.0125, "step": 748775 }, { "epoch": 7.36, "grad_norm": 3.492471218109131, "learning_rate": 1.3335170345656213e-06, "loss": 0.0372, "step": 748800 }, { "epoch": 7.36, "grad_norm": 0.9299052357673645, "learning_rate": 1.333392912111373e-06, "loss": 0.0184, "step": 748825 }, { "epoch": 7.36, "grad_norm": 4.112927436828613, "learning_rate": 1.3332687896571241e-06, "loss": 0.0608, "step": 748850 }, { "epoch": 7.36, "grad_norm": 0.10087773948907852, "learning_rate": 1.3331446672028757e-06, "loss": 0.031, "step": 748875 }, { "epoch": 7.36, "grad_norm": 2.9098169803619385, "learning_rate": 1.3330205447486272e-06, "loss": 0.0614, "step": 748900 }, { "epoch": 7.36, "grad_norm": 1.0863347053527832, "learning_rate": 1.3328964222943788e-06, "loss": 0.012, "step": 748925 }, { "epoch": 7.36, "grad_norm": 12.403897285461426, "learning_rate": 1.3327722998401304e-06, "loss": 0.0765, "step": 748950 }, { "epoch": 7.36, "grad_norm": 0.19413594901561737, "learning_rate": 1.3326481773858819e-06, "loss": 0.0103, "step": 748975 }, { "epoch": 7.36, "grad_norm": 4.274318695068359, "learning_rate": 1.3325240549316335e-06, "loss": 0.0702, "step": 749000 }, { "epoch": 7.36, "grad_norm": 0.20155444741249084, "learning_rate": 1.3323999324773851e-06, "loss": 0.0336, "step": 749025 }, { "epoch": 7.36, "grad_norm": 3.16359281539917, "learning_rate": 1.3322758100231363e-06, "loss": 0.0556, "step": 749050 }, { "epoch": 7.37, "grad_norm": 0.09247276186943054, "learning_rate": 1.332151687568888e-06, "loss": 0.0248, "step": 749075 }, { "epoch": 7.37, "grad_norm": 2.592367172241211, "learning_rate": 1.3320275651146396e-06, "loss": 0.057, "step": 749100 }, { "epoch": 7.37, "grad_norm": 3.429260730743408, "learning_rate": 1.331903442660391e-06, "loss": 0.0173, "step": 749125 }, { "epoch": 7.37, "grad_norm": 1.5898867845535278, "learning_rate": 1.3317793202061427e-06, "loss": 0.0444, "step": 749150 }, { "epoch": 7.37, "grad_norm": 0.04042717441916466, "learning_rate": 1.3316551977518943e-06, "loss": 0.0153, "step": 749175 }, { "epoch": 7.37, "grad_norm": 3.444812536239624, "learning_rate": 1.3315310752976457e-06, "loss": 0.0538, "step": 749200 }, { "epoch": 7.37, "grad_norm": 0.13922932744026184, "learning_rate": 1.3314069528433974e-06, "loss": 0.0262, "step": 749225 }, { "epoch": 7.37, "grad_norm": 1.8614883422851562, "learning_rate": 1.331282830389149e-06, "loss": 0.0429, "step": 749250 }, { "epoch": 7.37, "grad_norm": 2.0771162509918213, "learning_rate": 1.3311587079349004e-06, "loss": 0.0241, "step": 749275 }, { "epoch": 7.37, "grad_norm": 4.259531497955322, "learning_rate": 1.3310345854806518e-06, "loss": 0.0641, "step": 749300 }, { "epoch": 7.37, "grad_norm": 0.37799108028411865, "learning_rate": 1.3309104630264033e-06, "loss": 0.0214, "step": 749325 }, { "epoch": 7.37, "grad_norm": 3.3463711738586426, "learning_rate": 1.330786340572155e-06, "loss": 0.0549, "step": 749350 }, { "epoch": 7.37, "grad_norm": 4.982774257659912, "learning_rate": 1.3306622181179065e-06, "loss": 0.0248, "step": 749375 }, { "epoch": 7.37, "grad_norm": 1.4701530933380127, "learning_rate": 1.330538095663658e-06, "loss": 0.0655, "step": 749400 }, { "epoch": 7.37, "grad_norm": 2.578073740005493, "learning_rate": 1.3304139732094096e-06, "loss": 0.0271, "step": 749425 }, { "epoch": 7.37, "grad_norm": 4.4728851318359375, "learning_rate": 1.3302898507551612e-06, "loss": 0.0868, "step": 749450 }, { "epoch": 7.37, "grad_norm": 0.19297818839550018, "learning_rate": 1.3301657283009127e-06, "loss": 0.0273, "step": 749475 }, { "epoch": 7.37, "grad_norm": 5.105835914611816, "learning_rate": 1.3300416058466643e-06, "loss": 0.0613, "step": 749500 }, { "epoch": 7.37, "grad_norm": 0.21308590471744537, "learning_rate": 1.329917483392416e-06, "loss": 0.0196, "step": 749525 }, { "epoch": 7.37, "grad_norm": 4.3759613037109375, "learning_rate": 1.3297933609381671e-06, "loss": 0.0487, "step": 749550 }, { "epoch": 7.37, "grad_norm": 1.7057138681411743, "learning_rate": 1.3296692384839188e-06, "loss": 0.0171, "step": 749575 }, { "epoch": 7.37, "grad_norm": 2.9723823070526123, "learning_rate": 1.3295451160296704e-06, "loss": 0.0719, "step": 749600 }, { "epoch": 7.37, "grad_norm": 6.313185691833496, "learning_rate": 1.3294209935754218e-06, "loss": 0.0186, "step": 749625 }, { "epoch": 7.37, "grad_norm": 6.046306610107422, "learning_rate": 1.3292968711211735e-06, "loss": 0.0482, "step": 749650 }, { "epoch": 7.37, "grad_norm": 7.881619453430176, "learning_rate": 1.329172748666925e-06, "loss": 0.0183, "step": 749675 }, { "epoch": 7.37, "grad_norm": 5.12229585647583, "learning_rate": 1.3290486262126765e-06, "loss": 0.0571, "step": 749700 }, { "epoch": 7.37, "grad_norm": 0.038263797760009766, "learning_rate": 1.3289245037584282e-06, "loss": 0.0231, "step": 749725 }, { "epoch": 7.37, "grad_norm": 2.5394585132598877, "learning_rate": 1.3288003813041794e-06, "loss": 0.0526, "step": 749750 }, { "epoch": 7.37, "grad_norm": 10.497075080871582, "learning_rate": 1.328676258849931e-06, "loss": 0.0136, "step": 749775 }, { "epoch": 7.37, "grad_norm": 2.5432307720184326, "learning_rate": 1.3285521363956826e-06, "loss": 0.0662, "step": 749800 }, { "epoch": 7.37, "grad_norm": 0.9079581499099731, "learning_rate": 1.328428013941434e-06, "loss": 0.0164, "step": 749825 }, { "epoch": 7.37, "grad_norm": 3.4745805263519287, "learning_rate": 1.3283038914871857e-06, "loss": 0.0437, "step": 749850 }, { "epoch": 7.37, "grad_norm": 1.2788686752319336, "learning_rate": 1.3281797690329373e-06, "loss": 0.0195, "step": 749875 }, { "epoch": 7.37, "grad_norm": 2.2650110721588135, "learning_rate": 1.3280556465786888e-06, "loss": 0.059, "step": 749900 }, { "epoch": 7.37, "grad_norm": 5.164060115814209, "learning_rate": 1.3279315241244404e-06, "loss": 0.0168, "step": 749925 }, { "epoch": 7.37, "grad_norm": 3.088386058807373, "learning_rate": 1.327807401670192e-06, "loss": 0.056, "step": 749950 }, { "epoch": 7.37, "grad_norm": 0.15362969040870667, "learning_rate": 1.3276832792159432e-06, "loss": 0.025, "step": 749975 }, { "epoch": 7.37, "grad_norm": 3.3067574501037598, "learning_rate": 1.3275591567616949e-06, "loss": 0.0509, "step": 750000 }, { "epoch": 7.37, "grad_norm": 0.47445252537727356, "learning_rate": 1.3274350343074465e-06, "loss": 0.0234, "step": 750025 }, { "epoch": 7.37, "grad_norm": 3.8389487266540527, "learning_rate": 1.327310911853198e-06, "loss": 0.0636, "step": 750050 }, { "epoch": 7.37, "grad_norm": 3.8167269229888916, "learning_rate": 1.3271867893989496e-06, "loss": 0.0119, "step": 750075 }, { "epoch": 7.38, "grad_norm": 3.2630181312561035, "learning_rate": 1.3270626669447012e-06, "loss": 0.0552, "step": 750100 }, { "epoch": 7.38, "grad_norm": 0.3112151324748993, "learning_rate": 1.3269385444904526e-06, "loss": 0.0206, "step": 750125 }, { "epoch": 7.38, "grad_norm": 3.110689640045166, "learning_rate": 1.3268144220362043e-06, "loss": 0.0465, "step": 750150 }, { "epoch": 7.38, "grad_norm": 5.013120174407959, "learning_rate": 1.3266902995819555e-06, "loss": 0.0149, "step": 750175 }, { "epoch": 7.38, "grad_norm": 2.645097255706787, "learning_rate": 1.3265661771277071e-06, "loss": 0.0739, "step": 750200 }, { "epoch": 7.38, "grad_norm": 13.70708179473877, "learning_rate": 1.3264420546734587e-06, "loss": 0.0257, "step": 750225 }, { "epoch": 7.38, "grad_norm": 2.4833922386169434, "learning_rate": 1.3263179322192102e-06, "loss": 0.0311, "step": 750250 }, { "epoch": 7.38, "grad_norm": 0.03278772905468941, "learning_rate": 1.3261938097649618e-06, "loss": 0.0227, "step": 750275 }, { "epoch": 7.38, "grad_norm": 2.5773825645446777, "learning_rate": 1.3260696873107134e-06, "loss": 0.0667, "step": 750300 }, { "epoch": 7.38, "grad_norm": 0.024727240204811096, "learning_rate": 1.3259455648564649e-06, "loss": 0.0174, "step": 750325 }, { "epoch": 7.38, "grad_norm": 3.4142093658447266, "learning_rate": 1.3258214424022165e-06, "loss": 0.0653, "step": 750350 }, { "epoch": 7.38, "grad_norm": 0.7378700971603394, "learning_rate": 1.3256973199479681e-06, "loss": 0.0177, "step": 750375 }, { "epoch": 7.38, "grad_norm": 2.707719087600708, "learning_rate": 1.3255731974937193e-06, "loss": 0.0385, "step": 750400 }, { "epoch": 7.38, "grad_norm": 3.865635395050049, "learning_rate": 1.325449075039471e-06, "loss": 0.0148, "step": 750425 }, { "epoch": 7.38, "grad_norm": 4.5454277992248535, "learning_rate": 1.3253249525852226e-06, "loss": 0.0545, "step": 750450 }, { "epoch": 7.38, "grad_norm": 0.5131127834320068, "learning_rate": 1.325200830130974e-06, "loss": 0.0286, "step": 750475 }, { "epoch": 7.38, "grad_norm": 3.5475800037384033, "learning_rate": 1.3250767076767257e-06, "loss": 0.0713, "step": 750500 }, { "epoch": 7.38, "grad_norm": 0.39035069942474365, "learning_rate": 1.3249525852224773e-06, "loss": 0.02, "step": 750525 }, { "epoch": 7.38, "grad_norm": 3.2227163314819336, "learning_rate": 1.3248284627682287e-06, "loss": 0.0612, "step": 750550 }, { "epoch": 7.38, "grad_norm": 3.2274370193481445, "learning_rate": 1.3247043403139804e-06, "loss": 0.0294, "step": 750575 }, { "epoch": 7.38, "grad_norm": 3.0479350090026855, "learning_rate": 1.3245802178597316e-06, "loss": 0.061, "step": 750600 }, { "epoch": 7.38, "grad_norm": 1.8164193630218506, "learning_rate": 1.3244560954054832e-06, "loss": 0.0221, "step": 750625 }, { "epoch": 7.38, "grad_norm": 11.90971565246582, "learning_rate": 1.3243369378494047e-06, "loss": 0.0623, "step": 750650 }, { "epoch": 7.38, "grad_norm": 0.051972080022096634, "learning_rate": 1.3242128153951563e-06, "loss": 0.017, "step": 750675 }, { "epoch": 7.38, "grad_norm": 6.751278877258301, "learning_rate": 1.324088692940908e-06, "loss": 0.0366, "step": 750700 }, { "epoch": 7.38, "grad_norm": 3.3900325298309326, "learning_rate": 1.3239645704866594e-06, "loss": 0.0145, "step": 750725 }, { "epoch": 7.38, "grad_norm": 4.385098457336426, "learning_rate": 1.323840448032411e-06, "loss": 0.0456, "step": 750750 }, { "epoch": 7.38, "grad_norm": 1.6726125478744507, "learning_rate": 1.3237163255781624e-06, "loss": 0.021, "step": 750775 }, { "epoch": 7.38, "grad_norm": 2.930330276489258, "learning_rate": 1.323592203123914e-06, "loss": 0.0397, "step": 750800 }, { "epoch": 7.38, "grad_norm": 0.44625967741012573, "learning_rate": 1.3234680806696657e-06, "loss": 0.0135, "step": 750825 }, { "epoch": 7.38, "grad_norm": 3.2761123180389404, "learning_rate": 1.323343958215417e-06, "loss": 0.051, "step": 750850 }, { "epoch": 7.38, "grad_norm": 5.954133033752441, "learning_rate": 1.3232198357611687e-06, "loss": 0.0221, "step": 750875 }, { "epoch": 7.38, "grad_norm": 1.9346120357513428, "learning_rate": 1.3230957133069204e-06, "loss": 0.0842, "step": 750900 }, { "epoch": 7.38, "grad_norm": 7.417819499969482, "learning_rate": 1.3229715908526716e-06, "loss": 0.0256, "step": 750925 }, { "epoch": 7.38, "grad_norm": 3.0709095001220703, "learning_rate": 1.3228474683984232e-06, "loss": 0.0766, "step": 750950 }, { "epoch": 7.38, "grad_norm": 7.680624008178711, "learning_rate": 1.3227233459441749e-06, "loss": 0.0209, "step": 750975 }, { "epoch": 7.38, "grad_norm": 5.018391132354736, "learning_rate": 1.3225992234899263e-06, "loss": 0.0649, "step": 751000 }, { "epoch": 7.38, "grad_norm": 2.1496400833129883, "learning_rate": 1.322475101035678e-06, "loss": 0.0234, "step": 751025 }, { "epoch": 7.38, "grad_norm": 3.7349538803100586, "learning_rate": 1.3223509785814295e-06, "loss": 0.0571, "step": 751050 }, { "epoch": 7.38, "grad_norm": 3.8472845554351807, "learning_rate": 1.322226856127181e-06, "loss": 0.0159, "step": 751075 }, { "epoch": 7.38, "grad_norm": 3.7817015647888184, "learning_rate": 1.3221027336729326e-06, "loss": 0.0484, "step": 751100 }, { "epoch": 7.39, "grad_norm": 2.1595020294189453, "learning_rate": 1.3219786112186842e-06, "loss": 0.0171, "step": 751125 }, { "epoch": 7.39, "grad_norm": 2.9595253467559814, "learning_rate": 1.3218544887644355e-06, "loss": 0.056, "step": 751150 }, { "epoch": 7.39, "grad_norm": 13.111555099487305, "learning_rate": 1.321730366310187e-06, "loss": 0.0293, "step": 751175 }, { "epoch": 7.39, "grad_norm": 3.7533164024353027, "learning_rate": 1.3216062438559385e-06, "loss": 0.0723, "step": 751200 }, { "epoch": 7.39, "grad_norm": 0.08680978417396545, "learning_rate": 1.3214821214016901e-06, "loss": 0.0222, "step": 751225 }, { "epoch": 7.39, "grad_norm": 4.038090229034424, "learning_rate": 1.3213579989474418e-06, "loss": 0.0482, "step": 751250 }, { "epoch": 7.39, "grad_norm": 0.038036469370126724, "learning_rate": 1.3212338764931932e-06, "loss": 0.0115, "step": 751275 }, { "epoch": 7.39, "grad_norm": 2.4841206073760986, "learning_rate": 1.3211097540389448e-06, "loss": 0.0491, "step": 751300 }, { "epoch": 7.39, "grad_norm": 6.861204147338867, "learning_rate": 1.3209856315846965e-06, "loss": 0.0336, "step": 751325 }, { "epoch": 7.39, "grad_norm": 1.7371745109558105, "learning_rate": 1.3208615091304477e-06, "loss": 0.0465, "step": 751350 }, { "epoch": 7.39, "grad_norm": 0.13980205357074738, "learning_rate": 1.3207373866761993e-06, "loss": 0.0186, "step": 751375 }, { "epoch": 7.39, "grad_norm": 3.7158429622650146, "learning_rate": 1.320613264221951e-06, "loss": 0.0372, "step": 751400 }, { "epoch": 7.39, "grad_norm": 0.9819027185440063, "learning_rate": 1.3204891417677024e-06, "loss": 0.0194, "step": 751425 }, { "epoch": 7.39, "grad_norm": 4.994433879852295, "learning_rate": 1.320365019313454e-06, "loss": 0.0554, "step": 751450 }, { "epoch": 7.39, "grad_norm": 0.5439885854721069, "learning_rate": 1.3202408968592056e-06, "loss": 0.0174, "step": 751475 }, { "epoch": 7.39, "grad_norm": 2.779240846633911, "learning_rate": 1.320116774404957e-06, "loss": 0.08, "step": 751500 }, { "epoch": 7.39, "grad_norm": 4.604491710662842, "learning_rate": 1.3199926519507087e-06, "loss": 0.0211, "step": 751525 }, { "epoch": 7.39, "grad_norm": 4.14910888671875, "learning_rate": 1.3198685294964603e-06, "loss": 0.0452, "step": 751550 }, { "epoch": 7.39, "grad_norm": 5.894243240356445, "learning_rate": 1.3197444070422116e-06, "loss": 0.018, "step": 751575 }, { "epoch": 7.39, "grad_norm": 3.746544361114502, "learning_rate": 1.3196202845879632e-06, "loss": 0.0504, "step": 751600 }, { "epoch": 7.39, "grad_norm": 3.7476847171783447, "learning_rate": 1.3194961621337146e-06, "loss": 0.0221, "step": 751625 }, { "epoch": 7.39, "grad_norm": 1.1868977546691895, "learning_rate": 1.3193720396794662e-06, "loss": 0.0571, "step": 751650 }, { "epoch": 7.39, "grad_norm": 3.9083917140960693, "learning_rate": 1.3192479172252179e-06, "loss": 0.0158, "step": 751675 }, { "epoch": 7.39, "grad_norm": 2.7658066749572754, "learning_rate": 1.3191237947709693e-06, "loss": 0.0585, "step": 751700 }, { "epoch": 7.39, "grad_norm": 0.3961333930492401, "learning_rate": 1.318999672316721e-06, "loss": 0.018, "step": 751725 }, { "epoch": 7.39, "grad_norm": 4.8891520500183105, "learning_rate": 1.3188755498624726e-06, "loss": 0.0553, "step": 751750 }, { "epoch": 7.39, "grad_norm": 0.022118357941508293, "learning_rate": 1.3187514274082238e-06, "loss": 0.018, "step": 751775 }, { "epoch": 7.39, "grad_norm": 3.361664056777954, "learning_rate": 1.3186273049539754e-06, "loss": 0.0637, "step": 751800 }, { "epoch": 7.39, "grad_norm": 1.5385679006576538, "learning_rate": 1.318503182499727e-06, "loss": 0.0088, "step": 751825 }, { "epoch": 7.39, "grad_norm": 2.833223342895508, "learning_rate": 1.3183790600454785e-06, "loss": 0.0533, "step": 751850 }, { "epoch": 7.39, "grad_norm": 2.6528396606445312, "learning_rate": 1.3182549375912301e-06, "loss": 0.0264, "step": 751875 }, { "epoch": 7.39, "grad_norm": 3.96364426612854, "learning_rate": 1.3181308151369817e-06, "loss": 0.0611, "step": 751900 }, { "epoch": 7.39, "grad_norm": 2.820615768432617, "learning_rate": 1.3180066926827332e-06, "loss": 0.0144, "step": 751925 }, { "epoch": 7.39, "grad_norm": 3.4599130153656006, "learning_rate": 1.3178825702284848e-06, "loss": 0.0419, "step": 751950 }, { "epoch": 7.39, "grad_norm": 7.921947479248047, "learning_rate": 1.3177584477742364e-06, "loss": 0.0156, "step": 751975 }, { "epoch": 7.39, "grad_norm": 3.8407461643218994, "learning_rate": 1.3176343253199877e-06, "loss": 0.0542, "step": 752000 }, { "epoch": 7.39, "grad_norm": 0.12107136100530624, "learning_rate": 1.3175102028657393e-06, "loss": 0.0144, "step": 752025 }, { "epoch": 7.39, "grad_norm": 2.9001641273498535, "learning_rate": 1.3173860804114907e-06, "loss": 0.0657, "step": 752050 }, { "epoch": 7.39, "grad_norm": 2.297785997390747, "learning_rate": 1.3172619579572423e-06, "loss": 0.0171, "step": 752075 }, { "epoch": 7.39, "grad_norm": 2.0085995197296143, "learning_rate": 1.317137835502994e-06, "loss": 0.0716, "step": 752100 }, { "epoch": 7.4, "grad_norm": 9.248775482177734, "learning_rate": 1.3170137130487454e-06, "loss": 0.0185, "step": 752125 }, { "epoch": 7.4, "grad_norm": 3.118098258972168, "learning_rate": 1.316889590594497e-06, "loss": 0.0435, "step": 752150 }, { "epoch": 7.4, "grad_norm": 0.15007708966732025, "learning_rate": 1.3167654681402487e-06, "loss": 0.0159, "step": 752175 }, { "epoch": 7.4, "grad_norm": 3.0846099853515625, "learning_rate": 1.316641345686e-06, "loss": 0.0458, "step": 752200 }, { "epoch": 7.4, "grad_norm": 2.1597676277160645, "learning_rate": 1.3165172232317517e-06, "loss": 0.0217, "step": 752225 }, { "epoch": 7.4, "grad_norm": 3.8746755123138428, "learning_rate": 1.3163931007775034e-06, "loss": 0.0594, "step": 752250 }, { "epoch": 7.4, "grad_norm": 12.332966804504395, "learning_rate": 1.3162689783232546e-06, "loss": 0.0184, "step": 752275 }, { "epoch": 7.4, "grad_norm": 3.710599184036255, "learning_rate": 1.3161448558690062e-06, "loss": 0.0719, "step": 752300 }, { "epoch": 7.4, "grad_norm": 0.8221030831336975, "learning_rate": 1.3160207334147578e-06, "loss": 0.0146, "step": 752325 }, { "epoch": 7.4, "grad_norm": 5.205862998962402, "learning_rate": 1.3158966109605093e-06, "loss": 0.0605, "step": 752350 }, { "epoch": 7.4, "grad_norm": 2.63407301902771, "learning_rate": 1.315772488506261e-06, "loss": 0.0179, "step": 752375 }, { "epoch": 7.4, "grad_norm": 4.400170803070068, "learning_rate": 1.3156483660520125e-06, "loss": 0.0716, "step": 752400 }, { "epoch": 7.4, "grad_norm": 0.36181142926216125, "learning_rate": 1.315524243597764e-06, "loss": 0.0337, "step": 752425 }, { "epoch": 7.4, "grad_norm": 3.7357120513916016, "learning_rate": 1.3154001211435156e-06, "loss": 0.0364, "step": 752450 }, { "epoch": 7.4, "grad_norm": 8.494694709777832, "learning_rate": 1.3152759986892668e-06, "loss": 0.0174, "step": 752475 }, { "epoch": 7.4, "grad_norm": 2.8737545013427734, "learning_rate": 1.3151518762350184e-06, "loss": 0.0395, "step": 752500 }, { "epoch": 7.4, "grad_norm": 0.1812334954738617, "learning_rate": 1.31502775378077e-06, "loss": 0.0186, "step": 752525 }, { "epoch": 7.4, "grad_norm": 3.485948085784912, "learning_rate": 1.3149036313265215e-06, "loss": 0.079, "step": 752550 }, { "epoch": 7.4, "grad_norm": 1.1289666891098022, "learning_rate": 1.3147795088722731e-06, "loss": 0.017, "step": 752575 }, { "epoch": 7.4, "grad_norm": 2.7167887687683105, "learning_rate": 1.3146553864180248e-06, "loss": 0.0711, "step": 752600 }, { "epoch": 7.4, "grad_norm": 2.189403772354126, "learning_rate": 1.3145312639637762e-06, "loss": 0.0199, "step": 752625 }, { "epoch": 7.4, "grad_norm": 2.908890962600708, "learning_rate": 1.3144071415095278e-06, "loss": 0.0641, "step": 752650 }, { "epoch": 7.4, "grad_norm": 9.822772979736328, "learning_rate": 1.3142830190552795e-06, "loss": 0.0181, "step": 752675 }, { "epoch": 7.4, "grad_norm": 4.442080974578857, "learning_rate": 1.3141588966010307e-06, "loss": 0.0623, "step": 752700 }, { "epoch": 7.4, "grad_norm": 0.09189841896295547, "learning_rate": 1.3140347741467823e-06, "loss": 0.0175, "step": 752725 }, { "epoch": 7.4, "grad_norm": 2.991314172744751, "learning_rate": 1.313910651692534e-06, "loss": 0.0505, "step": 752750 }, { "epoch": 7.4, "grad_norm": 0.753844141960144, "learning_rate": 1.3137865292382854e-06, "loss": 0.0272, "step": 752775 }, { "epoch": 7.4, "grad_norm": 2.0337138175964355, "learning_rate": 1.313662406784037e-06, "loss": 0.0495, "step": 752800 }, { "epoch": 7.4, "grad_norm": 3.166072130203247, "learning_rate": 1.3135382843297886e-06, "loss": 0.014, "step": 752825 }, { "epoch": 7.4, "grad_norm": 2.961038112640381, "learning_rate": 1.31341416187554e-06, "loss": 0.0431, "step": 752850 }, { "epoch": 7.4, "grad_norm": 5.413321018218994, "learning_rate": 1.3132900394212917e-06, "loss": 0.0201, "step": 752875 }, { "epoch": 7.4, "grad_norm": 2.6448051929473877, "learning_rate": 1.313165916967043e-06, "loss": 0.0526, "step": 752900 }, { "epoch": 7.4, "grad_norm": 0.18368537724018097, "learning_rate": 1.3130417945127945e-06, "loss": 0.0155, "step": 752925 }, { "epoch": 7.4, "grad_norm": 4.231170654296875, "learning_rate": 1.3129176720585462e-06, "loss": 0.0533, "step": 752950 }, { "epoch": 7.4, "grad_norm": 2.1922571659088135, "learning_rate": 1.3127935496042976e-06, "loss": 0.0163, "step": 752975 }, { "epoch": 7.4, "grad_norm": 4.0100908279418945, "learning_rate": 1.3126743920482193e-06, "loss": 0.0735, "step": 753000 }, { "epoch": 7.4, "grad_norm": 7.561148166656494, "learning_rate": 1.3125502695939707e-06, "loss": 0.0213, "step": 753025 }, { "epoch": 7.4, "grad_norm": 1.579872488975525, "learning_rate": 1.3124261471397223e-06, "loss": 0.0506, "step": 753050 }, { "epoch": 7.4, "grad_norm": 0.12848621606826782, "learning_rate": 1.3123020246854737e-06, "loss": 0.0224, "step": 753075 }, { "epoch": 7.4, "grad_norm": 2.5212457180023193, "learning_rate": 1.3121779022312254e-06, "loss": 0.0566, "step": 753100 }, { "epoch": 7.4, "grad_norm": 2.770132064819336, "learning_rate": 1.312053779776977e-06, "loss": 0.0176, "step": 753125 }, { "epoch": 7.41, "grad_norm": 3.313922882080078, "learning_rate": 1.3119296573227282e-06, "loss": 0.064, "step": 753150 }, { "epoch": 7.41, "grad_norm": 8.429808616638184, "learning_rate": 1.3118055348684799e-06, "loss": 0.0191, "step": 753175 }, { "epoch": 7.41, "grad_norm": 5.477858066558838, "learning_rate": 1.3116814124142315e-06, "loss": 0.0777, "step": 753200 }, { "epoch": 7.41, "grad_norm": 1.2446599006652832, "learning_rate": 1.311557289959983e-06, "loss": 0.0168, "step": 753225 }, { "epoch": 7.41, "grad_norm": 3.0482194423675537, "learning_rate": 1.3114331675057346e-06, "loss": 0.0657, "step": 753250 }, { "epoch": 7.41, "grad_norm": 0.6935056447982788, "learning_rate": 1.3113090450514862e-06, "loss": 0.0149, "step": 753275 }, { "epoch": 7.41, "grad_norm": 2.773987054824829, "learning_rate": 1.3111849225972376e-06, "loss": 0.0568, "step": 753300 }, { "epoch": 7.41, "grad_norm": 14.74503231048584, "learning_rate": 1.3110608001429893e-06, "loss": 0.0148, "step": 753325 }, { "epoch": 7.41, "grad_norm": 4.220544338226318, "learning_rate": 1.3109366776887409e-06, "loss": 0.039, "step": 753350 }, { "epoch": 7.41, "grad_norm": 0.19352741539478302, "learning_rate": 1.310812555234492e-06, "loss": 0.0256, "step": 753375 }, { "epoch": 7.41, "grad_norm": 3.9101099967956543, "learning_rate": 1.3106884327802437e-06, "loss": 0.0753, "step": 753400 }, { "epoch": 7.41, "grad_norm": 0.8353865146636963, "learning_rate": 1.3105643103259954e-06, "loss": 0.013, "step": 753425 }, { "epoch": 7.41, "grad_norm": 2.914672374725342, "learning_rate": 1.3104401878717468e-06, "loss": 0.0585, "step": 753450 }, { "epoch": 7.41, "grad_norm": 6.70404052734375, "learning_rate": 1.3103160654174984e-06, "loss": 0.0164, "step": 753475 }, { "epoch": 7.41, "grad_norm": 3.743574619293213, "learning_rate": 1.3101919429632498e-06, "loss": 0.0661, "step": 753500 }, { "epoch": 7.41, "grad_norm": 0.9380714893341064, "learning_rate": 1.3100678205090015e-06, "loss": 0.0214, "step": 753525 }, { "epoch": 7.41, "grad_norm": 3.30206036567688, "learning_rate": 1.3099436980547531e-06, "loss": 0.0602, "step": 753550 }, { "epoch": 7.41, "grad_norm": 0.10782419145107269, "learning_rate": 1.3098195756005043e-06, "loss": 0.0257, "step": 753575 }, { "epoch": 7.41, "grad_norm": 2.4804675579071045, "learning_rate": 1.309695453146256e-06, "loss": 0.0743, "step": 753600 }, { "epoch": 7.41, "grad_norm": 0.6603549122810364, "learning_rate": 1.3095713306920076e-06, "loss": 0.0162, "step": 753625 }, { "epoch": 7.41, "grad_norm": 1.8561333417892456, "learning_rate": 1.309447208237759e-06, "loss": 0.082, "step": 753650 }, { "epoch": 7.41, "grad_norm": 0.10545488446950912, "learning_rate": 1.3093230857835107e-06, "loss": 0.0223, "step": 753675 }, { "epoch": 7.41, "grad_norm": 5.474987030029297, "learning_rate": 1.3091989633292623e-06, "loss": 0.0588, "step": 753700 }, { "epoch": 7.41, "grad_norm": 0.023748930543661118, "learning_rate": 1.3090748408750137e-06, "loss": 0.0137, "step": 753725 }, { "epoch": 7.41, "grad_norm": 5.080842018127441, "learning_rate": 1.3089507184207654e-06, "loss": 0.0836, "step": 753750 }, { "epoch": 7.41, "grad_norm": 10.982206344604492, "learning_rate": 1.308826595966517e-06, "loss": 0.0147, "step": 753775 }, { "epoch": 7.41, "grad_norm": 1.3172965049743652, "learning_rate": 1.3087024735122684e-06, "loss": 0.0544, "step": 753800 }, { "epoch": 7.41, "grad_norm": 3.992640972137451, "learning_rate": 1.30857835105802e-06, "loss": 0.0185, "step": 753825 }, { "epoch": 7.41, "grad_norm": 2.4592714309692383, "learning_rate": 1.3084542286037717e-06, "loss": 0.0406, "step": 753850 }, { "epoch": 7.41, "grad_norm": 0.8228847980499268, "learning_rate": 1.3083301061495229e-06, "loss": 0.0338, "step": 753875 }, { "epoch": 7.41, "grad_norm": 4.2088942527771, "learning_rate": 1.3082059836952745e-06, "loss": 0.0577, "step": 753900 }, { "epoch": 7.41, "grad_norm": 0.2177865207195282, "learning_rate": 1.308081861241026e-06, "loss": 0.02, "step": 753925 }, { "epoch": 7.41, "grad_norm": 3.9029154777526855, "learning_rate": 1.3079577387867776e-06, "loss": 0.0635, "step": 753950 }, { "epoch": 7.41, "grad_norm": 1.4593008756637573, "learning_rate": 1.3078336163325292e-06, "loss": 0.021, "step": 753975 }, { "epoch": 7.41, "grad_norm": 1.6358956098556519, "learning_rate": 1.3077094938782806e-06, "loss": 0.0421, "step": 754000 }, { "epoch": 7.41, "grad_norm": 0.8466283082962036, "learning_rate": 1.3075853714240323e-06, "loss": 0.0128, "step": 754025 }, { "epoch": 7.41, "grad_norm": 2.979621410369873, "learning_rate": 1.307461248969784e-06, "loss": 0.0469, "step": 754050 }, { "epoch": 7.41, "grad_norm": 4.7758588790893555, "learning_rate": 1.3073371265155351e-06, "loss": 0.0228, "step": 754075 }, { "epoch": 7.41, "grad_norm": 2.8029000759124756, "learning_rate": 1.3072130040612868e-06, "loss": 0.069, "step": 754100 }, { "epoch": 7.41, "grad_norm": 8.861159324645996, "learning_rate": 1.3070888816070384e-06, "loss": 0.0159, "step": 754125 }, { "epoch": 7.41, "grad_norm": 4.242405891418457, "learning_rate": 1.3069647591527898e-06, "loss": 0.0523, "step": 754150 }, { "epoch": 7.42, "grad_norm": 0.6505650281906128, "learning_rate": 1.3068406366985415e-06, "loss": 0.016, "step": 754175 }, { "epoch": 7.42, "grad_norm": 3.0565073490142822, "learning_rate": 1.306716514244293e-06, "loss": 0.0436, "step": 754200 }, { "epoch": 7.42, "grad_norm": 0.3447290360927582, "learning_rate": 1.3065923917900445e-06, "loss": 0.0139, "step": 754225 }, { "epoch": 7.42, "grad_norm": 2.0770206451416016, "learning_rate": 1.3064682693357961e-06, "loss": 0.0554, "step": 754250 }, { "epoch": 7.42, "grad_norm": 0.3525185286998749, "learning_rate": 1.3063441468815478e-06, "loss": 0.0181, "step": 754275 }, { "epoch": 7.42, "grad_norm": 2.8461475372314453, "learning_rate": 1.306220024427299e-06, "loss": 0.0584, "step": 754300 }, { "epoch": 7.42, "grad_norm": 2.291381359100342, "learning_rate": 1.3060959019730506e-06, "loss": 0.0183, "step": 754325 }, { "epoch": 7.42, "grad_norm": 3.230052947998047, "learning_rate": 1.305971779518802e-06, "loss": 0.0711, "step": 754350 }, { "epoch": 7.42, "grad_norm": 13.982190132141113, "learning_rate": 1.3058476570645537e-06, "loss": 0.04, "step": 754375 }, { "epoch": 7.42, "grad_norm": 3.26582670211792, "learning_rate": 1.3057235346103053e-06, "loss": 0.0542, "step": 754400 }, { "epoch": 7.42, "grad_norm": 0.24418388307094574, "learning_rate": 1.3055994121560567e-06, "loss": 0.0096, "step": 754425 }, { "epoch": 7.42, "grad_norm": 5.527246952056885, "learning_rate": 1.3054752897018084e-06, "loss": 0.0712, "step": 754450 }, { "epoch": 7.42, "grad_norm": 8.301974296569824, "learning_rate": 1.30535116724756e-06, "loss": 0.0223, "step": 754475 }, { "epoch": 7.42, "grad_norm": 2.6519968509674072, "learning_rate": 1.3052270447933112e-06, "loss": 0.0613, "step": 754500 }, { "epoch": 7.42, "grad_norm": 6.537342071533203, "learning_rate": 1.3051029223390629e-06, "loss": 0.0207, "step": 754525 }, { "epoch": 7.42, "grad_norm": 4.323464870452881, "learning_rate": 1.3049787998848145e-06, "loss": 0.0486, "step": 754550 }, { "epoch": 7.42, "grad_norm": 1.3312861919403076, "learning_rate": 1.304854677430566e-06, "loss": 0.0214, "step": 754575 }, { "epoch": 7.42, "grad_norm": 2.073038101196289, "learning_rate": 1.3047305549763176e-06, "loss": 0.0514, "step": 754600 }, { "epoch": 7.42, "grad_norm": 0.01844799891114235, "learning_rate": 1.3046064325220692e-06, "loss": 0.0233, "step": 754625 }, { "epoch": 7.42, "grad_norm": 3.3613266944885254, "learning_rate": 1.3044823100678206e-06, "loss": 0.0818, "step": 754650 }, { "epoch": 7.42, "grad_norm": 0.817742645740509, "learning_rate": 1.3043581876135722e-06, "loss": 0.017, "step": 754675 }, { "epoch": 7.42, "grad_norm": 2.2819876670837402, "learning_rate": 1.3042340651593239e-06, "loss": 0.0548, "step": 754700 }, { "epoch": 7.42, "grad_norm": 4.893318176269531, "learning_rate": 1.304109942705075e-06, "loss": 0.0277, "step": 754725 }, { "epoch": 7.42, "grad_norm": 1.3631032705307007, "learning_rate": 1.3039858202508267e-06, "loss": 0.049, "step": 754750 }, { "epoch": 7.42, "grad_norm": 5.269723415374756, "learning_rate": 1.3038616977965782e-06, "loss": 0.0289, "step": 754775 }, { "epoch": 7.42, "grad_norm": 9.75298023223877, "learning_rate": 1.3037375753423298e-06, "loss": 0.048, "step": 754800 }, { "epoch": 7.42, "grad_norm": 4.164652347564697, "learning_rate": 1.3036134528880814e-06, "loss": 0.0125, "step": 754825 }, { "epoch": 7.42, "grad_norm": 8.093255996704102, "learning_rate": 1.3034893304338328e-06, "loss": 0.0641, "step": 754850 }, { "epoch": 7.42, "grad_norm": 1.2770441770553589, "learning_rate": 1.3033652079795845e-06, "loss": 0.0219, "step": 754875 }, { "epoch": 7.42, "grad_norm": 3.628133773803711, "learning_rate": 1.3032410855253361e-06, "loss": 0.073, "step": 754900 }, { "epoch": 7.42, "grad_norm": 1.827487587928772, "learning_rate": 1.3031169630710873e-06, "loss": 0.0206, "step": 754925 }, { "epoch": 7.42, "grad_norm": 2.646393299102783, "learning_rate": 1.302992840616839e-06, "loss": 0.0495, "step": 754950 }, { "epoch": 7.42, "grad_norm": 5.240152835845947, "learning_rate": 1.3028687181625906e-06, "loss": 0.0121, "step": 754975 }, { "epoch": 7.42, "grad_norm": 2.3202414512634277, "learning_rate": 1.302744595708342e-06, "loss": 0.0475, "step": 755000 }, { "epoch": 7.42, "grad_norm": 0.7460674047470093, "learning_rate": 1.3026204732540937e-06, "loss": 0.0162, "step": 755025 }, { "epoch": 7.42, "grad_norm": 3.6392581462860107, "learning_rate": 1.3024963507998453e-06, "loss": 0.0501, "step": 755050 }, { "epoch": 7.42, "grad_norm": 2.261118173599243, "learning_rate": 1.3023722283455967e-06, "loss": 0.0219, "step": 755075 }, { "epoch": 7.42, "grad_norm": 2.4897353649139404, "learning_rate": 1.3022481058913483e-06, "loss": 0.0677, "step": 755100 }, { "epoch": 7.42, "grad_norm": 0.09490379691123962, "learning_rate": 1.3021239834371e-06, "loss": 0.0148, "step": 755125 }, { "epoch": 7.42, "grad_norm": 5.5486531257629395, "learning_rate": 1.3019998609828514e-06, "loss": 0.0505, "step": 755150 }, { "epoch": 7.43, "grad_norm": 1.7989705801010132, "learning_rate": 1.301875738528603e-06, "loss": 0.0107, "step": 755175 }, { "epoch": 7.43, "grad_norm": 3.5253663063049316, "learning_rate": 1.3017516160743543e-06, "loss": 0.0578, "step": 755200 }, { "epoch": 7.43, "grad_norm": 6.6095194816589355, "learning_rate": 1.3016274936201059e-06, "loss": 0.0189, "step": 755225 }, { "epoch": 7.43, "grad_norm": 2.758937120437622, "learning_rate": 1.3015083360640273e-06, "loss": 0.0656, "step": 755250 }, { "epoch": 7.43, "grad_norm": 5.9999823570251465, "learning_rate": 1.301384213609779e-06, "loss": 0.0199, "step": 755275 }, { "epoch": 7.43, "grad_norm": 1.4605116844177246, "learning_rate": 1.3012600911555304e-06, "loss": 0.0462, "step": 755300 }, { "epoch": 7.43, "grad_norm": 27.738252639770508, "learning_rate": 1.301135968701282e-06, "loss": 0.0225, "step": 755325 }, { "epoch": 7.43, "grad_norm": 4.013640880584717, "learning_rate": 1.3010118462470337e-06, "loss": 0.0627, "step": 755350 }, { "epoch": 7.43, "grad_norm": 0.7450574040412903, "learning_rate": 1.300887723792785e-06, "loss": 0.0258, "step": 755375 }, { "epoch": 7.43, "grad_norm": 4.671994686126709, "learning_rate": 1.3007636013385367e-06, "loss": 0.0758, "step": 755400 }, { "epoch": 7.43, "grad_norm": 7.28766393661499, "learning_rate": 1.3006394788842884e-06, "loss": 0.0198, "step": 755425 }, { "epoch": 7.43, "grad_norm": 3.2776682376861572, "learning_rate": 1.3005153564300396e-06, "loss": 0.0576, "step": 755450 }, { "epoch": 7.43, "grad_norm": 12.11996841430664, "learning_rate": 1.3003912339757912e-06, "loss": 0.0218, "step": 755475 }, { "epoch": 7.43, "grad_norm": 3.2499327659606934, "learning_rate": 1.3002671115215428e-06, "loss": 0.0447, "step": 755500 }, { "epoch": 7.43, "grad_norm": 0.006150875240564346, "learning_rate": 1.3001429890672943e-06, "loss": 0.0229, "step": 755525 }, { "epoch": 7.43, "grad_norm": 3.8857007026672363, "learning_rate": 1.300018866613046e-06, "loss": 0.0607, "step": 755550 }, { "epoch": 7.43, "grad_norm": 2.2535581588745117, "learning_rate": 1.2998947441587975e-06, "loss": 0.0213, "step": 755575 }, { "epoch": 7.43, "grad_norm": 2.258681297302246, "learning_rate": 1.299770621704549e-06, "loss": 0.0515, "step": 755600 }, { "epoch": 7.43, "grad_norm": 11.97329330444336, "learning_rate": 1.2996464992503006e-06, "loss": 0.0214, "step": 755625 }, { "epoch": 7.43, "grad_norm": 4.3379693031311035, "learning_rate": 1.2995223767960522e-06, "loss": 0.0441, "step": 755650 }, { "epoch": 7.43, "grad_norm": 0.15906769037246704, "learning_rate": 1.2993982543418034e-06, "loss": 0.0167, "step": 755675 }, { "epoch": 7.43, "grad_norm": 4.344699859619141, "learning_rate": 1.299274131887555e-06, "loss": 0.0639, "step": 755700 }, { "epoch": 7.43, "grad_norm": 1.0188283920288086, "learning_rate": 1.2991500094333065e-06, "loss": 0.0168, "step": 755725 }, { "epoch": 7.43, "grad_norm": 2.940509557723999, "learning_rate": 1.2990258869790581e-06, "loss": 0.0593, "step": 755750 }, { "epoch": 7.43, "grad_norm": 8.92876148223877, "learning_rate": 1.2989017645248098e-06, "loss": 0.0188, "step": 755775 }, { "epoch": 7.43, "grad_norm": 2.1984150409698486, "learning_rate": 1.2987776420705612e-06, "loss": 0.0523, "step": 755800 }, { "epoch": 7.43, "grad_norm": 2.2669079303741455, "learning_rate": 1.2986535196163128e-06, "loss": 0.0138, "step": 755825 }, { "epoch": 7.43, "grad_norm": 5.538407802581787, "learning_rate": 1.2985293971620645e-06, "loss": 0.0702, "step": 755850 }, { "epoch": 7.43, "grad_norm": 1.7852187156677246, "learning_rate": 1.2984052747078157e-06, "loss": 0.0404, "step": 755875 }, { "epoch": 7.43, "grad_norm": 2.56266450881958, "learning_rate": 1.2982811522535673e-06, "loss": 0.051, "step": 755900 }, { "epoch": 7.43, "grad_norm": 8.606154441833496, "learning_rate": 1.298157029799319e-06, "loss": 0.0227, "step": 755925 }, { "epoch": 7.43, "grad_norm": 2.3230559825897217, "learning_rate": 1.2980329073450704e-06, "loss": 0.0357, "step": 755950 }, { "epoch": 7.43, "grad_norm": 10.09509563446045, "learning_rate": 1.297908784890822e-06, "loss": 0.0171, "step": 755975 }, { "epoch": 7.43, "grad_norm": 5.2555317878723145, "learning_rate": 1.2977846624365736e-06, "loss": 0.0479, "step": 756000 }, { "epoch": 7.43, "grad_norm": 2.9781229496002197, "learning_rate": 1.297660539982325e-06, "loss": 0.0139, "step": 756025 }, { "epoch": 7.43, "grad_norm": 3.819070339202881, "learning_rate": 1.2975364175280767e-06, "loss": 0.0616, "step": 756050 }, { "epoch": 7.43, "grad_norm": 0.6608113050460815, "learning_rate": 1.2974122950738283e-06, "loss": 0.0224, "step": 756075 }, { "epoch": 7.43, "grad_norm": 2.8090920448303223, "learning_rate": 1.2972881726195795e-06, "loss": 0.0664, "step": 756100 }, { "epoch": 7.43, "grad_norm": 7.886991024017334, "learning_rate": 1.2971640501653312e-06, "loss": 0.0213, "step": 756125 }, { "epoch": 7.43, "grad_norm": 2.9731600284576416, "learning_rate": 1.2970399277110826e-06, "loss": 0.0598, "step": 756150 }, { "epoch": 7.43, "grad_norm": 0.24085889756679535, "learning_rate": 1.2969158052568342e-06, "loss": 0.0173, "step": 756175 }, { "epoch": 7.44, "grad_norm": 2.671292304992676, "learning_rate": 1.2967916828025859e-06, "loss": 0.0445, "step": 756200 }, { "epoch": 7.44, "grad_norm": 2.3172929286956787, "learning_rate": 1.2966675603483373e-06, "loss": 0.022, "step": 756225 }, { "epoch": 7.44, "grad_norm": 3.6511290073394775, "learning_rate": 1.296543437894089e-06, "loss": 0.0597, "step": 756250 }, { "epoch": 7.44, "grad_norm": 10.590229034423828, "learning_rate": 1.2964193154398406e-06, "loss": 0.0117, "step": 756275 }, { "epoch": 7.44, "grad_norm": 4.286077499389648, "learning_rate": 1.2962951929855918e-06, "loss": 0.0385, "step": 756300 }, { "epoch": 7.44, "grad_norm": 6.734408855438232, "learning_rate": 1.2961710705313434e-06, "loss": 0.0165, "step": 756325 }, { "epoch": 7.44, "grad_norm": 2.42551326751709, "learning_rate": 1.296046948077095e-06, "loss": 0.0581, "step": 756350 }, { "epoch": 7.44, "grad_norm": 3.7660257816314697, "learning_rate": 1.2959228256228465e-06, "loss": 0.0268, "step": 756375 }, { "epoch": 7.44, "grad_norm": 4.040659427642822, "learning_rate": 1.295798703168598e-06, "loss": 0.062, "step": 756400 }, { "epoch": 7.44, "grad_norm": 0.35814160108566284, "learning_rate": 1.2956745807143497e-06, "loss": 0.0167, "step": 756425 }, { "epoch": 7.44, "grad_norm": 4.719918251037598, "learning_rate": 1.2955504582601012e-06, "loss": 0.0433, "step": 756450 }, { "epoch": 7.44, "grad_norm": 1.920304536819458, "learning_rate": 1.2954263358058528e-06, "loss": 0.0196, "step": 756475 }, { "epoch": 7.44, "grad_norm": 3.6396186351776123, "learning_rate": 1.2953022133516044e-06, "loss": 0.0507, "step": 756500 }, { "epoch": 7.44, "grad_norm": 11.642277717590332, "learning_rate": 1.2951780908973556e-06, "loss": 0.0215, "step": 756525 }, { "epoch": 7.44, "grad_norm": 3.164841651916504, "learning_rate": 1.2950539684431073e-06, "loss": 0.0636, "step": 756550 }, { "epoch": 7.44, "grad_norm": 8.916535377502441, "learning_rate": 1.2949298459888587e-06, "loss": 0.0146, "step": 756575 }, { "epoch": 7.44, "grad_norm": 3.3033533096313477, "learning_rate": 1.2948057235346103e-06, "loss": 0.0711, "step": 756600 }, { "epoch": 7.44, "grad_norm": 0.03571299463510513, "learning_rate": 1.294681601080362e-06, "loss": 0.0229, "step": 756625 }, { "epoch": 7.44, "grad_norm": 2.4992713928222656, "learning_rate": 1.2945574786261134e-06, "loss": 0.0495, "step": 756650 }, { "epoch": 7.44, "grad_norm": 0.1389598548412323, "learning_rate": 1.294433356171865e-06, "loss": 0.0144, "step": 756675 }, { "epoch": 7.44, "grad_norm": 4.972487926483154, "learning_rate": 1.2943092337176167e-06, "loss": 0.0455, "step": 756700 }, { "epoch": 7.44, "grad_norm": 6.559937000274658, "learning_rate": 1.294185111263368e-06, "loss": 0.0179, "step": 756725 }, { "epoch": 7.44, "grad_norm": 4.118383884429932, "learning_rate": 1.2940609888091197e-06, "loss": 0.054, "step": 756750 }, { "epoch": 7.44, "grad_norm": 1.8061057329177856, "learning_rate": 1.2939368663548714e-06, "loss": 0.0114, "step": 756775 }, { "epoch": 7.44, "grad_norm": 5.984543323516846, "learning_rate": 1.2938127439006226e-06, "loss": 0.0602, "step": 756800 }, { "epoch": 7.44, "grad_norm": 16.05179214477539, "learning_rate": 1.2936886214463742e-06, "loss": 0.0173, "step": 756825 }, { "epoch": 7.44, "grad_norm": 2.8819591999053955, "learning_rate": 1.2935644989921258e-06, "loss": 0.0827, "step": 756850 }, { "epoch": 7.44, "grad_norm": 0.01770523190498352, "learning_rate": 1.2934403765378773e-06, "loss": 0.0126, "step": 756875 }, { "epoch": 7.44, "grad_norm": 40.97759246826172, "learning_rate": 1.2933162540836289e-06, "loss": 0.067, "step": 756900 }, { "epoch": 7.44, "grad_norm": 0.13498324155807495, "learning_rate": 1.2931921316293805e-06, "loss": 0.0165, "step": 756925 }, { "epoch": 7.44, "grad_norm": 3.22178053855896, "learning_rate": 1.293068009175132e-06, "loss": 0.0366, "step": 756950 }, { "epoch": 7.44, "grad_norm": 1.681357979774475, "learning_rate": 1.2929438867208836e-06, "loss": 0.0193, "step": 756975 }, { "epoch": 7.44, "grad_norm": 3.0857584476470947, "learning_rate": 1.2928197642666352e-06, "loss": 0.0849, "step": 757000 }, { "epoch": 7.44, "grad_norm": 0.2165658324956894, "learning_rate": 1.2926956418123864e-06, "loss": 0.0125, "step": 757025 }, { "epoch": 7.44, "grad_norm": 6.391250133514404, "learning_rate": 1.292571519358138e-06, "loss": 0.062, "step": 757050 }, { "epoch": 7.44, "grad_norm": 0.26705917716026306, "learning_rate": 1.2924473969038895e-06, "loss": 0.0205, "step": 757075 }, { "epoch": 7.44, "grad_norm": 5.254971981048584, "learning_rate": 1.2923232744496411e-06, "loss": 0.088, "step": 757100 }, { "epoch": 7.44, "grad_norm": 3.6486685276031494, "learning_rate": 1.2921991519953928e-06, "loss": 0.0176, "step": 757125 }, { "epoch": 7.44, "grad_norm": 2.8462564945220947, "learning_rate": 1.2920750295411442e-06, "loss": 0.0655, "step": 757150 }, { "epoch": 7.44, "grad_norm": 0.02903890050947666, "learning_rate": 1.2919509070868958e-06, "loss": 0.0219, "step": 757175 }, { "epoch": 7.44, "grad_norm": 3.0915796756744385, "learning_rate": 1.2918267846326475e-06, "loss": 0.0728, "step": 757200 }, { "epoch": 7.45, "grad_norm": 0.07937489449977875, "learning_rate": 1.2917026621783987e-06, "loss": 0.0207, "step": 757225 }, { "epoch": 7.45, "grad_norm": 2.504371166229248, "learning_rate": 1.2915785397241503e-06, "loss": 0.0364, "step": 757250 }, { "epoch": 7.45, "grad_norm": 1.7884316444396973, "learning_rate": 1.291454417269902e-06, "loss": 0.024, "step": 757275 }, { "epoch": 7.45, "grad_norm": 2.185734987258911, "learning_rate": 1.2913302948156534e-06, "loss": 0.0427, "step": 757300 }, { "epoch": 7.45, "grad_norm": 6.890895843505859, "learning_rate": 1.291206172361405e-06, "loss": 0.0234, "step": 757325 }, { "epoch": 7.45, "grad_norm": 3.6132748126983643, "learning_rate": 1.2910820499071566e-06, "loss": 0.0676, "step": 757350 }, { "epoch": 7.45, "grad_norm": 2.1955437660217285, "learning_rate": 1.290957927452908e-06, "loss": 0.0207, "step": 757375 }, { "epoch": 7.45, "grad_norm": 2.909472942352295, "learning_rate": 1.2908338049986597e-06, "loss": 0.0618, "step": 757400 }, { "epoch": 7.45, "grad_norm": 1.4586366415023804, "learning_rate": 1.2907096825444113e-06, "loss": 0.0186, "step": 757425 }, { "epoch": 7.45, "grad_norm": 3.5658607482910156, "learning_rate": 1.2905855600901625e-06, "loss": 0.0722, "step": 757450 }, { "epoch": 7.45, "grad_norm": 14.917112350463867, "learning_rate": 1.2904614376359142e-06, "loss": 0.0279, "step": 757475 }, { "epoch": 7.45, "grad_norm": 2.8430960178375244, "learning_rate": 1.2903373151816656e-06, "loss": 0.0432, "step": 757500 }, { "epoch": 7.45, "grad_norm": 0.6543266177177429, "learning_rate": 1.2902131927274172e-06, "loss": 0.0136, "step": 757525 }, { "epoch": 7.45, "grad_norm": 2.659770965576172, "learning_rate": 1.2900890702731689e-06, "loss": 0.0463, "step": 757550 }, { "epoch": 7.45, "grad_norm": 1.6234475374221802, "learning_rate": 1.2899649478189203e-06, "loss": 0.0105, "step": 757575 }, { "epoch": 7.45, "grad_norm": 3.7806406021118164, "learning_rate": 1.289840825364672e-06, "loss": 0.0716, "step": 757600 }, { "epoch": 7.45, "grad_norm": 0.1760006695985794, "learning_rate": 1.2897167029104236e-06, "loss": 0.0227, "step": 757625 }, { "epoch": 7.45, "grad_norm": 3.3173515796661377, "learning_rate": 1.2895925804561748e-06, "loss": 0.0626, "step": 757650 }, { "epoch": 7.45, "grad_norm": 2.01172137260437, "learning_rate": 1.2894684580019264e-06, "loss": 0.0302, "step": 757675 }, { "epoch": 7.45, "grad_norm": 5.046072006225586, "learning_rate": 1.289344335547678e-06, "loss": 0.0396, "step": 757700 }, { "epoch": 7.45, "grad_norm": 4.586207389831543, "learning_rate": 1.2892202130934295e-06, "loss": 0.0223, "step": 757725 }, { "epoch": 7.45, "grad_norm": 3.262488842010498, "learning_rate": 1.289096090639181e-06, "loss": 0.0533, "step": 757750 }, { "epoch": 7.45, "grad_norm": 1.1553117036819458, "learning_rate": 1.2889719681849327e-06, "loss": 0.0128, "step": 757775 }, { "epoch": 7.45, "grad_norm": 4.47023868560791, "learning_rate": 1.2888478457306842e-06, "loss": 0.0728, "step": 757800 }, { "epoch": 7.45, "grad_norm": 6.048096656799316, "learning_rate": 1.2887237232764358e-06, "loss": 0.0186, "step": 757825 }, { "epoch": 7.45, "grad_norm": 3.4411914348602295, "learning_rate": 1.2885996008221874e-06, "loss": 0.0671, "step": 757850 }, { "epoch": 7.45, "grad_norm": 0.2513507306575775, "learning_rate": 1.2884754783679386e-06, "loss": 0.0186, "step": 757875 }, { "epoch": 7.45, "grad_norm": 2.945676803588867, "learning_rate": 1.2883513559136903e-06, "loss": 0.0606, "step": 757900 }, { "epoch": 7.45, "grad_norm": 3.240070104598999, "learning_rate": 1.2882272334594417e-06, "loss": 0.0225, "step": 757925 }, { "epoch": 7.45, "grad_norm": 4.865067005157471, "learning_rate": 1.2881031110051933e-06, "loss": 0.066, "step": 757950 }, { "epoch": 7.45, "grad_norm": 0.5305169224739075, "learning_rate": 1.287978988550945e-06, "loss": 0.0151, "step": 757975 }, { "epoch": 7.45, "grad_norm": 2.9760184288024902, "learning_rate": 1.2878548660966964e-06, "loss": 0.0773, "step": 758000 }, { "epoch": 7.45, "grad_norm": 0.5559650659561157, "learning_rate": 1.287730743642448e-06, "loss": 0.0378, "step": 758025 }, { "epoch": 7.45, "grad_norm": 2.308629035949707, "learning_rate": 1.2876066211881997e-06, "loss": 0.0523, "step": 758050 }, { "epoch": 7.45, "grad_norm": 3.111820697784424, "learning_rate": 1.287482498733951e-06, "loss": 0.0136, "step": 758075 }, { "epoch": 7.45, "grad_norm": 2.4387428760528564, "learning_rate": 1.2873583762797027e-06, "loss": 0.0685, "step": 758100 }, { "epoch": 7.45, "grad_norm": 0.06880086660385132, "learning_rate": 1.2872342538254543e-06, "loss": 0.0211, "step": 758125 }, { "epoch": 7.45, "grad_norm": 1.4145954847335815, "learning_rate": 1.2871101313712056e-06, "loss": 0.0557, "step": 758150 }, { "epoch": 7.45, "grad_norm": 11.283943176269531, "learning_rate": 1.2869860089169572e-06, "loss": 0.0173, "step": 758175 }, { "epoch": 7.45, "grad_norm": 2.0878987312316895, "learning_rate": 1.2868618864627088e-06, "loss": 0.0789, "step": 758200 }, { "epoch": 7.45, "grad_norm": 0.8418247699737549, "learning_rate": 1.2867377640084603e-06, "loss": 0.0184, "step": 758225 }, { "epoch": 7.46, "grad_norm": 2.334571361541748, "learning_rate": 1.2866136415542119e-06, "loss": 0.0705, "step": 758250 }, { "epoch": 7.46, "grad_norm": 1.0275059938430786, "learning_rate": 1.2864895190999635e-06, "loss": 0.0146, "step": 758275 }, { "epoch": 7.46, "grad_norm": 2.7752251625061035, "learning_rate": 1.286365396645715e-06, "loss": 0.038, "step": 758300 }, { "epoch": 7.46, "grad_norm": 0.5089795589447021, "learning_rate": 1.2862412741914666e-06, "loss": 0.0207, "step": 758325 }, { "epoch": 7.46, "grad_norm": 6.230045795440674, "learning_rate": 1.2861171517372178e-06, "loss": 0.0668, "step": 758350 }, { "epoch": 7.46, "grad_norm": 0.22406171262264252, "learning_rate": 1.2859930292829694e-06, "loss": 0.0263, "step": 758375 }, { "epoch": 7.46, "grad_norm": 4.148075580596924, "learning_rate": 1.285868906828721e-06, "loss": 0.0494, "step": 758400 }, { "epoch": 7.46, "grad_norm": 10.12962532043457, "learning_rate": 1.2857447843744725e-06, "loss": 0.0198, "step": 758425 }, { "epoch": 7.46, "grad_norm": 7.62822961807251, "learning_rate": 1.2856206619202241e-06, "loss": 0.061, "step": 758450 }, { "epoch": 7.46, "grad_norm": 0.13905958831310272, "learning_rate": 1.2854965394659758e-06, "loss": 0.0128, "step": 758475 }, { "epoch": 7.46, "grad_norm": 2.986755132675171, "learning_rate": 1.2853724170117272e-06, "loss": 0.063, "step": 758500 }, { "epoch": 7.46, "grad_norm": 3.6705896854400635, "learning_rate": 1.2852482945574788e-06, "loss": 0.0366, "step": 758525 }, { "epoch": 7.46, "grad_norm": 5.629089832305908, "learning_rate": 1.2851241721032304e-06, "loss": 0.0572, "step": 758550 }, { "epoch": 7.46, "grad_norm": 0.09577088803052902, "learning_rate": 1.2850000496489817e-06, "loss": 0.0122, "step": 758575 }, { "epoch": 7.46, "grad_norm": 1.9348063468933105, "learning_rate": 1.2848759271947333e-06, "loss": 0.0629, "step": 758600 }, { "epoch": 7.46, "grad_norm": 0.7614064812660217, "learning_rate": 1.284751804740485e-06, "loss": 0.0211, "step": 758625 }, { "epoch": 7.46, "grad_norm": 5.8320441246032715, "learning_rate": 1.2846276822862364e-06, "loss": 0.0539, "step": 758650 }, { "epoch": 7.46, "grad_norm": 0.8445900082588196, "learning_rate": 1.284503559831988e-06, "loss": 0.02, "step": 758675 }, { "epoch": 7.46, "grad_norm": 2.7486915588378906, "learning_rate": 1.2843794373777396e-06, "loss": 0.0511, "step": 758700 }, { "epoch": 7.46, "grad_norm": 1.8480889797210693, "learning_rate": 1.284255314923491e-06, "loss": 0.0235, "step": 758725 }, { "epoch": 7.46, "grad_norm": 3.375227451324463, "learning_rate": 1.2841311924692427e-06, "loss": 0.0606, "step": 758750 }, { "epoch": 7.46, "grad_norm": 1.1892271041870117, "learning_rate": 1.2840070700149939e-06, "loss": 0.0083, "step": 758775 }, { "epoch": 7.46, "grad_norm": 4.999575614929199, "learning_rate": 1.2838829475607455e-06, "loss": 0.0546, "step": 758800 }, { "epoch": 7.46, "grad_norm": 8.185867309570312, "learning_rate": 1.2837588251064972e-06, "loss": 0.0126, "step": 758825 }, { "epoch": 7.46, "grad_norm": 5.2284440994262695, "learning_rate": 1.2836347026522486e-06, "loss": 0.0658, "step": 758850 }, { "epoch": 7.46, "grad_norm": 9.012045860290527, "learning_rate": 1.2835105801980002e-06, "loss": 0.017, "step": 758875 }, { "epoch": 7.46, "grad_norm": 4.015483856201172, "learning_rate": 1.2833864577437519e-06, "loss": 0.054, "step": 758900 }, { "epoch": 7.46, "grad_norm": 0.2507995069026947, "learning_rate": 1.2832623352895033e-06, "loss": 0.0222, "step": 758925 }, { "epoch": 7.46, "grad_norm": 2.750129222869873, "learning_rate": 1.283138212835255e-06, "loss": 0.0746, "step": 758950 }, { "epoch": 7.46, "grad_norm": 10.926107406616211, "learning_rate": 1.2830140903810065e-06, "loss": 0.0236, "step": 758975 }, { "epoch": 7.46, "grad_norm": 4.148299694061279, "learning_rate": 1.282894932824928e-06, "loss": 0.0605, "step": 759000 }, { "epoch": 7.46, "grad_norm": 0.18938525021076202, "learning_rate": 1.2827708103706792e-06, "loss": 0.0171, "step": 759025 }, { "epoch": 7.46, "grad_norm": 2.402782678604126, "learning_rate": 1.2826466879164308e-06, "loss": 0.0577, "step": 759050 }, { "epoch": 7.46, "grad_norm": 1.8884764909744263, "learning_rate": 1.2825225654621825e-06, "loss": 0.0211, "step": 759075 }, { "epoch": 7.46, "grad_norm": 2.5654520988464355, "learning_rate": 1.282398443007934e-06, "loss": 0.0556, "step": 759100 }, { "epoch": 7.46, "grad_norm": 5.322185039520264, "learning_rate": 1.2822743205536855e-06, "loss": 0.0275, "step": 759125 }, { "epoch": 7.46, "grad_norm": 4.708038806915283, "learning_rate": 1.2821501980994372e-06, "loss": 0.0471, "step": 759150 }, { "epoch": 7.46, "grad_norm": 1.86948561668396, "learning_rate": 1.2820260756451886e-06, "loss": 0.0211, "step": 759175 }, { "epoch": 7.46, "grad_norm": 3.5937328338623047, "learning_rate": 1.2819019531909402e-06, "loss": 0.0618, "step": 759200 }, { "epoch": 7.46, "grad_norm": 0.4612032175064087, "learning_rate": 1.2817778307366919e-06, "loss": 0.0229, "step": 759225 }, { "epoch": 7.47, "grad_norm": 4.467270851135254, "learning_rate": 1.281653708282443e-06, "loss": 0.0732, "step": 759250 }, { "epoch": 7.47, "grad_norm": 5.428332805633545, "learning_rate": 1.2815295858281947e-06, "loss": 0.0188, "step": 759275 }, { "epoch": 7.47, "grad_norm": 2.8888769149780273, "learning_rate": 1.2814054633739461e-06, "loss": 0.0412, "step": 759300 }, { "epoch": 7.47, "grad_norm": 2.194640636444092, "learning_rate": 1.2812813409196978e-06, "loss": 0.0149, "step": 759325 }, { "epoch": 7.47, "grad_norm": 3.1147730350494385, "learning_rate": 1.2811572184654494e-06, "loss": 0.0688, "step": 759350 }, { "epoch": 7.47, "grad_norm": 0.46292993426322937, "learning_rate": 1.2810330960112008e-06, "loss": 0.0155, "step": 759375 }, { "epoch": 7.47, "grad_norm": 4.061058521270752, "learning_rate": 1.2809089735569525e-06, "loss": 0.0504, "step": 759400 }, { "epoch": 7.47, "grad_norm": 0.4345809817314148, "learning_rate": 1.280784851102704e-06, "loss": 0.0217, "step": 759425 }, { "epoch": 7.47, "grad_norm": 2.092421531677246, "learning_rate": 1.2806607286484555e-06, "loss": 0.0579, "step": 759450 }, { "epoch": 7.47, "grad_norm": 0.5229388475418091, "learning_rate": 1.2805366061942072e-06, "loss": 0.0168, "step": 759475 }, { "epoch": 7.47, "grad_norm": 3.2848739624023438, "learning_rate": 1.2804124837399588e-06, "loss": 0.0553, "step": 759500 }, { "epoch": 7.47, "grad_norm": 1.7426021099090576, "learning_rate": 1.28028836128571e-06, "loss": 0.0284, "step": 759525 }, { "epoch": 7.47, "grad_norm": 2.6346356868743896, "learning_rate": 1.2801642388314616e-06, "loss": 0.038, "step": 759550 }, { "epoch": 7.47, "grad_norm": 4.7201056480407715, "learning_rate": 1.2800401163772133e-06, "loss": 0.0325, "step": 759575 }, { "epoch": 7.47, "grad_norm": 2.8601651191711426, "learning_rate": 1.2799159939229647e-06, "loss": 0.0596, "step": 759600 }, { "epoch": 7.47, "grad_norm": 0.29263225197792053, "learning_rate": 1.2797918714687163e-06, "loss": 0.025, "step": 759625 }, { "epoch": 7.47, "grad_norm": 2.615981340408325, "learning_rate": 1.279667749014468e-06, "loss": 0.0491, "step": 759650 }, { "epoch": 7.47, "grad_norm": 6.667312145233154, "learning_rate": 1.2795436265602194e-06, "loss": 0.0277, "step": 759675 }, { "epoch": 7.47, "grad_norm": 3.1073803901672363, "learning_rate": 1.279419504105971e-06, "loss": 0.0474, "step": 759700 }, { "epoch": 7.47, "grad_norm": 1.1049168109893799, "learning_rate": 1.2792953816517222e-06, "loss": 0.0121, "step": 759725 }, { "epoch": 7.47, "grad_norm": 2.952869176864624, "learning_rate": 1.2791712591974739e-06, "loss": 0.0521, "step": 759750 }, { "epoch": 7.47, "grad_norm": 1.801936388015747, "learning_rate": 1.2790471367432255e-06, "loss": 0.0154, "step": 759775 }, { "epoch": 7.47, "grad_norm": 3.84340500831604, "learning_rate": 1.278923014288977e-06, "loss": 0.0458, "step": 759800 }, { "epoch": 7.47, "grad_norm": 11.071475982666016, "learning_rate": 1.2787988918347286e-06, "loss": 0.0284, "step": 759825 }, { "epoch": 7.47, "grad_norm": 5.204946517944336, "learning_rate": 1.2786747693804802e-06, "loss": 0.0578, "step": 759850 }, { "epoch": 7.47, "grad_norm": 0.17323777079582214, "learning_rate": 1.2785506469262316e-06, "loss": 0.0184, "step": 759875 }, { "epoch": 7.47, "grad_norm": 3.6830246448516846, "learning_rate": 1.2784265244719833e-06, "loss": 0.0583, "step": 759900 }, { "epoch": 7.47, "grad_norm": 0.37094053626060486, "learning_rate": 1.278302402017735e-06, "loss": 0.0272, "step": 759925 }, { "epoch": 7.47, "grad_norm": 2.3231730461120605, "learning_rate": 1.278178279563486e-06, "loss": 0.0753, "step": 759950 }, { "epoch": 7.47, "grad_norm": 0.3329813778400421, "learning_rate": 1.2780541571092377e-06, "loss": 0.0173, "step": 759975 }, { "epoch": 7.47, "grad_norm": 2.5397982597351074, "learning_rate": 1.2779300346549894e-06, "loss": 0.0791, "step": 760000 }, { "epoch": 7.47, "eval_loss": 0.8989831805229187, "eval_runtime": 6161.2213, "eval_samples_per_second": 1.537, "eval_steps_per_second": 0.192, "eval_wer": 0.11221270842721946, "step": 760000 }, { "epoch": 7.47, "grad_norm": 1.2675672769546509, "learning_rate": 1.2778059122007408e-06, "loss": 0.0288, "step": 760025 }, { "epoch": 7.47, "grad_norm": 3.7806813716888428, "learning_rate": 1.2776817897464924e-06, "loss": 0.0472, "step": 760050 }, { "epoch": 7.47, "grad_norm": 0.36760663986206055, "learning_rate": 1.277557667292244e-06, "loss": 0.0179, "step": 760075 }, { "epoch": 7.47, "grad_norm": 3.8918228149414062, "learning_rate": 1.2774335448379955e-06, "loss": 0.0633, "step": 760100 }, { "epoch": 7.47, "grad_norm": 18.738285064697266, "learning_rate": 1.2773094223837471e-06, "loss": 0.0291, "step": 760125 }, { "epoch": 7.47, "grad_norm": 3.2733354568481445, "learning_rate": 1.2771852999294983e-06, "loss": 0.0488, "step": 760150 }, { "epoch": 7.47, "grad_norm": 0.04425923153758049, "learning_rate": 1.27706117747525e-06, "loss": 0.0127, "step": 760175 }, { "epoch": 7.47, "grad_norm": 1.7188912630081177, "learning_rate": 1.2769370550210016e-06, "loss": 0.0437, "step": 760200 }, { "epoch": 7.47, "grad_norm": 10.81671142578125, "learning_rate": 1.276812932566753e-06, "loss": 0.0181, "step": 760225 }, { "epoch": 7.47, "grad_norm": 4.656999588012695, "learning_rate": 1.2766888101125047e-06, "loss": 0.0621, "step": 760250 }, { "epoch": 7.48, "grad_norm": 10.330126762390137, "learning_rate": 1.2765646876582563e-06, "loss": 0.0241, "step": 760275 }, { "epoch": 7.48, "grad_norm": 4.219662666320801, "learning_rate": 1.2764405652040077e-06, "loss": 0.0655, "step": 760300 }, { "epoch": 7.48, "grad_norm": 0.16583473980426788, "learning_rate": 1.2763164427497594e-06, "loss": 0.0192, "step": 760325 }, { "epoch": 7.48, "grad_norm": 2.7840006351470947, "learning_rate": 1.276192320295511e-06, "loss": 0.0538, "step": 760350 }, { "epoch": 7.48, "grad_norm": 7.185707092285156, "learning_rate": 1.2760681978412622e-06, "loss": 0.0224, "step": 760375 }, { "epoch": 7.48, "grad_norm": 5.892234802246094, "learning_rate": 1.2759440753870138e-06, "loss": 0.06, "step": 760400 }, { "epoch": 7.48, "grad_norm": 0.07755271345376968, "learning_rate": 1.2758199529327655e-06, "loss": 0.0152, "step": 760425 }, { "epoch": 7.48, "grad_norm": 2.6727418899536133, "learning_rate": 1.275695830478517e-06, "loss": 0.0449, "step": 760450 }, { "epoch": 7.48, "grad_norm": 11.070197105407715, "learning_rate": 1.2755717080242685e-06, "loss": 0.0159, "step": 760475 }, { "epoch": 7.48, "grad_norm": 1.5974596738815308, "learning_rate": 1.2754475855700202e-06, "loss": 0.069, "step": 760500 }, { "epoch": 7.48, "grad_norm": 2.3288350105285645, "learning_rate": 1.2753234631157716e-06, "loss": 0.0178, "step": 760525 }, { "epoch": 7.48, "grad_norm": 4.194362640380859, "learning_rate": 1.2751993406615232e-06, "loss": 0.045, "step": 760550 }, { "epoch": 7.48, "grad_norm": 0.44864508509635925, "learning_rate": 1.2750752182072744e-06, "loss": 0.0257, "step": 760575 }, { "epoch": 7.48, "grad_norm": 3.9630134105682373, "learning_rate": 1.274951095753026e-06, "loss": 0.0557, "step": 760600 }, { "epoch": 7.48, "grad_norm": 10.431435585021973, "learning_rate": 1.2748269732987777e-06, "loss": 0.018, "step": 760625 }, { "epoch": 7.48, "grad_norm": 1.9608309268951416, "learning_rate": 1.2747028508445291e-06, "loss": 0.0572, "step": 760650 }, { "epoch": 7.48, "grad_norm": 3.779146909713745, "learning_rate": 1.2745787283902808e-06, "loss": 0.0287, "step": 760675 }, { "epoch": 7.48, "grad_norm": 2.4684040546417236, "learning_rate": 1.2744546059360324e-06, "loss": 0.0559, "step": 760700 }, { "epoch": 7.48, "grad_norm": 3.4207191467285156, "learning_rate": 1.2743304834817838e-06, "loss": 0.0238, "step": 760725 }, { "epoch": 7.48, "grad_norm": 2.1300535202026367, "learning_rate": 1.2742063610275355e-06, "loss": 0.0631, "step": 760750 }, { "epoch": 7.48, "grad_norm": 1.576987385749817, "learning_rate": 1.274082238573287e-06, "loss": 0.028, "step": 760775 }, { "epoch": 7.48, "grad_norm": 4.011472225189209, "learning_rate": 1.2739581161190385e-06, "loss": 0.057, "step": 760800 }, { "epoch": 7.48, "grad_norm": 0.545913815498352, "learning_rate": 1.27383399366479e-06, "loss": 0.0196, "step": 760825 }, { "epoch": 7.48, "grad_norm": 1.9424673318862915, "learning_rate": 1.2737098712105416e-06, "loss": 0.0433, "step": 760850 }, { "epoch": 7.48, "grad_norm": 0.7190180420875549, "learning_rate": 1.273585748756293e-06, "loss": 0.0171, "step": 760875 }, { "epoch": 7.48, "grad_norm": 3.09889817237854, "learning_rate": 1.2734616263020446e-06, "loss": 0.0707, "step": 760900 }, { "epoch": 7.48, "grad_norm": 3.1458396911621094, "learning_rate": 1.2733375038477963e-06, "loss": 0.0188, "step": 760925 }, { "epoch": 7.48, "grad_norm": 3.1453819274902344, "learning_rate": 1.2732133813935477e-06, "loss": 0.0466, "step": 760950 }, { "epoch": 7.48, "grad_norm": 0.07688786089420319, "learning_rate": 1.2730892589392993e-06, "loss": 0.0261, "step": 760975 }, { "epoch": 7.48, "grad_norm": 3.88810133934021, "learning_rate": 1.272965136485051e-06, "loss": 0.0863, "step": 761000 }, { "epoch": 7.48, "grad_norm": 0.1679755300283432, "learning_rate": 1.2728410140308024e-06, "loss": 0.0167, "step": 761025 }, { "epoch": 7.48, "grad_norm": 1.5309336185455322, "learning_rate": 1.272716891576554e-06, "loss": 0.0467, "step": 761050 }, { "epoch": 7.48, "grad_norm": 0.6575078964233398, "learning_rate": 1.2725927691223052e-06, "loss": 0.0199, "step": 761075 }, { "epoch": 7.48, "grad_norm": 8.225763320922852, "learning_rate": 1.2724686466680569e-06, "loss": 0.0728, "step": 761100 }, { "epoch": 7.48, "grad_norm": 2.2910923957824707, "learning_rate": 1.2723445242138085e-06, "loss": 0.0165, "step": 761125 }, { "epoch": 7.48, "grad_norm": 4.118120193481445, "learning_rate": 1.27222040175956e-06, "loss": 0.0565, "step": 761150 }, { "epoch": 7.48, "grad_norm": 0.6982630491256714, "learning_rate": 1.2720962793053116e-06, "loss": 0.0245, "step": 761175 }, { "epoch": 7.48, "grad_norm": 1.901121973991394, "learning_rate": 1.2719721568510632e-06, "loss": 0.068, "step": 761200 }, { "epoch": 7.48, "grad_norm": 2.9554805755615234, "learning_rate": 1.2718480343968146e-06, "loss": 0.0205, "step": 761225 }, { "epoch": 7.48, "grad_norm": 6.464521408081055, "learning_rate": 1.2717239119425663e-06, "loss": 0.051, "step": 761250 }, { "epoch": 7.48, "grad_norm": 1.931984782218933, "learning_rate": 1.2715997894883179e-06, "loss": 0.0241, "step": 761275 }, { "epoch": 7.49, "grad_norm": 2.3345019817352295, "learning_rate": 1.271475667034069e-06, "loss": 0.0525, "step": 761300 }, { "epoch": 7.49, "grad_norm": 3.3168652057647705, "learning_rate": 1.2713515445798207e-06, "loss": 0.0174, "step": 761325 }, { "epoch": 7.49, "grad_norm": 8.82376480102539, "learning_rate": 1.2712274221255724e-06, "loss": 0.0582, "step": 761350 }, { "epoch": 7.49, "grad_norm": 2.568298101425171, "learning_rate": 1.2711032996713238e-06, "loss": 0.0272, "step": 761375 }, { "epoch": 7.49, "grad_norm": 5.662960529327393, "learning_rate": 1.2709791772170754e-06, "loss": 0.0416, "step": 761400 }, { "epoch": 7.49, "grad_norm": 6.146183013916016, "learning_rate": 1.270855054762827e-06, "loss": 0.0177, "step": 761425 }, { "epoch": 7.49, "grad_norm": 3.293382167816162, "learning_rate": 1.2707309323085785e-06, "loss": 0.0567, "step": 761450 }, { "epoch": 7.49, "grad_norm": 1.538264513015747, "learning_rate": 1.2706068098543301e-06, "loss": 0.0159, "step": 761475 }, { "epoch": 7.49, "grad_norm": 3.0394890308380127, "learning_rate": 1.2704826874000813e-06, "loss": 0.0502, "step": 761500 }, { "epoch": 7.49, "grad_norm": 1.5386954545974731, "learning_rate": 1.270358564945833e-06, "loss": 0.0284, "step": 761525 }, { "epoch": 7.49, "grad_norm": 4.136843681335449, "learning_rate": 1.2702344424915846e-06, "loss": 0.0617, "step": 761550 }, { "epoch": 7.49, "grad_norm": 0.971339762210846, "learning_rate": 1.270110320037336e-06, "loss": 0.0209, "step": 761575 }, { "epoch": 7.49, "grad_norm": 3.8707058429718018, "learning_rate": 1.2699861975830877e-06, "loss": 0.0484, "step": 761600 }, { "epoch": 7.49, "grad_norm": 5.804393291473389, "learning_rate": 1.2698620751288393e-06, "loss": 0.0131, "step": 761625 }, { "epoch": 7.49, "grad_norm": 2.142302989959717, "learning_rate": 1.2697379526745907e-06, "loss": 0.0483, "step": 761650 }, { "epoch": 7.49, "grad_norm": 2.57299542427063, "learning_rate": 1.2696138302203424e-06, "loss": 0.0188, "step": 761675 }, { "epoch": 7.49, "grad_norm": 3.922818660736084, "learning_rate": 1.269489707766094e-06, "loss": 0.0513, "step": 761700 }, { "epoch": 7.49, "grad_norm": 9.348496437072754, "learning_rate": 1.2693655853118452e-06, "loss": 0.022, "step": 761725 }, { "epoch": 7.49, "grad_norm": 1.7000035047531128, "learning_rate": 1.2692414628575968e-06, "loss": 0.0653, "step": 761750 }, { "epoch": 7.49, "grad_norm": 1.7098252773284912, "learning_rate": 1.2691173404033485e-06, "loss": 0.0108, "step": 761775 }, { "epoch": 7.49, "grad_norm": 4.968264579772949, "learning_rate": 1.2689932179490999e-06, "loss": 0.0517, "step": 761800 }, { "epoch": 7.49, "grad_norm": 0.22234153747558594, "learning_rate": 1.2688690954948515e-06, "loss": 0.0118, "step": 761825 }, { "epoch": 7.49, "grad_norm": 3.7105343341827393, "learning_rate": 1.2687449730406032e-06, "loss": 0.0499, "step": 761850 }, { "epoch": 7.49, "grad_norm": 0.10645300894975662, "learning_rate": 1.2686208505863546e-06, "loss": 0.0374, "step": 761875 }, { "epoch": 7.49, "grad_norm": 3.6608593463897705, "learning_rate": 1.2684967281321062e-06, "loss": 0.0671, "step": 761900 }, { "epoch": 7.49, "grad_norm": 4.367428779602051, "learning_rate": 1.2683726056778574e-06, "loss": 0.0143, "step": 761925 }, { "epoch": 7.49, "grad_norm": 2.3888304233551025, "learning_rate": 1.268248483223609e-06, "loss": 0.0541, "step": 761950 }, { "epoch": 7.49, "grad_norm": 0.2938309609889984, "learning_rate": 1.2681243607693607e-06, "loss": 0.0202, "step": 761975 }, { "epoch": 7.49, "grad_norm": 2.7626516819000244, "learning_rate": 1.2680002383151121e-06, "loss": 0.0566, "step": 762000 }, { "epoch": 7.49, "grad_norm": 2.3780808448791504, "learning_rate": 1.2678761158608638e-06, "loss": 0.023, "step": 762025 }, { "epoch": 7.49, "grad_norm": 1.3811688423156738, "learning_rate": 1.2677519934066154e-06, "loss": 0.0554, "step": 762050 }, { "epoch": 7.49, "grad_norm": 0.904899001121521, "learning_rate": 1.2676278709523668e-06, "loss": 0.0173, "step": 762075 }, { "epoch": 7.49, "grad_norm": 4.041590213775635, "learning_rate": 1.2675037484981185e-06, "loss": 0.0721, "step": 762100 }, { "epoch": 7.49, "grad_norm": 0.32614973187446594, "learning_rate": 1.26737962604387e-06, "loss": 0.0238, "step": 762125 }, { "epoch": 7.49, "grad_norm": 1.8050880432128906, "learning_rate": 1.2672555035896213e-06, "loss": 0.0578, "step": 762150 }, { "epoch": 7.49, "grad_norm": 0.04961396008729935, "learning_rate": 1.267131381135373e-06, "loss": 0.0168, "step": 762175 }, { "epoch": 7.49, "grad_norm": 2.5327298641204834, "learning_rate": 1.2670072586811246e-06, "loss": 0.0435, "step": 762200 }, { "epoch": 7.49, "grad_norm": 3.622424840927124, "learning_rate": 1.266883136226876e-06, "loss": 0.0203, "step": 762225 }, { "epoch": 7.49, "grad_norm": 3.140659809112549, "learning_rate": 1.2667590137726276e-06, "loss": 0.0539, "step": 762250 }, { "epoch": 7.49, "grad_norm": 1.91055166721344, "learning_rate": 1.2666348913183793e-06, "loss": 0.0197, "step": 762275 }, { "epoch": 7.5, "grad_norm": 2.9120380878448486, "learning_rate": 1.2665107688641307e-06, "loss": 0.0594, "step": 762300 }, { "epoch": 7.5, "grad_norm": 1.772578477859497, "learning_rate": 1.2663866464098823e-06, "loss": 0.0147, "step": 762325 }, { "epoch": 7.5, "grad_norm": 3.5102338790893555, "learning_rate": 1.2662625239556337e-06, "loss": 0.0689, "step": 762350 }, { "epoch": 7.5, "grad_norm": 0.5148151516914368, "learning_rate": 1.2661384015013854e-06, "loss": 0.0154, "step": 762375 }, { "epoch": 7.5, "grad_norm": 5.52614688873291, "learning_rate": 1.266014279047137e-06, "loss": 0.0793, "step": 762400 }, { "epoch": 7.5, "grad_norm": 8.338818550109863, "learning_rate": 1.2658901565928882e-06, "loss": 0.0158, "step": 762425 }, { "epoch": 7.5, "grad_norm": 3.193574905395508, "learning_rate": 1.2657660341386399e-06, "loss": 0.0757, "step": 762450 }, { "epoch": 7.5, "grad_norm": 1.0827136039733887, "learning_rate": 1.2656419116843915e-06, "loss": 0.0194, "step": 762475 }, { "epoch": 7.5, "grad_norm": 3.624732494354248, "learning_rate": 1.265522754128313e-06, "loss": 0.0494, "step": 762500 }, { "epoch": 7.5, "grad_norm": 3.7051265239715576, "learning_rate": 1.2653986316740644e-06, "loss": 0.0187, "step": 762525 }, { "epoch": 7.5, "grad_norm": 3.890045166015625, "learning_rate": 1.265274509219816e-06, "loss": 0.0624, "step": 762550 }, { "epoch": 7.5, "grad_norm": 9.817095756530762, "learning_rate": 1.2651503867655676e-06, "loss": 0.0188, "step": 762575 }, { "epoch": 7.5, "grad_norm": 2.425462484359741, "learning_rate": 1.265026264311319e-06, "loss": 0.0573, "step": 762600 }, { "epoch": 7.5, "grad_norm": 0.0666879415512085, "learning_rate": 1.2649021418570707e-06, "loss": 0.0193, "step": 762625 }, { "epoch": 7.5, "grad_norm": 6.939404010772705, "learning_rate": 1.2647780194028223e-06, "loss": 0.0524, "step": 762650 }, { "epoch": 7.5, "grad_norm": 0.03384080156683922, "learning_rate": 1.2646538969485735e-06, "loss": 0.0158, "step": 762675 }, { "epoch": 7.5, "grad_norm": 6.113892555236816, "learning_rate": 1.2645297744943252e-06, "loss": 0.0486, "step": 762700 }, { "epoch": 7.5, "grad_norm": 0.1148962527513504, "learning_rate": 1.2644056520400768e-06, "loss": 0.0251, "step": 762725 }, { "epoch": 7.5, "grad_norm": 6.3615546226501465, "learning_rate": 1.2642815295858282e-06, "loss": 0.0656, "step": 762750 }, { "epoch": 7.5, "grad_norm": 0.24267283082008362, "learning_rate": 1.2641574071315799e-06, "loss": 0.0196, "step": 762775 }, { "epoch": 7.5, "grad_norm": 3.3915483951568604, "learning_rate": 1.2640332846773315e-06, "loss": 0.0489, "step": 762800 }, { "epoch": 7.5, "grad_norm": 3.6217308044433594, "learning_rate": 1.263909162223083e-06, "loss": 0.0138, "step": 762825 }, { "epoch": 7.5, "grad_norm": 3.5000221729278564, "learning_rate": 1.2637850397688346e-06, "loss": 0.0425, "step": 762850 }, { "epoch": 7.5, "grad_norm": 0.28430595993995667, "learning_rate": 1.2636609173145858e-06, "loss": 0.0253, "step": 762875 }, { "epoch": 7.5, "grad_norm": 2.7584569454193115, "learning_rate": 1.2635367948603374e-06, "loss": 0.0802, "step": 762900 }, { "epoch": 7.5, "grad_norm": 2.7821054458618164, "learning_rate": 1.263412672406089e-06, "loss": 0.0192, "step": 762925 }, { "epoch": 7.5, "grad_norm": 5.242539882659912, "learning_rate": 1.2632885499518405e-06, "loss": 0.0834, "step": 762950 }, { "epoch": 7.5, "grad_norm": 8.758655548095703, "learning_rate": 1.2631644274975921e-06, "loss": 0.0284, "step": 762975 }, { "epoch": 7.5, "grad_norm": 3.0200467109680176, "learning_rate": 1.2630403050433437e-06, "loss": 0.0417, "step": 763000 }, { "epoch": 7.5, "grad_norm": 3.169008255004883, "learning_rate": 1.2629161825890952e-06, "loss": 0.018, "step": 763025 }, { "epoch": 7.5, "grad_norm": 2.3172402381896973, "learning_rate": 1.2627920601348468e-06, "loss": 0.0592, "step": 763050 }, { "epoch": 7.5, "grad_norm": 1.7039624452590942, "learning_rate": 1.2626679376805984e-06, "loss": 0.0193, "step": 763075 }, { "epoch": 7.5, "grad_norm": 3.3748347759246826, "learning_rate": 1.2625438152263496e-06, "loss": 0.0444, "step": 763100 }, { "epoch": 7.5, "grad_norm": 0.3758859932422638, "learning_rate": 1.2624196927721013e-06, "loss": 0.0266, "step": 763125 }, { "epoch": 7.5, "grad_norm": 4.305924415588379, "learning_rate": 1.262295570317853e-06, "loss": 0.051, "step": 763150 }, { "epoch": 7.5, "grad_norm": 0.8373677730560303, "learning_rate": 1.2621714478636043e-06, "loss": 0.0198, "step": 763175 }, { "epoch": 7.5, "grad_norm": 2.8497769832611084, "learning_rate": 1.262047325409356e-06, "loss": 0.0557, "step": 763200 }, { "epoch": 7.5, "grad_norm": 5.511923789978027, "learning_rate": 1.2619232029551076e-06, "loss": 0.0137, "step": 763225 }, { "epoch": 7.5, "grad_norm": 4.359581470489502, "learning_rate": 1.261799080500859e-06, "loss": 0.0686, "step": 763250 }, { "epoch": 7.5, "grad_norm": 5.914535999298096, "learning_rate": 1.2616749580466107e-06, "loss": 0.021, "step": 763275 }, { "epoch": 7.5, "grad_norm": 3.8534860610961914, "learning_rate": 1.2615508355923619e-06, "loss": 0.0391, "step": 763300 }, { "epoch": 7.51, "grad_norm": 7.714034557342529, "learning_rate": 1.2614267131381135e-06, "loss": 0.0234, "step": 763325 }, { "epoch": 7.51, "grad_norm": 3.7814602851867676, "learning_rate": 1.2613025906838651e-06, "loss": 0.0655, "step": 763350 }, { "epoch": 7.51, "grad_norm": 2.009857177734375, "learning_rate": 1.2611784682296166e-06, "loss": 0.0206, "step": 763375 }, { "epoch": 7.51, "grad_norm": 5.705352306365967, "learning_rate": 1.2610543457753682e-06, "loss": 0.0661, "step": 763400 }, { "epoch": 7.51, "grad_norm": 6.085624694824219, "learning_rate": 1.2609302233211198e-06, "loss": 0.0177, "step": 763425 }, { "epoch": 7.51, "grad_norm": 2.2996816635131836, "learning_rate": 1.2608061008668713e-06, "loss": 0.0499, "step": 763450 }, { "epoch": 7.51, "grad_norm": 1.7084689140319824, "learning_rate": 1.260681978412623e-06, "loss": 0.023, "step": 763475 }, { "epoch": 7.51, "grad_norm": 3.376237630844116, "learning_rate": 1.2605578559583745e-06, "loss": 0.0677, "step": 763500 }, { "epoch": 7.51, "grad_norm": 0.3724709451198578, "learning_rate": 1.2604337335041257e-06, "loss": 0.0277, "step": 763525 }, { "epoch": 7.51, "grad_norm": 3.1100635528564453, "learning_rate": 1.2603096110498774e-06, "loss": 0.0756, "step": 763550 }, { "epoch": 7.51, "grad_norm": 4.6819915771484375, "learning_rate": 1.260185488595629e-06, "loss": 0.0233, "step": 763575 }, { "epoch": 7.51, "grad_norm": 4.944086074829102, "learning_rate": 1.2600613661413804e-06, "loss": 0.0458, "step": 763600 }, { "epoch": 7.51, "grad_norm": 0.08813393115997314, "learning_rate": 1.259937243687132e-06, "loss": 0.026, "step": 763625 }, { "epoch": 7.51, "grad_norm": 3.0728282928466797, "learning_rate": 1.2598131212328837e-06, "loss": 0.0583, "step": 763650 }, { "epoch": 7.51, "grad_norm": 14.751315116882324, "learning_rate": 1.2596889987786351e-06, "loss": 0.0318, "step": 763675 }, { "epoch": 7.51, "grad_norm": 3.6745309829711914, "learning_rate": 1.2595648763243868e-06, "loss": 0.0387, "step": 763700 }, { "epoch": 7.51, "grad_norm": 0.6205117106437683, "learning_rate": 1.2594407538701382e-06, "loss": 0.0161, "step": 763725 }, { "epoch": 7.51, "grad_norm": 2.770939588546753, "learning_rate": 1.2593166314158898e-06, "loss": 0.062, "step": 763750 }, { "epoch": 7.51, "grad_norm": 0.9569296836853027, "learning_rate": 1.2591925089616415e-06, "loss": 0.0349, "step": 763775 }, { "epoch": 7.51, "grad_norm": 2.185429811477661, "learning_rate": 1.2590683865073927e-06, "loss": 0.0509, "step": 763800 }, { "epoch": 7.51, "grad_norm": 13.737534523010254, "learning_rate": 1.2589442640531443e-06, "loss": 0.0166, "step": 763825 }, { "epoch": 7.51, "grad_norm": 1.7472928762435913, "learning_rate": 1.258820141598896e-06, "loss": 0.0595, "step": 763850 }, { "epoch": 7.51, "grad_norm": 12.310335159301758, "learning_rate": 1.2586960191446474e-06, "loss": 0.0342, "step": 763875 }, { "epoch": 7.51, "grad_norm": 2.4959490299224854, "learning_rate": 1.258571896690399e-06, "loss": 0.071, "step": 763900 }, { "epoch": 7.51, "grad_norm": 2.460909128189087, "learning_rate": 1.2584477742361506e-06, "loss": 0.009, "step": 763925 }, { "epoch": 7.51, "grad_norm": 4.277076244354248, "learning_rate": 1.258323651781902e-06, "loss": 0.0767, "step": 763950 }, { "epoch": 7.51, "grad_norm": 5.708071231842041, "learning_rate": 1.2581995293276537e-06, "loss": 0.0191, "step": 763975 }, { "epoch": 7.51, "grad_norm": 3.7817907333374023, "learning_rate": 1.2580754068734053e-06, "loss": 0.071, "step": 764000 }, { "epoch": 7.51, "grad_norm": 4.365590572357178, "learning_rate": 1.2579512844191565e-06, "loss": 0.0154, "step": 764025 }, { "epoch": 7.51, "grad_norm": 3.1999893188476562, "learning_rate": 1.2578271619649082e-06, "loss": 0.0602, "step": 764050 }, { "epoch": 7.51, "grad_norm": 0.0860685408115387, "learning_rate": 1.2577030395106598e-06, "loss": 0.0117, "step": 764075 }, { "epoch": 7.51, "grad_norm": 3.1343953609466553, "learning_rate": 1.2575789170564112e-06, "loss": 0.0502, "step": 764100 }, { "epoch": 7.51, "grad_norm": 1.147229790687561, "learning_rate": 1.2574547946021629e-06, "loss": 0.0163, "step": 764125 }, { "epoch": 7.51, "grad_norm": 3.631112575531006, "learning_rate": 1.2573306721479143e-06, "loss": 0.0721, "step": 764150 }, { "epoch": 7.51, "grad_norm": 7.960690021514893, "learning_rate": 1.257206549693666e-06, "loss": 0.0361, "step": 764175 }, { "epoch": 7.51, "grad_norm": 2.973668336868286, "learning_rate": 1.2570824272394176e-06, "loss": 0.0551, "step": 764200 }, { "epoch": 7.51, "grad_norm": 11.767520904541016, "learning_rate": 1.2569583047851688e-06, "loss": 0.0154, "step": 764225 }, { "epoch": 7.51, "grad_norm": 5.087958812713623, "learning_rate": 1.2568341823309204e-06, "loss": 0.0576, "step": 764250 }, { "epoch": 7.51, "grad_norm": 9.096543312072754, "learning_rate": 1.256710059876672e-06, "loss": 0.0286, "step": 764275 }, { "epoch": 7.51, "grad_norm": 3.1696739196777344, "learning_rate": 1.2565859374224235e-06, "loss": 0.0624, "step": 764300 }, { "epoch": 7.51, "grad_norm": 0.6031559705734253, "learning_rate": 1.256461814968175e-06, "loss": 0.0249, "step": 764325 }, { "epoch": 7.52, "grad_norm": 3.8149213790893555, "learning_rate": 1.2563376925139267e-06, "loss": 0.0421, "step": 764350 }, { "epoch": 7.52, "grad_norm": 0.3126656711101532, "learning_rate": 1.2562135700596782e-06, "loss": 0.0222, "step": 764375 }, { "epoch": 7.52, "grad_norm": 3.4754998683929443, "learning_rate": 1.2560894476054298e-06, "loss": 0.0637, "step": 764400 }, { "epoch": 7.52, "grad_norm": 10.932624816894531, "learning_rate": 1.2559653251511814e-06, "loss": 0.0168, "step": 764425 }, { "epoch": 7.52, "grad_norm": 4.0034966468811035, "learning_rate": 1.2558412026969326e-06, "loss": 0.0936, "step": 764450 }, { "epoch": 7.52, "grad_norm": 1.0117526054382324, "learning_rate": 1.2557170802426843e-06, "loss": 0.0202, "step": 764475 }, { "epoch": 7.52, "grad_norm": 3.0157253742218018, "learning_rate": 1.255592957788436e-06, "loss": 0.0683, "step": 764500 }, { "epoch": 7.52, "grad_norm": 0.4158603847026825, "learning_rate": 1.2554688353341873e-06, "loss": 0.0205, "step": 764525 }, { "epoch": 7.52, "grad_norm": 2.7345099449157715, "learning_rate": 1.255344712879939e-06, "loss": 0.0389, "step": 764550 }, { "epoch": 7.52, "grad_norm": 2.724853038787842, "learning_rate": 1.2552205904256906e-06, "loss": 0.0272, "step": 764575 }, { "epoch": 7.52, "grad_norm": 2.559288501739502, "learning_rate": 1.255096467971442e-06, "loss": 0.0697, "step": 764600 }, { "epoch": 7.52, "grad_norm": 5.553203582763672, "learning_rate": 1.2549723455171937e-06, "loss": 0.0118, "step": 764625 }, { "epoch": 7.52, "grad_norm": 2.83270263671875, "learning_rate": 1.2548482230629449e-06, "loss": 0.0545, "step": 764650 }, { "epoch": 7.52, "grad_norm": 0.02716943621635437, "learning_rate": 1.2547241006086965e-06, "loss": 0.0299, "step": 764675 }, { "epoch": 7.52, "grad_norm": 3.926054000854492, "learning_rate": 1.2545999781544481e-06, "loss": 0.0555, "step": 764700 }, { "epoch": 7.52, "grad_norm": 11.16059684753418, "learning_rate": 1.2544758557001996e-06, "loss": 0.0121, "step": 764725 }, { "epoch": 7.52, "grad_norm": 7.813675403594971, "learning_rate": 1.2543517332459512e-06, "loss": 0.0565, "step": 764750 }, { "epoch": 7.52, "grad_norm": 1.3690301179885864, "learning_rate": 1.2542276107917028e-06, "loss": 0.0204, "step": 764775 }, { "epoch": 7.52, "grad_norm": 3.6471080780029297, "learning_rate": 1.2541034883374543e-06, "loss": 0.0749, "step": 764800 }, { "epoch": 7.52, "grad_norm": 7.066189765930176, "learning_rate": 1.253979365883206e-06, "loss": 0.0223, "step": 764825 }, { "epoch": 7.52, "grad_norm": 3.801754951477051, "learning_rate": 1.2538602083271273e-06, "loss": 0.036, "step": 764850 }, { "epoch": 7.52, "grad_norm": 2.5732202529907227, "learning_rate": 1.253736085872879e-06, "loss": 0.0175, "step": 764875 }, { "epoch": 7.52, "grad_norm": 4.479698657989502, "learning_rate": 1.2536119634186302e-06, "loss": 0.0529, "step": 764900 }, { "epoch": 7.52, "grad_norm": 0.44592902064323425, "learning_rate": 1.2534878409643818e-06, "loss": 0.018, "step": 764925 }, { "epoch": 7.52, "grad_norm": 5.182014465332031, "learning_rate": 1.2533637185101335e-06, "loss": 0.0647, "step": 764950 }, { "epoch": 7.52, "grad_norm": 0.1430581659078598, "learning_rate": 1.2532395960558849e-06, "loss": 0.0304, "step": 764975 }, { "epoch": 7.52, "grad_norm": 3.6999568939208984, "learning_rate": 1.2531154736016365e-06, "loss": 0.0479, "step": 765000 }, { "epoch": 7.52, "grad_norm": 10.997979164123535, "learning_rate": 1.2529913511473882e-06, "loss": 0.0146, "step": 765025 }, { "epoch": 7.52, "grad_norm": 1.9131748676300049, "learning_rate": 1.2528672286931396e-06, "loss": 0.0584, "step": 765050 }, { "epoch": 7.52, "grad_norm": 4.403040409088135, "learning_rate": 1.2527431062388912e-06, "loss": 0.0222, "step": 765075 }, { "epoch": 7.52, "grad_norm": 3.017259120941162, "learning_rate": 1.2526189837846428e-06, "loss": 0.065, "step": 765100 }, { "epoch": 7.52, "grad_norm": 0.09908262640237808, "learning_rate": 1.252494861330394e-06, "loss": 0.0167, "step": 765125 }, { "epoch": 7.52, "grad_norm": 4.175155162811279, "learning_rate": 1.2523707388761457e-06, "loss": 0.077, "step": 765150 }, { "epoch": 7.52, "grad_norm": 5.867783546447754, "learning_rate": 1.2522466164218971e-06, "loss": 0.0191, "step": 765175 }, { "epoch": 7.52, "grad_norm": 3.1820991039276123, "learning_rate": 1.2521224939676488e-06, "loss": 0.0662, "step": 765200 }, { "epoch": 7.52, "grad_norm": 1.7236590385437012, "learning_rate": 1.2519983715134004e-06, "loss": 0.0177, "step": 765225 }, { "epoch": 7.52, "grad_norm": 4.180325508117676, "learning_rate": 1.2518742490591518e-06, "loss": 0.0406, "step": 765250 }, { "epoch": 7.52, "grad_norm": 0.24880996346473694, "learning_rate": 1.2517501266049034e-06, "loss": 0.0145, "step": 765275 }, { "epoch": 7.52, "grad_norm": 3.606372833251953, "learning_rate": 1.251626004150655e-06, "loss": 0.0742, "step": 765300 }, { "epoch": 7.52, "grad_norm": 3.3569934368133545, "learning_rate": 1.2515018816964065e-06, "loss": 0.0201, "step": 765325 }, { "epoch": 7.53, "grad_norm": 5.838570594787598, "learning_rate": 1.2513777592421581e-06, "loss": 0.0585, "step": 765350 }, { "epoch": 7.53, "grad_norm": 2.8884260654449463, "learning_rate": 1.2512536367879098e-06, "loss": 0.0234, "step": 765375 }, { "epoch": 7.53, "grad_norm": 3.6563000679016113, "learning_rate": 1.251129514333661e-06, "loss": 0.042, "step": 765400 }, { "epoch": 7.53, "grad_norm": 9.154345512390137, "learning_rate": 1.2510053918794126e-06, "loss": 0.0137, "step": 765425 }, { "epoch": 7.53, "grad_norm": 4.138910293579102, "learning_rate": 1.2508812694251643e-06, "loss": 0.0546, "step": 765450 }, { "epoch": 7.53, "grad_norm": 6.925927639007568, "learning_rate": 1.2507571469709157e-06, "loss": 0.0142, "step": 765475 }, { "epoch": 7.53, "grad_norm": 2.354942798614502, "learning_rate": 1.2506330245166673e-06, "loss": 0.0518, "step": 765500 }, { "epoch": 7.53, "grad_norm": 1.9542820453643799, "learning_rate": 1.250508902062419e-06, "loss": 0.0283, "step": 765525 }, { "epoch": 7.53, "grad_norm": 3.7172768115997314, "learning_rate": 1.2503847796081704e-06, "loss": 0.0601, "step": 765550 }, { "epoch": 7.53, "grad_norm": 13.10283088684082, "learning_rate": 1.250260657153922e-06, "loss": 0.0194, "step": 765575 }, { "epoch": 7.53, "grad_norm": 2.770968437194824, "learning_rate": 1.2501365346996732e-06, "loss": 0.0584, "step": 765600 }, { "epoch": 7.53, "grad_norm": 1.0604325532913208, "learning_rate": 1.2500124122454249e-06, "loss": 0.0155, "step": 765625 }, { "epoch": 7.53, "grad_norm": 2.611267328262329, "learning_rate": 1.2498882897911765e-06, "loss": 0.0543, "step": 765650 }, { "epoch": 7.53, "grad_norm": 3.614518404006958, "learning_rate": 1.2497641673369281e-06, "loss": 0.0331, "step": 765675 }, { "epoch": 7.53, "grad_norm": 7.354953765869141, "learning_rate": 1.2496400448826795e-06, "loss": 0.0884, "step": 765700 }, { "epoch": 7.53, "grad_norm": 2.4235849380493164, "learning_rate": 1.249515922428431e-06, "loss": 0.0115, "step": 765725 }, { "epoch": 7.53, "grad_norm": 2.0972952842712402, "learning_rate": 1.2493917999741826e-06, "loss": 0.0628, "step": 765750 }, { "epoch": 7.53, "grad_norm": 0.8355620503425598, "learning_rate": 1.2492676775199342e-06, "loss": 0.0119, "step": 765775 }, { "epoch": 7.53, "grad_norm": 3.2070298194885254, "learning_rate": 1.2491435550656857e-06, "loss": 0.0627, "step": 765800 }, { "epoch": 7.53, "grad_norm": 0.4112853705883026, "learning_rate": 1.2490194326114373e-06, "loss": 0.0271, "step": 765825 }, { "epoch": 7.53, "grad_norm": 2.7593815326690674, "learning_rate": 1.2488953101571887e-06, "loss": 0.0446, "step": 765850 }, { "epoch": 7.53, "grad_norm": 2.2300326824188232, "learning_rate": 1.2487711877029404e-06, "loss": 0.0144, "step": 765875 }, { "epoch": 7.53, "grad_norm": 4.639429569244385, "learning_rate": 1.248647065248692e-06, "loss": 0.0679, "step": 765900 }, { "epoch": 7.53, "grad_norm": 0.7298957705497742, "learning_rate": 1.2485229427944434e-06, "loss": 0.0347, "step": 765925 }, { "epoch": 7.53, "grad_norm": 2.456355094909668, "learning_rate": 1.2483988203401948e-06, "loss": 0.0586, "step": 765950 }, { "epoch": 7.53, "grad_norm": 0.0806313157081604, "learning_rate": 1.2482746978859465e-06, "loss": 0.0101, "step": 765975 }, { "epoch": 7.53, "grad_norm": 3.642526865005493, "learning_rate": 1.2481505754316981e-06, "loss": 0.0437, "step": 766000 }, { "epoch": 7.53, "grad_norm": 2.7142558097839355, "learning_rate": 1.2480264529774495e-06, "loss": 0.0177, "step": 766025 }, { "epoch": 7.53, "grad_norm": 2.8243064880371094, "learning_rate": 1.247902330523201e-06, "loss": 0.0522, "step": 766050 }, { "epoch": 7.53, "grad_norm": 6.370485305786133, "learning_rate": 1.2477782080689526e-06, "loss": 0.0206, "step": 766075 }, { "epoch": 7.53, "grad_norm": 2.2946112155914307, "learning_rate": 1.2476540856147042e-06, "loss": 0.0422, "step": 766100 }, { "epoch": 7.53, "grad_norm": 3.725273609161377, "learning_rate": 1.2475299631604556e-06, "loss": 0.0175, "step": 766125 }, { "epoch": 7.53, "grad_norm": 4.734489440917969, "learning_rate": 1.247405840706207e-06, "loss": 0.0602, "step": 766150 }, { "epoch": 7.53, "grad_norm": 2.4889461994171143, "learning_rate": 1.2472817182519587e-06, "loss": 0.0202, "step": 766175 }, { "epoch": 7.53, "grad_norm": 2.1319615840911865, "learning_rate": 1.2471575957977103e-06, "loss": 0.0637, "step": 766200 }, { "epoch": 7.53, "grad_norm": 1.2442952394485474, "learning_rate": 1.2470334733434618e-06, "loss": 0.0154, "step": 766225 }, { "epoch": 7.53, "grad_norm": 2.689046859741211, "learning_rate": 1.2469093508892134e-06, "loss": 0.0539, "step": 766250 }, { "epoch": 7.53, "grad_norm": 0.4946264624595642, "learning_rate": 1.2467852284349648e-06, "loss": 0.016, "step": 766275 }, { "epoch": 7.53, "grad_norm": 4.206204891204834, "learning_rate": 1.2466611059807165e-06, "loss": 0.0592, "step": 766300 }, { "epoch": 7.53, "grad_norm": 6.221276760101318, "learning_rate": 1.246536983526468e-06, "loss": 0.0315, "step": 766325 }, { "epoch": 7.53, "grad_norm": 2.7913196086883545, "learning_rate": 1.2464128610722195e-06, "loss": 0.0653, "step": 766350 }, { "epoch": 7.54, "grad_norm": 0.620822012424469, "learning_rate": 1.246288738617971e-06, "loss": 0.019, "step": 766375 }, { "epoch": 7.54, "grad_norm": 3.304929256439209, "learning_rate": 1.2461646161637226e-06, "loss": 0.0847, "step": 766400 }, { "epoch": 7.54, "grad_norm": 0.3546832501888275, "learning_rate": 1.2460404937094742e-06, "loss": 0.0234, "step": 766425 }, { "epoch": 7.54, "grad_norm": 2.634141445159912, "learning_rate": 1.2459163712552256e-06, "loss": 0.0619, "step": 766450 }, { "epoch": 7.54, "grad_norm": 2.6615471839904785, "learning_rate": 1.245792248800977e-06, "loss": 0.0257, "step": 766475 }, { "epoch": 7.54, "grad_norm": 4.132228374481201, "learning_rate": 1.2456681263467287e-06, "loss": 0.0703, "step": 766500 }, { "epoch": 7.54, "grad_norm": 0.46363815665245056, "learning_rate": 1.2455440038924803e-06, "loss": 0.0228, "step": 766525 }, { "epoch": 7.54, "grad_norm": 4.600427150726318, "learning_rate": 1.2454198814382317e-06, "loss": 0.0562, "step": 766550 }, { "epoch": 7.54, "grad_norm": 6.358227252960205, "learning_rate": 1.2452957589839832e-06, "loss": 0.0273, "step": 766575 }, { "epoch": 7.54, "grad_norm": 2.7104885578155518, "learning_rate": 1.2451716365297348e-06, "loss": 0.0494, "step": 766600 }, { "epoch": 7.54, "grad_norm": 12.425772666931152, "learning_rate": 1.2450475140754864e-06, "loss": 0.0163, "step": 766625 }, { "epoch": 7.54, "grad_norm": 2.0454187393188477, "learning_rate": 1.2449233916212379e-06, "loss": 0.0863, "step": 766650 }, { "epoch": 7.54, "grad_norm": 0.18493284285068512, "learning_rate": 1.2447992691669895e-06, "loss": 0.0189, "step": 766675 }, { "epoch": 7.54, "grad_norm": 3.2615766525268555, "learning_rate": 1.2446751467127411e-06, "loss": 0.0707, "step": 766700 }, { "epoch": 7.54, "grad_norm": 0.2406970113515854, "learning_rate": 1.2445510242584926e-06, "loss": 0.0207, "step": 766725 }, { "epoch": 7.54, "grad_norm": 3.965693712234497, "learning_rate": 1.2444269018042442e-06, "loss": 0.0295, "step": 766750 }, { "epoch": 7.54, "grad_norm": 0.2548573911190033, "learning_rate": 1.2443027793499956e-06, "loss": 0.0241, "step": 766775 }, { "epoch": 7.54, "grad_norm": 2.58512806892395, "learning_rate": 1.2441786568957473e-06, "loss": 0.046, "step": 766800 }, { "epoch": 7.54, "grad_norm": 2.113947629928589, "learning_rate": 1.2440545344414987e-06, "loss": 0.0102, "step": 766825 }, { "epoch": 7.54, "grad_norm": 3.3402366638183594, "learning_rate": 1.2439304119872503e-06, "loss": 0.0555, "step": 766850 }, { "epoch": 7.54, "grad_norm": 11.829389572143555, "learning_rate": 1.2438062895330017e-06, "loss": 0.0168, "step": 766875 }, { "epoch": 7.54, "grad_norm": 3.636143207550049, "learning_rate": 1.2436821670787534e-06, "loss": 0.0484, "step": 766900 }, { "epoch": 7.54, "grad_norm": 3.002575397491455, "learning_rate": 1.243558044624505e-06, "loss": 0.026, "step": 766925 }, { "epoch": 7.54, "grad_norm": 3.0101892948150635, "learning_rate": 1.2434339221702564e-06, "loss": 0.068, "step": 766950 }, { "epoch": 7.54, "grad_norm": 1.4156553745269775, "learning_rate": 1.2433097997160078e-06, "loss": 0.0249, "step": 766975 }, { "epoch": 7.54, "grad_norm": 2.53562593460083, "learning_rate": 1.2431856772617595e-06, "loss": 0.0413, "step": 767000 }, { "epoch": 7.54, "grad_norm": 0.7470487356185913, "learning_rate": 1.2430615548075111e-06, "loss": 0.0186, "step": 767025 }, { "epoch": 7.54, "grad_norm": 5.604240894317627, "learning_rate": 1.2429374323532625e-06, "loss": 0.0727, "step": 767050 }, { "epoch": 7.54, "grad_norm": 8.207830429077148, "learning_rate": 1.242813309899014e-06, "loss": 0.0129, "step": 767075 }, { "epoch": 7.54, "grad_norm": 4.036797046661377, "learning_rate": 1.2426891874447656e-06, "loss": 0.049, "step": 767100 }, { "epoch": 7.54, "grad_norm": 1.6324236392974854, "learning_rate": 1.2425650649905172e-06, "loss": 0.0192, "step": 767125 }, { "epoch": 7.54, "grad_norm": 2.595224618911743, "learning_rate": 1.2424409425362687e-06, "loss": 0.0639, "step": 767150 }, { "epoch": 7.54, "grad_norm": 5.884554386138916, "learning_rate": 1.2423168200820203e-06, "loss": 0.0221, "step": 767175 }, { "epoch": 7.54, "grad_norm": 3.213865041732788, "learning_rate": 1.2421926976277717e-06, "loss": 0.058, "step": 767200 }, { "epoch": 7.54, "grad_norm": 0.4751433730125427, "learning_rate": 1.2420685751735234e-06, "loss": 0.031, "step": 767225 }, { "epoch": 7.54, "grad_norm": 2.562743902206421, "learning_rate": 1.2419444527192748e-06, "loss": 0.0476, "step": 767250 }, { "epoch": 7.54, "grad_norm": 4.7204060554504395, "learning_rate": 1.2418203302650264e-06, "loss": 0.0104, "step": 767275 }, { "epoch": 7.54, "grad_norm": 4.220816612243652, "learning_rate": 1.2416962078107778e-06, "loss": 0.065, "step": 767300 }, { "epoch": 7.54, "grad_norm": 2.7512848377227783, "learning_rate": 1.2415720853565295e-06, "loss": 0.0181, "step": 767325 }, { "epoch": 7.54, "grad_norm": 4.41118049621582, "learning_rate": 1.241447962902281e-06, "loss": 0.0509, "step": 767350 }, { "epoch": 7.54, "grad_norm": 4.395380020141602, "learning_rate": 1.2413238404480325e-06, "loss": 0.0223, "step": 767375 }, { "epoch": 7.55, "grad_norm": 3.0066821575164795, "learning_rate": 1.241199717993784e-06, "loss": 0.0736, "step": 767400 }, { "epoch": 7.55, "grad_norm": 0.06250201910734177, "learning_rate": 1.2410755955395356e-06, "loss": 0.0312, "step": 767425 }, { "epoch": 7.55, "grad_norm": 2.8615176677703857, "learning_rate": 1.2409514730852872e-06, "loss": 0.061, "step": 767450 }, { "epoch": 7.55, "grad_norm": 0.025060871616005898, "learning_rate": 1.2408273506310386e-06, "loss": 0.0082, "step": 767475 }, { "epoch": 7.55, "grad_norm": 3.006528854370117, "learning_rate": 1.24070322817679e-06, "loss": 0.075, "step": 767500 }, { "epoch": 7.55, "grad_norm": 0.7158713340759277, "learning_rate": 1.2405791057225417e-06, "loss": 0.0185, "step": 767525 }, { "epoch": 7.55, "grad_norm": 3.2257988452911377, "learning_rate": 1.2404549832682933e-06, "loss": 0.0652, "step": 767550 }, { "epoch": 7.55, "grad_norm": 2.034463882446289, "learning_rate": 1.2403308608140448e-06, "loss": 0.035, "step": 767575 }, { "epoch": 7.55, "grad_norm": 2.273806571960449, "learning_rate": 1.2402067383597964e-06, "loss": 0.0435, "step": 767600 }, { "epoch": 7.55, "grad_norm": 5.489996910095215, "learning_rate": 1.2400826159055478e-06, "loss": 0.0194, "step": 767625 }, { "epoch": 7.55, "grad_norm": 3.519664764404297, "learning_rate": 1.2399584934512995e-06, "loss": 0.0547, "step": 767650 }, { "epoch": 7.55, "grad_norm": 4.061778545379639, "learning_rate": 1.2398343709970509e-06, "loss": 0.0184, "step": 767675 }, { "epoch": 7.55, "grad_norm": 2.9562020301818848, "learning_rate": 1.2397102485428025e-06, "loss": 0.0367, "step": 767700 }, { "epoch": 7.55, "grad_norm": 0.08467260003089905, "learning_rate": 1.239586126088554e-06, "loss": 0.0132, "step": 767725 }, { "epoch": 7.55, "grad_norm": 4.201779842376709, "learning_rate": 1.2394620036343056e-06, "loss": 0.0598, "step": 767750 }, { "epoch": 7.55, "grad_norm": 7.492549419403076, "learning_rate": 1.2393378811800572e-06, "loss": 0.0175, "step": 767775 }, { "epoch": 7.55, "grad_norm": 6.251669883728027, "learning_rate": 1.2392137587258086e-06, "loss": 0.0553, "step": 767800 }, { "epoch": 7.55, "grad_norm": 7.211582183837891, "learning_rate": 1.23908963627156e-06, "loss": 0.0081, "step": 767825 }, { "epoch": 7.55, "grad_norm": 3.783348321914673, "learning_rate": 1.2389655138173117e-06, "loss": 0.0603, "step": 767850 }, { "epoch": 7.55, "grad_norm": 0.04088772460818291, "learning_rate": 1.2388413913630633e-06, "loss": 0.0185, "step": 767875 }, { "epoch": 7.55, "grad_norm": 4.4658732414245605, "learning_rate": 1.2387172689088147e-06, "loss": 0.0705, "step": 767900 }, { "epoch": 7.55, "grad_norm": 1.5874711275100708, "learning_rate": 1.2385931464545662e-06, "loss": 0.0158, "step": 767925 }, { "epoch": 7.55, "grad_norm": 3.1930861473083496, "learning_rate": 1.2384690240003178e-06, "loss": 0.0928, "step": 767950 }, { "epoch": 7.55, "grad_norm": 1.2493319511413574, "learning_rate": 1.2383449015460694e-06, "loss": 0.0197, "step": 767975 }, { "epoch": 7.55, "grad_norm": 2.7617228031158447, "learning_rate": 1.2382207790918209e-06, "loss": 0.0389, "step": 768000 }, { "epoch": 7.55, "grad_norm": 2.101294755935669, "learning_rate": 1.2380966566375725e-06, "loss": 0.0184, "step": 768025 }, { "epoch": 7.55, "grad_norm": 3.3580193519592285, "learning_rate": 1.2379725341833241e-06, "loss": 0.0476, "step": 768050 }, { "epoch": 7.55, "grad_norm": 0.2631143629550934, "learning_rate": 1.2378484117290756e-06, "loss": 0.0052, "step": 768075 }, { "epoch": 7.55, "grad_norm": 1.8637892007827759, "learning_rate": 1.237724289274827e-06, "loss": 0.0663, "step": 768100 }, { "epoch": 7.55, "grad_norm": 3.5774004459381104, "learning_rate": 1.2376001668205786e-06, "loss": 0.02, "step": 768125 }, { "epoch": 7.55, "grad_norm": 2.972522258758545, "learning_rate": 1.2374760443663302e-06, "loss": 0.0534, "step": 768150 }, { "epoch": 7.55, "grad_norm": 1.0040596723556519, "learning_rate": 1.2373519219120817e-06, "loss": 0.0246, "step": 768175 }, { "epoch": 7.55, "grad_norm": 4.438982009887695, "learning_rate": 1.2372277994578333e-06, "loss": 0.0376, "step": 768200 }, { "epoch": 7.55, "grad_norm": 0.19855929911136627, "learning_rate": 1.2371036770035847e-06, "loss": 0.0154, "step": 768225 }, { "epoch": 7.55, "grad_norm": 3.9017367362976074, "learning_rate": 1.2369795545493364e-06, "loss": 0.0606, "step": 768250 }, { "epoch": 7.55, "grad_norm": 6.491658687591553, "learning_rate": 1.2368554320950878e-06, "loss": 0.0216, "step": 768275 }, { "epoch": 7.55, "grad_norm": 4.108695030212402, "learning_rate": 1.2367313096408394e-06, "loss": 0.0554, "step": 768300 }, { "epoch": 7.55, "grad_norm": 3.6665947437286377, "learning_rate": 1.2366071871865908e-06, "loss": 0.0236, "step": 768325 }, { "epoch": 7.55, "grad_norm": 2.85829758644104, "learning_rate": 1.2364830647323425e-06, "loss": 0.0675, "step": 768350 }, { "epoch": 7.55, "grad_norm": 8.320930480957031, "learning_rate": 1.2363589422780941e-06, "loss": 0.0216, "step": 768375 }, { "epoch": 7.56, "grad_norm": 3.127695322036743, "learning_rate": 1.2362348198238455e-06, "loss": 0.0607, "step": 768400 }, { "epoch": 7.56, "grad_norm": 8.14572525024414, "learning_rate": 1.236110697369597e-06, "loss": 0.0168, "step": 768425 }, { "epoch": 7.56, "grad_norm": 4.296749114990234, "learning_rate": 1.2359865749153486e-06, "loss": 0.057, "step": 768450 }, { "epoch": 7.56, "grad_norm": 5.084029197692871, "learning_rate": 1.2358624524611002e-06, "loss": 0.0231, "step": 768475 }, { "epoch": 7.56, "grad_norm": 3.148286819458008, "learning_rate": 1.2357383300068517e-06, "loss": 0.0518, "step": 768500 }, { "epoch": 7.56, "grad_norm": 16.91378402709961, "learning_rate": 1.235614207552603e-06, "loss": 0.0247, "step": 768525 }, { "epoch": 7.56, "grad_norm": 2.1328542232513428, "learning_rate": 1.2354900850983547e-06, "loss": 0.0539, "step": 768550 }, { "epoch": 7.56, "grad_norm": 0.2720291018486023, "learning_rate": 1.2353659626441063e-06, "loss": 0.0162, "step": 768575 }, { "epoch": 7.56, "grad_norm": 1.4603033065795898, "learning_rate": 1.2352418401898578e-06, "loss": 0.0424, "step": 768600 }, { "epoch": 7.56, "grad_norm": 0.2750827670097351, "learning_rate": 1.2351177177356094e-06, "loss": 0.0105, "step": 768625 }, { "epoch": 7.56, "grad_norm": 3.070389986038208, "learning_rate": 1.2349935952813608e-06, "loss": 0.0561, "step": 768650 }, { "epoch": 7.56, "grad_norm": 0.24712195992469788, "learning_rate": 1.2348694728271125e-06, "loss": 0.0132, "step": 768675 }, { "epoch": 7.56, "grad_norm": 4.350121021270752, "learning_rate": 1.2347453503728639e-06, "loss": 0.0582, "step": 768700 }, { "epoch": 7.56, "grad_norm": 1.185802698135376, "learning_rate": 1.2346212279186155e-06, "loss": 0.0246, "step": 768725 }, { "epoch": 7.56, "grad_norm": 4.539974212646484, "learning_rate": 1.234497105464367e-06, "loss": 0.0606, "step": 768750 }, { "epoch": 7.56, "grad_norm": 8.99578857421875, "learning_rate": 1.2343729830101186e-06, "loss": 0.0266, "step": 768775 }, { "epoch": 7.56, "grad_norm": 3.177593946456909, "learning_rate": 1.2342488605558702e-06, "loss": 0.0704, "step": 768800 }, { "epoch": 7.56, "grad_norm": 0.03182937577366829, "learning_rate": 1.2341247381016216e-06, "loss": 0.0303, "step": 768825 }, { "epoch": 7.56, "grad_norm": 5.030117034912109, "learning_rate": 1.234005580545543e-06, "loss": 0.0758, "step": 768850 }, { "epoch": 7.56, "grad_norm": 11.276786804199219, "learning_rate": 1.2338814580912945e-06, "loss": 0.0244, "step": 768875 }, { "epoch": 7.56, "grad_norm": 4.306757926940918, "learning_rate": 1.2337573356370461e-06, "loss": 0.0526, "step": 768900 }, { "epoch": 7.56, "grad_norm": 4.512560844421387, "learning_rate": 1.2336332131827978e-06, "loss": 0.0232, "step": 768925 }, { "epoch": 7.56, "grad_norm": 5.425692558288574, "learning_rate": 1.2335090907285492e-06, "loss": 0.0686, "step": 768950 }, { "epoch": 7.56, "grad_norm": 0.016700822860002518, "learning_rate": 1.2333849682743008e-06, "loss": 0.0209, "step": 768975 }, { "epoch": 7.56, "grad_norm": 2.5809497833251953, "learning_rate": 1.2332608458200523e-06, "loss": 0.0334, "step": 769000 }, { "epoch": 7.56, "grad_norm": 19.83787727355957, "learning_rate": 1.233136723365804e-06, "loss": 0.0349, "step": 769025 }, { "epoch": 7.56, "grad_norm": 4.436513900756836, "learning_rate": 1.2330126009115555e-06, "loss": 0.0598, "step": 769050 }, { "epoch": 7.56, "grad_norm": 0.06729784607887268, "learning_rate": 1.232888478457307e-06, "loss": 0.0208, "step": 769075 }, { "epoch": 7.56, "grad_norm": 2.827328681945801, "learning_rate": 1.2327643560030584e-06, "loss": 0.0249, "step": 769100 }, { "epoch": 7.56, "grad_norm": 2.4960014820098877, "learning_rate": 1.23264023354881e-06, "loss": 0.019, "step": 769125 }, { "epoch": 7.56, "grad_norm": 2.0302016735076904, "learning_rate": 1.2325161110945616e-06, "loss": 0.0465, "step": 769150 }, { "epoch": 7.56, "grad_norm": 1.995134711265564, "learning_rate": 1.232391988640313e-06, "loss": 0.021, "step": 769175 }, { "epoch": 7.56, "grad_norm": 5.138845443725586, "learning_rate": 1.2322678661860645e-06, "loss": 0.0631, "step": 769200 }, { "epoch": 7.56, "grad_norm": 2.351501941680908, "learning_rate": 1.2321437437318161e-06, "loss": 0.029, "step": 769225 }, { "epoch": 7.56, "grad_norm": 5.416782379150391, "learning_rate": 1.2320196212775678e-06, "loss": 0.0484, "step": 769250 }, { "epoch": 7.56, "grad_norm": 8.576581954956055, "learning_rate": 1.2318954988233192e-06, "loss": 0.0171, "step": 769275 }, { "epoch": 7.56, "grad_norm": 3.5276358127593994, "learning_rate": 1.2317713763690706e-06, "loss": 0.0647, "step": 769300 }, { "epoch": 7.56, "grad_norm": 33.53401184082031, "learning_rate": 1.2316472539148222e-06, "loss": 0.0177, "step": 769325 }, { "epoch": 7.56, "grad_norm": 2.5906941890716553, "learning_rate": 1.2315231314605739e-06, "loss": 0.0545, "step": 769350 }, { "epoch": 7.56, "grad_norm": 1.0935232639312744, "learning_rate": 1.2313990090063253e-06, "loss": 0.0223, "step": 769375 }, { "epoch": 7.56, "grad_norm": 4.106019496917725, "learning_rate": 1.231274886552077e-06, "loss": 0.0504, "step": 769400 }, { "epoch": 7.57, "grad_norm": 0.1302890181541443, "learning_rate": 1.2311507640978284e-06, "loss": 0.0215, "step": 769425 }, { "epoch": 7.57, "grad_norm": 4.9502081871032715, "learning_rate": 1.23102664164358e-06, "loss": 0.0457, "step": 769450 }, { "epoch": 7.57, "grad_norm": 10.866389274597168, "learning_rate": 1.2309025191893316e-06, "loss": 0.0153, "step": 769475 }, { "epoch": 7.57, "grad_norm": 5.325601100921631, "learning_rate": 1.230778396735083e-06, "loss": 0.0876, "step": 769500 }, { "epoch": 7.57, "grad_norm": 4.409078121185303, "learning_rate": 1.2306542742808347e-06, "loss": 0.0282, "step": 769525 }, { "epoch": 7.57, "grad_norm": 3.0747177600860596, "learning_rate": 1.2305301518265861e-06, "loss": 0.0648, "step": 769550 }, { "epoch": 7.57, "grad_norm": 0.2084532231092453, "learning_rate": 1.2304060293723377e-06, "loss": 0.0117, "step": 769575 }, { "epoch": 7.57, "grad_norm": 3.9863827228546143, "learning_rate": 1.2302819069180892e-06, "loss": 0.0688, "step": 769600 }, { "epoch": 7.57, "grad_norm": 0.16524450480937958, "learning_rate": 1.2301577844638408e-06, "loss": 0.021, "step": 769625 }, { "epoch": 7.57, "grad_norm": 3.1148955821990967, "learning_rate": 1.2300336620095924e-06, "loss": 0.0453, "step": 769650 }, { "epoch": 7.57, "grad_norm": 0.9310137033462524, "learning_rate": 1.2299095395553439e-06, "loss": 0.0184, "step": 769675 }, { "epoch": 7.57, "grad_norm": 5.062459468841553, "learning_rate": 1.2297854171010953e-06, "loss": 0.0782, "step": 769700 }, { "epoch": 7.57, "grad_norm": 5.016522407531738, "learning_rate": 1.229661294646847e-06, "loss": 0.0215, "step": 769725 }, { "epoch": 7.57, "grad_norm": 4.1834821701049805, "learning_rate": 1.2295371721925986e-06, "loss": 0.0964, "step": 769750 }, { "epoch": 7.57, "grad_norm": 4.272343158721924, "learning_rate": 1.22941304973835e-06, "loss": 0.0253, "step": 769775 }, { "epoch": 7.57, "grad_norm": 2.2624943256378174, "learning_rate": 1.2292889272841014e-06, "loss": 0.0637, "step": 769800 }, { "epoch": 7.57, "grad_norm": 0.4862065315246582, "learning_rate": 1.229164804829853e-06, "loss": 0.0154, "step": 769825 }, { "epoch": 7.57, "grad_norm": 2.398101568222046, "learning_rate": 1.2290406823756047e-06, "loss": 0.065, "step": 769850 }, { "epoch": 7.57, "grad_norm": 2.0088894367218018, "learning_rate": 1.228916559921356e-06, "loss": 0.0289, "step": 769875 }, { "epoch": 7.57, "grad_norm": 3.3734548091888428, "learning_rate": 1.2287924374671077e-06, "loss": 0.0436, "step": 769900 }, { "epoch": 7.57, "grad_norm": 9.292278289794922, "learning_rate": 1.2286683150128592e-06, "loss": 0.0128, "step": 769925 }, { "epoch": 7.57, "grad_norm": 3.224436044692993, "learning_rate": 1.2285441925586108e-06, "loss": 0.0273, "step": 769950 }, { "epoch": 7.57, "grad_norm": 0.09193209558725357, "learning_rate": 1.2284200701043622e-06, "loss": 0.033, "step": 769975 }, { "epoch": 7.57, "grad_norm": 3.2208826541900635, "learning_rate": 1.2282959476501138e-06, "loss": 0.0573, "step": 770000 }, { "epoch": 7.57, "grad_norm": 1.2900725603103638, "learning_rate": 1.2281718251958653e-06, "loss": 0.0183, "step": 770025 }, { "epoch": 7.57, "grad_norm": 3.8884661197662354, "learning_rate": 1.228047702741617e-06, "loss": 0.0528, "step": 770050 }, { "epoch": 7.57, "grad_norm": 0.09360143542289734, "learning_rate": 1.2279235802873685e-06, "loss": 0.0254, "step": 770075 }, { "epoch": 7.57, "grad_norm": 2.919433116912842, "learning_rate": 1.22779945783312e-06, "loss": 0.0501, "step": 770100 }, { "epoch": 7.57, "grad_norm": 3.179827928543091, "learning_rate": 1.2276753353788714e-06, "loss": 0.0124, "step": 770125 }, { "epoch": 7.57, "grad_norm": 2.4907994270324707, "learning_rate": 1.227551212924623e-06, "loss": 0.0609, "step": 770150 }, { "epoch": 7.57, "grad_norm": 7.483229160308838, "learning_rate": 1.2274270904703747e-06, "loss": 0.0239, "step": 770175 }, { "epoch": 7.57, "grad_norm": 1.563388466835022, "learning_rate": 1.2273079329142961e-06, "loss": 0.0509, "step": 770200 }, { "epoch": 7.57, "grad_norm": 8.086978912353516, "learning_rate": 1.2271838104600475e-06, "loss": 0.0116, "step": 770225 }, { "epoch": 7.57, "grad_norm": 2.2460672855377197, "learning_rate": 1.2270596880057992e-06, "loss": 0.0768, "step": 770250 }, { "epoch": 7.57, "grad_norm": 0.9476963877677917, "learning_rate": 1.2269355655515506e-06, "loss": 0.0156, "step": 770275 }, { "epoch": 7.57, "grad_norm": 3.3270938396453857, "learning_rate": 1.2268114430973022e-06, "loss": 0.0639, "step": 770300 }, { "epoch": 7.57, "grad_norm": 5.570344924926758, "learning_rate": 1.2266873206430537e-06, "loss": 0.0123, "step": 770325 }, { "epoch": 7.57, "grad_norm": 2.7158963680267334, "learning_rate": 1.2265631981888053e-06, "loss": 0.0583, "step": 770350 }, { "epoch": 7.57, "grad_norm": 11.916387557983398, "learning_rate": 1.2264390757345567e-06, "loss": 0.023, "step": 770375 }, { "epoch": 7.57, "grad_norm": 4.217799186706543, "learning_rate": 1.2263149532803083e-06, "loss": 0.0565, "step": 770400 }, { "epoch": 7.57, "grad_norm": 1.1553692817687988, "learning_rate": 1.22619083082606e-06, "loss": 0.0267, "step": 770425 }, { "epoch": 7.58, "grad_norm": 2.868917226791382, "learning_rate": 1.2260667083718114e-06, "loss": 0.0429, "step": 770450 }, { "epoch": 7.58, "grad_norm": 0.3434557616710663, "learning_rate": 1.2259425859175628e-06, "loss": 0.0188, "step": 770475 }, { "epoch": 7.58, "grad_norm": 3.142296314239502, "learning_rate": 1.2258184634633145e-06, "loss": 0.0761, "step": 770500 }, { "epoch": 7.58, "grad_norm": 5.341429233551025, "learning_rate": 1.225694341009066e-06, "loss": 0.0156, "step": 770525 }, { "epoch": 7.58, "grad_norm": 3.047337532043457, "learning_rate": 1.2255702185548175e-06, "loss": 0.0453, "step": 770550 }, { "epoch": 7.58, "grad_norm": 6.80040168762207, "learning_rate": 1.225446096100569e-06, "loss": 0.0213, "step": 770575 }, { "epoch": 7.58, "grad_norm": 3.8212881088256836, "learning_rate": 1.2253219736463206e-06, "loss": 0.0754, "step": 770600 }, { "epoch": 7.58, "grad_norm": 6.883050441741943, "learning_rate": 1.2251978511920722e-06, "loss": 0.0171, "step": 770625 }, { "epoch": 7.58, "grad_norm": 4.005136966705322, "learning_rate": 1.2250737287378236e-06, "loss": 0.0416, "step": 770650 }, { "epoch": 7.58, "grad_norm": 0.019309833645820618, "learning_rate": 1.2249496062835753e-06, "loss": 0.0181, "step": 770675 }, { "epoch": 7.58, "grad_norm": 3.184645891189575, "learning_rate": 1.2248254838293267e-06, "loss": 0.0416, "step": 770700 }, { "epoch": 7.58, "grad_norm": 3.378464460372925, "learning_rate": 1.2247013613750783e-06, "loss": 0.013, "step": 770725 }, { "epoch": 7.58, "grad_norm": 2.049696922302246, "learning_rate": 1.2245772389208298e-06, "loss": 0.041, "step": 770750 }, { "epoch": 7.58, "grad_norm": 0.2570379972457886, "learning_rate": 1.2244531164665814e-06, "loss": 0.0107, "step": 770775 }, { "epoch": 7.58, "grad_norm": 2.233778953552246, "learning_rate": 1.2243289940123328e-06, "loss": 0.0472, "step": 770800 }, { "epoch": 7.58, "grad_norm": 3.3198630809783936, "learning_rate": 1.2242048715580844e-06, "loss": 0.0156, "step": 770825 }, { "epoch": 7.58, "grad_norm": 3.7369189262390137, "learning_rate": 1.224080749103836e-06, "loss": 0.0638, "step": 770850 }, { "epoch": 7.58, "grad_norm": 1.8578699827194214, "learning_rate": 1.2239566266495875e-06, "loss": 0.0204, "step": 770875 }, { "epoch": 7.58, "grad_norm": 2.9671170711517334, "learning_rate": 1.223832504195339e-06, "loss": 0.0709, "step": 770900 }, { "epoch": 7.58, "grad_norm": 5.533029079437256, "learning_rate": 1.2237083817410906e-06, "loss": 0.0258, "step": 770925 }, { "epoch": 7.58, "grad_norm": 4.720176696777344, "learning_rate": 1.2235842592868422e-06, "loss": 0.055, "step": 770950 }, { "epoch": 7.58, "grad_norm": 1.7615265846252441, "learning_rate": 1.2234601368325936e-06, "loss": 0.0246, "step": 770975 }, { "epoch": 7.58, "grad_norm": 5.768747806549072, "learning_rate": 1.2233360143783453e-06, "loss": 0.0547, "step": 771000 }, { "epoch": 7.58, "grad_norm": 0.3176534175872803, "learning_rate": 1.2232118919240969e-06, "loss": 0.0306, "step": 771025 }, { "epoch": 7.58, "grad_norm": 3.623544931411743, "learning_rate": 1.2230877694698483e-06, "loss": 0.0577, "step": 771050 }, { "epoch": 7.58, "grad_norm": 12.192919731140137, "learning_rate": 1.2229636470155997e-06, "loss": 0.0209, "step": 771075 }, { "epoch": 7.58, "grad_norm": 3.362708806991577, "learning_rate": 1.2228395245613514e-06, "loss": 0.0458, "step": 771100 }, { "epoch": 7.58, "grad_norm": 0.5420225858688354, "learning_rate": 1.222715402107103e-06, "loss": 0.0225, "step": 771125 }, { "epoch": 7.58, "grad_norm": 4.801219463348389, "learning_rate": 1.2225912796528544e-06, "loss": 0.0518, "step": 771150 }, { "epoch": 7.58, "grad_norm": 0.0437735840678215, "learning_rate": 1.2224671571986059e-06, "loss": 0.0108, "step": 771175 }, { "epoch": 7.58, "grad_norm": 3.321685552597046, "learning_rate": 1.2223430347443575e-06, "loss": 0.0858, "step": 771200 }, { "epoch": 7.58, "grad_norm": 0.036275465041399, "learning_rate": 1.2222189122901091e-06, "loss": 0.0215, "step": 771225 }, { "epoch": 7.58, "grad_norm": 4.765471935272217, "learning_rate": 1.2220947898358605e-06, "loss": 0.043, "step": 771250 }, { "epoch": 7.58, "grad_norm": 1.2430129051208496, "learning_rate": 1.2219706673816122e-06, "loss": 0.0331, "step": 771275 }, { "epoch": 7.58, "grad_norm": 4.170726299285889, "learning_rate": 1.2218465449273636e-06, "loss": 0.0522, "step": 771300 }, { "epoch": 7.58, "grad_norm": 3.7819674015045166, "learning_rate": 1.2217224224731152e-06, "loss": 0.0216, "step": 771325 }, { "epoch": 7.58, "grad_norm": 2.780799388885498, "learning_rate": 1.2215983000188667e-06, "loss": 0.0606, "step": 771350 }, { "epoch": 7.58, "grad_norm": 1.6857352256774902, "learning_rate": 1.2214741775646183e-06, "loss": 0.0143, "step": 771375 }, { "epoch": 7.58, "grad_norm": 3.3476414680480957, "learning_rate": 1.2213500551103697e-06, "loss": 0.0428, "step": 771400 }, { "epoch": 7.58, "grad_norm": 11.346930503845215, "learning_rate": 1.2212259326561214e-06, "loss": 0.0147, "step": 771425 }, { "epoch": 7.59, "grad_norm": 3.366344451904297, "learning_rate": 1.221101810201873e-06, "loss": 0.0584, "step": 771450 }, { "epoch": 7.59, "grad_norm": 0.7151883840560913, "learning_rate": 1.2209776877476244e-06, "loss": 0.0137, "step": 771475 }, { "epoch": 7.59, "grad_norm": 2.4668638706207275, "learning_rate": 1.2208535652933758e-06, "loss": 0.0644, "step": 771500 }, { "epoch": 7.59, "grad_norm": 1.0256668329238892, "learning_rate": 1.2207294428391275e-06, "loss": 0.0259, "step": 771525 }, { "epoch": 7.59, "grad_norm": 2.1247198581695557, "learning_rate": 1.220605320384879e-06, "loss": 0.0639, "step": 771550 }, { "epoch": 7.59, "grad_norm": 4.6897292137146, "learning_rate": 1.2204811979306305e-06, "loss": 0.0163, "step": 771575 }, { "epoch": 7.59, "grad_norm": 1.5398322343826294, "learning_rate": 1.220357075476382e-06, "loss": 0.0543, "step": 771600 }, { "epoch": 7.59, "grad_norm": 10.927668571472168, "learning_rate": 1.2202329530221336e-06, "loss": 0.0222, "step": 771625 }, { "epoch": 7.59, "grad_norm": 2.479550838470459, "learning_rate": 1.2201088305678852e-06, "loss": 0.0505, "step": 771650 }, { "epoch": 7.59, "grad_norm": 0.3777962327003479, "learning_rate": 1.2199847081136366e-06, "loss": 0.0239, "step": 771675 }, { "epoch": 7.59, "grad_norm": 3.0008387565612793, "learning_rate": 1.2198605856593883e-06, "loss": 0.0567, "step": 771700 }, { "epoch": 7.59, "grad_norm": 0.44692692160606384, "learning_rate": 1.2197364632051397e-06, "loss": 0.0182, "step": 771725 }, { "epoch": 7.59, "grad_norm": 3.3794422149658203, "learning_rate": 1.2196123407508913e-06, "loss": 0.0731, "step": 771750 }, { "epoch": 7.59, "grad_norm": 0.40744122862815857, "learning_rate": 1.2194882182966428e-06, "loss": 0.0217, "step": 771775 }, { "epoch": 7.59, "grad_norm": 4.289844989776611, "learning_rate": 1.2193640958423944e-06, "loss": 0.0481, "step": 771800 }, { "epoch": 7.59, "grad_norm": 1.0716849565505981, "learning_rate": 1.2192399733881458e-06, "loss": 0.0182, "step": 771825 }, { "epoch": 7.59, "grad_norm": 1.5859018564224243, "learning_rate": 1.2191158509338975e-06, "loss": 0.0529, "step": 771850 }, { "epoch": 7.59, "grad_norm": 1.905495524406433, "learning_rate": 1.218991728479649e-06, "loss": 0.0178, "step": 771875 }, { "epoch": 7.59, "grad_norm": 5.3434576988220215, "learning_rate": 1.2188676060254005e-06, "loss": 0.0722, "step": 771900 }, { "epoch": 7.59, "grad_norm": 3.121619939804077, "learning_rate": 1.218743483571152e-06, "loss": 0.0256, "step": 771925 }, { "epoch": 7.59, "grad_norm": 3.302845001220703, "learning_rate": 1.2186193611169036e-06, "loss": 0.0614, "step": 771950 }, { "epoch": 7.59, "grad_norm": 1.8564707040786743, "learning_rate": 1.2184952386626552e-06, "loss": 0.0179, "step": 771975 }, { "epoch": 7.59, "grad_norm": 1.8927040100097656, "learning_rate": 1.2183711162084066e-06, "loss": 0.0428, "step": 772000 }, { "epoch": 7.59, "grad_norm": 1.2287958860397339, "learning_rate": 1.218246993754158e-06, "loss": 0.0185, "step": 772025 }, { "epoch": 7.59, "grad_norm": 2.787158727645874, "learning_rate": 1.2181228712999097e-06, "loss": 0.0625, "step": 772050 }, { "epoch": 7.59, "grad_norm": 3.05090594291687, "learning_rate": 1.2179987488456613e-06, "loss": 0.0156, "step": 772075 }, { "epoch": 7.59, "grad_norm": 2.2035768032073975, "learning_rate": 1.2178746263914127e-06, "loss": 0.0637, "step": 772100 }, { "epoch": 7.59, "grad_norm": 0.42987269163131714, "learning_rate": 1.2177505039371644e-06, "loss": 0.0402, "step": 772125 }, { "epoch": 7.59, "grad_norm": 3.0573160648345947, "learning_rate": 1.2176263814829158e-06, "loss": 0.0686, "step": 772150 }, { "epoch": 7.59, "grad_norm": 0.10403869301080704, "learning_rate": 1.2175022590286674e-06, "loss": 0.02, "step": 772175 }, { "epoch": 7.59, "grad_norm": 2.794208526611328, "learning_rate": 1.2173781365744189e-06, "loss": 0.0726, "step": 772200 }, { "epoch": 7.59, "grad_norm": 1.8512858152389526, "learning_rate": 1.2172540141201705e-06, "loss": 0.0244, "step": 772225 }, { "epoch": 7.59, "grad_norm": 4.538145542144775, "learning_rate": 1.217129891665922e-06, "loss": 0.0797, "step": 772250 }, { "epoch": 7.59, "grad_norm": 5.144410133361816, "learning_rate": 1.2170057692116736e-06, "loss": 0.0202, "step": 772275 }, { "epoch": 7.59, "grad_norm": 4.195774555206299, "learning_rate": 1.2168816467574252e-06, "loss": 0.0587, "step": 772300 }, { "epoch": 7.59, "grad_norm": 0.03322805464267731, "learning_rate": 1.2167575243031766e-06, "loss": 0.018, "step": 772325 }, { "epoch": 7.59, "grad_norm": 2.874459981918335, "learning_rate": 1.216633401848928e-06, "loss": 0.0694, "step": 772350 }, { "epoch": 7.59, "grad_norm": 7.184656143188477, "learning_rate": 1.2165092793946797e-06, "loss": 0.0123, "step": 772375 }, { "epoch": 7.59, "grad_norm": 4.419103622436523, "learning_rate": 1.2163851569404313e-06, "loss": 0.0713, "step": 772400 }, { "epoch": 7.59, "grad_norm": 1.4040526151657104, "learning_rate": 1.2162610344861827e-06, "loss": 0.014, "step": 772425 }, { "epoch": 7.59, "grad_norm": 3.0901646614074707, "learning_rate": 1.2161369120319344e-06, "loss": 0.0382, "step": 772450 }, { "epoch": 7.6, "grad_norm": 2.6703054904937744, "learning_rate": 1.216012789577686e-06, "loss": 0.0153, "step": 772475 }, { "epoch": 7.6, "grad_norm": 3.1115963459014893, "learning_rate": 1.2158886671234374e-06, "loss": 0.0474, "step": 772500 }, { "epoch": 7.6, "grad_norm": 0.47189855575561523, "learning_rate": 1.2157645446691888e-06, "loss": 0.0307, "step": 772525 }, { "epoch": 7.6, "grad_norm": 2.7423958778381348, "learning_rate": 1.2156404222149405e-06, "loss": 0.0511, "step": 772550 }, { "epoch": 7.6, "grad_norm": 2.1504766941070557, "learning_rate": 1.2155162997606921e-06, "loss": 0.022, "step": 772575 }, { "epoch": 7.6, "grad_norm": 3.2919085025787354, "learning_rate": 1.2153921773064435e-06, "loss": 0.0493, "step": 772600 }, { "epoch": 7.6, "grad_norm": 4.409681797027588, "learning_rate": 1.215268054852195e-06, "loss": 0.0147, "step": 772625 }, { "epoch": 7.6, "grad_norm": 3.2505619525909424, "learning_rate": 1.2151439323979466e-06, "loss": 0.0408, "step": 772650 }, { "epoch": 7.6, "grad_norm": 18.767330169677734, "learning_rate": 1.2150198099436982e-06, "loss": 0.0263, "step": 772675 }, { "epoch": 7.6, "grad_norm": 4.297287464141846, "learning_rate": 1.2148956874894497e-06, "loss": 0.0621, "step": 772700 }, { "epoch": 7.6, "grad_norm": 0.08654635399580002, "learning_rate": 1.2147715650352013e-06, "loss": 0.0173, "step": 772725 }, { "epoch": 7.6, "grad_norm": 2.74839186668396, "learning_rate": 1.2146474425809527e-06, "loss": 0.0381, "step": 772750 }, { "epoch": 7.6, "grad_norm": 1.2967551946640015, "learning_rate": 1.2145233201267043e-06, "loss": 0.0232, "step": 772775 }, { "epoch": 7.6, "grad_norm": 2.084038257598877, "learning_rate": 1.214399197672456e-06, "loss": 0.0601, "step": 772800 }, { "epoch": 7.6, "grad_norm": 0.02239333651959896, "learning_rate": 1.2142750752182074e-06, "loss": 0.0151, "step": 772825 }, { "epoch": 7.6, "grad_norm": 2.5207581520080566, "learning_rate": 1.2141509527639588e-06, "loss": 0.0624, "step": 772850 }, { "epoch": 7.6, "grad_norm": 2.0751571655273438, "learning_rate": 1.2140268303097105e-06, "loss": 0.0155, "step": 772875 }, { "epoch": 7.6, "grad_norm": 2.6433489322662354, "learning_rate": 1.213907672753632e-06, "loss": 0.0471, "step": 772900 }, { "epoch": 7.6, "grad_norm": 3.091564655303955, "learning_rate": 1.2137835502993836e-06, "loss": 0.0307, "step": 772925 }, { "epoch": 7.6, "grad_norm": 2.5647010803222656, "learning_rate": 1.213659427845135e-06, "loss": 0.0644, "step": 772950 }, { "epoch": 7.6, "grad_norm": 0.12776871025562286, "learning_rate": 1.2135353053908866e-06, "loss": 0.0137, "step": 772975 }, { "epoch": 7.6, "grad_norm": 4.628912448883057, "learning_rate": 1.213411182936638e-06, "loss": 0.0711, "step": 773000 }, { "epoch": 7.6, "grad_norm": 0.055878594517707825, "learning_rate": 1.2132870604823897e-06, "loss": 0.0184, "step": 773025 }, { "epoch": 7.6, "grad_norm": 5.814827919006348, "learning_rate": 1.213162938028141e-06, "loss": 0.0635, "step": 773050 }, { "epoch": 7.6, "grad_norm": 5.876716136932373, "learning_rate": 1.2130388155738927e-06, "loss": 0.0204, "step": 773075 }, { "epoch": 7.6, "grad_norm": 2.7496702671051025, "learning_rate": 1.2129146931196441e-06, "loss": 0.0534, "step": 773100 }, { "epoch": 7.6, "grad_norm": 3.7992827892303467, "learning_rate": 1.2127905706653958e-06, "loss": 0.0234, "step": 773125 }, { "epoch": 7.6, "grad_norm": 4.808215618133545, "learning_rate": 1.2126664482111474e-06, "loss": 0.0674, "step": 773150 }, { "epoch": 7.6, "grad_norm": 12.185460090637207, "learning_rate": 1.2125423257568988e-06, "loss": 0.0124, "step": 773175 }, { "epoch": 7.6, "grad_norm": 4.660836219787598, "learning_rate": 1.2124182033026503e-06, "loss": 0.096, "step": 773200 }, { "epoch": 7.6, "grad_norm": 6.782020568847656, "learning_rate": 1.212294080848402e-06, "loss": 0.0104, "step": 773225 }, { "epoch": 7.6, "grad_norm": 3.5893044471740723, "learning_rate": 1.2121699583941535e-06, "loss": 0.0575, "step": 773250 }, { "epoch": 7.6, "grad_norm": 1.5040956735610962, "learning_rate": 1.212045835939905e-06, "loss": 0.0248, "step": 773275 }, { "epoch": 7.6, "grad_norm": 1.9062317609786987, "learning_rate": 1.2119217134856564e-06, "loss": 0.0523, "step": 773300 }, { "epoch": 7.6, "grad_norm": 6.953103542327881, "learning_rate": 1.211797591031408e-06, "loss": 0.0239, "step": 773325 }, { "epoch": 7.6, "grad_norm": 4.149497032165527, "learning_rate": 1.2116734685771597e-06, "loss": 0.0589, "step": 773350 }, { "epoch": 7.6, "grad_norm": 4.3959455490112305, "learning_rate": 1.211549346122911e-06, "loss": 0.0299, "step": 773375 }, { "epoch": 7.6, "grad_norm": 3.546260356903076, "learning_rate": 1.2114252236686627e-06, "loss": 0.0556, "step": 773400 }, { "epoch": 7.6, "grad_norm": 8.039292335510254, "learning_rate": 1.2113011012144141e-06, "loss": 0.0312, "step": 773425 }, { "epoch": 7.6, "grad_norm": 3.2705459594726562, "learning_rate": 1.2111769787601658e-06, "loss": 0.0564, "step": 773450 }, { "epoch": 7.6, "grad_norm": 7.271119594573975, "learning_rate": 1.2110528563059172e-06, "loss": 0.0172, "step": 773475 }, { "epoch": 7.61, "grad_norm": 3.1824474334716797, "learning_rate": 1.2109287338516688e-06, "loss": 0.0687, "step": 773500 }, { "epoch": 7.61, "grad_norm": 0.6802160739898682, "learning_rate": 1.2108046113974202e-06, "loss": 0.0095, "step": 773525 }, { "epoch": 7.61, "grad_norm": 7.170080661773682, "learning_rate": 1.2106804889431719e-06, "loss": 0.0607, "step": 773550 }, { "epoch": 7.61, "grad_norm": 0.600445568561554, "learning_rate": 1.2105563664889235e-06, "loss": 0.0334, "step": 773575 }, { "epoch": 7.61, "grad_norm": 2.9448869228363037, "learning_rate": 1.210432244034675e-06, "loss": 0.0635, "step": 773600 }, { "epoch": 7.61, "grad_norm": 0.470052570104599, "learning_rate": 1.2103081215804264e-06, "loss": 0.0168, "step": 773625 }, { "epoch": 7.61, "grad_norm": 3.7931365966796875, "learning_rate": 1.210183999126178e-06, "loss": 0.0521, "step": 773650 }, { "epoch": 7.61, "grad_norm": 2.9154212474823, "learning_rate": 1.2100598766719296e-06, "loss": 0.0193, "step": 773675 }, { "epoch": 7.61, "grad_norm": 3.8455684185028076, "learning_rate": 1.209935754217681e-06, "loss": 0.077, "step": 773700 }, { "epoch": 7.61, "grad_norm": 7.811640739440918, "learning_rate": 1.2098116317634325e-06, "loss": 0.0214, "step": 773725 }, { "epoch": 7.61, "grad_norm": 3.376696825027466, "learning_rate": 1.2096875093091841e-06, "loss": 0.0604, "step": 773750 }, { "epoch": 7.61, "grad_norm": 1.6500741243362427, "learning_rate": 1.2095633868549358e-06, "loss": 0.0207, "step": 773775 }, { "epoch": 7.61, "grad_norm": 1.6410354375839233, "learning_rate": 1.2094392644006872e-06, "loss": 0.0285, "step": 773800 }, { "epoch": 7.61, "grad_norm": 0.22776319086551666, "learning_rate": 1.2093151419464388e-06, "loss": 0.009, "step": 773825 }, { "epoch": 7.61, "grad_norm": 3.1420273780822754, "learning_rate": 1.2091910194921902e-06, "loss": 0.0693, "step": 773850 }, { "epoch": 7.61, "grad_norm": 0.22241821885108948, "learning_rate": 1.2090668970379419e-06, "loss": 0.0364, "step": 773875 }, { "epoch": 7.61, "grad_norm": 4.070587158203125, "learning_rate": 1.2089427745836933e-06, "loss": 0.052, "step": 773900 }, { "epoch": 7.61, "grad_norm": 0.48923933506011963, "learning_rate": 1.208818652129445e-06, "loss": 0.0179, "step": 773925 }, { "epoch": 7.61, "grad_norm": 2.3636913299560547, "learning_rate": 1.2086945296751966e-06, "loss": 0.0709, "step": 773950 }, { "epoch": 7.61, "grad_norm": 2.5192015171051025, "learning_rate": 1.208570407220948e-06, "loss": 0.0201, "step": 773975 }, { "epoch": 7.61, "grad_norm": 2.8437612056732178, "learning_rate": 1.2084462847666996e-06, "loss": 0.0564, "step": 774000 }, { "epoch": 7.61, "grad_norm": 9.9136381149292, "learning_rate": 1.208322162312451e-06, "loss": 0.0229, "step": 774025 }, { "epoch": 7.61, "grad_norm": 2.28415584564209, "learning_rate": 1.2081980398582027e-06, "loss": 0.065, "step": 774050 }, { "epoch": 7.61, "grad_norm": 9.920650482177734, "learning_rate": 1.208073917403954e-06, "loss": 0.0207, "step": 774075 }, { "epoch": 7.61, "grad_norm": 4.7521257400512695, "learning_rate": 1.2079497949497057e-06, "loss": 0.0605, "step": 774100 }, { "epoch": 7.61, "grad_norm": 0.7171512246131897, "learning_rate": 1.2078256724954572e-06, "loss": 0.0154, "step": 774125 }, { "epoch": 7.61, "grad_norm": 3.989537000656128, "learning_rate": 1.2077015500412088e-06, "loss": 0.0421, "step": 774150 }, { "epoch": 7.61, "grad_norm": 1.7026970386505127, "learning_rate": 1.2075774275869604e-06, "loss": 0.0176, "step": 774175 }, { "epoch": 7.61, "grad_norm": 2.342959403991699, "learning_rate": 1.2074533051327119e-06, "loss": 0.0612, "step": 774200 }, { "epoch": 7.61, "grad_norm": 2.3513972759246826, "learning_rate": 1.2073291826784633e-06, "loss": 0.0107, "step": 774225 }, { "epoch": 7.61, "grad_norm": 3.333141803741455, "learning_rate": 1.207205060224215e-06, "loss": 0.0749, "step": 774250 }, { "epoch": 7.61, "grad_norm": 2.332098960876465, "learning_rate": 1.2070809377699665e-06, "loss": 0.0167, "step": 774275 }, { "epoch": 7.61, "grad_norm": 2.6942949295043945, "learning_rate": 1.206956815315718e-06, "loss": 0.0728, "step": 774300 }, { "epoch": 7.61, "grad_norm": 5.511715412139893, "learning_rate": 1.2068326928614694e-06, "loss": 0.0218, "step": 774325 }, { "epoch": 7.61, "grad_norm": 2.917479991912842, "learning_rate": 1.206708570407221e-06, "loss": 0.0829, "step": 774350 }, { "epoch": 7.61, "grad_norm": 4.499566078186035, "learning_rate": 1.2065844479529727e-06, "loss": 0.0118, "step": 774375 }, { "epoch": 7.61, "grad_norm": 3.8978893756866455, "learning_rate": 1.206460325498724e-06, "loss": 0.0594, "step": 774400 }, { "epoch": 7.61, "grad_norm": 10.330037117004395, "learning_rate": 1.2063362030444757e-06, "loss": 0.0308, "step": 774425 }, { "epoch": 7.61, "grad_norm": 2.4608652591705322, "learning_rate": 1.2062120805902271e-06, "loss": 0.0414, "step": 774450 }, { "epoch": 7.61, "grad_norm": 0.42800372838974, "learning_rate": 1.2060879581359788e-06, "loss": 0.0236, "step": 774475 }, { "epoch": 7.62, "grad_norm": 2.523378372192383, "learning_rate": 1.2059638356817302e-06, "loss": 0.0436, "step": 774500 }, { "epoch": 7.62, "grad_norm": 0.1587485373020172, "learning_rate": 1.2058397132274818e-06, "loss": 0.0166, "step": 774525 }, { "epoch": 7.62, "grad_norm": 3.680363893508911, "learning_rate": 1.2057155907732333e-06, "loss": 0.065, "step": 774550 }, { "epoch": 7.62, "grad_norm": 3.6633403301239014, "learning_rate": 1.205591468318985e-06, "loss": 0.0122, "step": 774575 }, { "epoch": 7.62, "grad_norm": 4.801158905029297, "learning_rate": 1.2054673458647365e-06, "loss": 0.0554, "step": 774600 }, { "epoch": 7.62, "grad_norm": 0.5923437476158142, "learning_rate": 1.205343223410488e-06, "loss": 0.0145, "step": 774625 }, { "epoch": 7.62, "grad_norm": 14.354122161865234, "learning_rate": 1.2052191009562394e-06, "loss": 0.0691, "step": 774650 }, { "epoch": 7.62, "grad_norm": 3.5364620685577393, "learning_rate": 1.205094978501991e-06, "loss": 0.0139, "step": 774675 }, { "epoch": 7.62, "grad_norm": 1.9407485723495483, "learning_rate": 1.2049708560477426e-06, "loss": 0.1046, "step": 774700 }, { "epoch": 7.62, "grad_norm": 2.8279759883880615, "learning_rate": 1.204846733593494e-06, "loss": 0.0195, "step": 774725 }, { "epoch": 7.62, "grad_norm": 1.691247582435608, "learning_rate": 1.2047226111392455e-06, "loss": 0.058, "step": 774750 }, { "epoch": 7.62, "grad_norm": 6.141487121582031, "learning_rate": 1.2045984886849971e-06, "loss": 0.0287, "step": 774775 }, { "epoch": 7.62, "grad_norm": 3.303227424621582, "learning_rate": 1.2044743662307488e-06, "loss": 0.0696, "step": 774800 }, { "epoch": 7.62, "grad_norm": 1.3210866451263428, "learning_rate": 1.2043502437765002e-06, "loss": 0.022, "step": 774825 }, { "epoch": 7.62, "grad_norm": 2.614820718765259, "learning_rate": 1.2042261213222518e-06, "loss": 0.0384, "step": 774850 }, { "epoch": 7.62, "grad_norm": 0.2422630786895752, "learning_rate": 1.2041019988680032e-06, "loss": 0.0341, "step": 774875 }, { "epoch": 7.62, "grad_norm": 3.9433791637420654, "learning_rate": 1.2039778764137549e-06, "loss": 0.0671, "step": 774900 }, { "epoch": 7.62, "grad_norm": 0.7829986214637756, "learning_rate": 1.2038537539595063e-06, "loss": 0.0095, "step": 774925 }, { "epoch": 7.62, "grad_norm": 3.4427225589752197, "learning_rate": 1.203729631505258e-06, "loss": 0.0375, "step": 774950 }, { "epoch": 7.62, "grad_norm": 4.8415961265563965, "learning_rate": 1.2036055090510094e-06, "loss": 0.0144, "step": 774975 }, { "epoch": 7.62, "grad_norm": 4.397520542144775, "learning_rate": 1.203481386596761e-06, "loss": 0.0668, "step": 775000 }, { "epoch": 7.62, "grad_norm": 0.6704462766647339, "learning_rate": 1.2033572641425126e-06, "loss": 0.0312, "step": 775025 }, { "epoch": 7.62, "grad_norm": 4.816381454467773, "learning_rate": 1.203233141688264e-06, "loss": 0.047, "step": 775050 }, { "epoch": 7.62, "grad_norm": 2.4098761081695557, "learning_rate": 1.2031090192340155e-06, "loss": 0.0171, "step": 775075 }, { "epoch": 7.62, "grad_norm": 2.3891093730926514, "learning_rate": 1.2029848967797671e-06, "loss": 0.0455, "step": 775100 }, { "epoch": 7.62, "grad_norm": 3.3836264610290527, "learning_rate": 1.2028607743255187e-06, "loss": 0.017, "step": 775125 }, { "epoch": 7.62, "grad_norm": 2.0398502349853516, "learning_rate": 1.2027366518712702e-06, "loss": 0.0559, "step": 775150 }, { "epoch": 7.62, "grad_norm": 1.6953110694885254, "learning_rate": 1.2026125294170216e-06, "loss": 0.0141, "step": 775175 }, { "epoch": 7.62, "grad_norm": 4.696913719177246, "learning_rate": 1.2024884069627732e-06, "loss": 0.0953, "step": 775200 }, { "epoch": 7.62, "grad_norm": 8.059712409973145, "learning_rate": 1.2023642845085249e-06, "loss": 0.0291, "step": 775225 }, { "epoch": 7.62, "grad_norm": 4.940985679626465, "learning_rate": 1.2022401620542763e-06, "loss": 0.0568, "step": 775250 }, { "epoch": 7.62, "grad_norm": 0.9960627555847168, "learning_rate": 1.202116039600028e-06, "loss": 0.0162, "step": 775275 }, { "epoch": 7.62, "grad_norm": 3.813013792037964, "learning_rate": 1.2019919171457796e-06, "loss": 0.048, "step": 775300 }, { "epoch": 7.62, "grad_norm": 4.854078769683838, "learning_rate": 1.201867794691531e-06, "loss": 0.0134, "step": 775325 }, { "epoch": 7.62, "grad_norm": 2.659330129623413, "learning_rate": 1.2017436722372824e-06, "loss": 0.0835, "step": 775350 }, { "epoch": 7.62, "grad_norm": 0.10881983488798141, "learning_rate": 1.201619549783034e-06, "loss": 0.0212, "step": 775375 }, { "epoch": 7.62, "grad_norm": 1.7856348752975464, "learning_rate": 1.2014954273287857e-06, "loss": 0.0335, "step": 775400 }, { "epoch": 7.62, "grad_norm": 5.013322830200195, "learning_rate": 1.201371304874537e-06, "loss": 0.0182, "step": 775425 }, { "epoch": 7.62, "grad_norm": 5.963285446166992, "learning_rate": 1.2012471824202887e-06, "loss": 0.0551, "step": 775450 }, { "epoch": 7.62, "grad_norm": 9.925148010253906, "learning_rate": 1.2011230599660402e-06, "loss": 0.0195, "step": 775475 }, { "epoch": 7.62, "grad_norm": 3.256477117538452, "learning_rate": 1.2009989375117918e-06, "loss": 0.0481, "step": 775500 }, { "epoch": 7.63, "grad_norm": 5.110006809234619, "learning_rate": 1.2008748150575434e-06, "loss": 0.0097, "step": 775525 }, { "epoch": 7.63, "grad_norm": 4.148504257202148, "learning_rate": 1.2007506926032948e-06, "loss": 0.0744, "step": 775550 }, { "epoch": 7.63, "grad_norm": 14.448074340820312, "learning_rate": 1.2006265701490463e-06, "loss": 0.0222, "step": 775575 }, { "epoch": 7.63, "grad_norm": 3.4236185550689697, "learning_rate": 1.2005074125929677e-06, "loss": 0.0798, "step": 775600 }, { "epoch": 7.63, "grad_norm": 1.8144320249557495, "learning_rate": 1.2003832901387194e-06, "loss": 0.0174, "step": 775625 }, { "epoch": 7.63, "grad_norm": 3.1965277194976807, "learning_rate": 1.200259167684471e-06, "loss": 0.0491, "step": 775650 }, { "epoch": 7.63, "grad_norm": 9.731623649597168, "learning_rate": 1.2001350452302224e-06, "loss": 0.012, "step": 775675 }, { "epoch": 7.63, "grad_norm": 3.163119316101074, "learning_rate": 1.200010922775974e-06, "loss": 0.0556, "step": 775700 }, { "epoch": 7.63, "grad_norm": 5.269472599029541, "learning_rate": 1.1998868003217255e-06, "loss": 0.0136, "step": 775725 }, { "epoch": 7.63, "grad_norm": 3.0029456615448, "learning_rate": 1.1997626778674771e-06, "loss": 0.0814, "step": 775750 }, { "epoch": 7.63, "grad_norm": 11.954315185546875, "learning_rate": 1.1996385554132285e-06, "loss": 0.0262, "step": 775775 }, { "epoch": 7.63, "grad_norm": 3.8324100971221924, "learning_rate": 1.1995144329589802e-06, "loss": 0.068, "step": 775800 }, { "epoch": 7.63, "grad_norm": 0.25186997652053833, "learning_rate": 1.1993903105047316e-06, "loss": 0.0219, "step": 775825 }, { "epoch": 7.63, "grad_norm": 5.696372032165527, "learning_rate": 1.1992661880504832e-06, "loss": 0.0423, "step": 775850 }, { "epoch": 7.63, "grad_norm": 5.707617282867432, "learning_rate": 1.1991420655962349e-06, "loss": 0.0163, "step": 775875 }, { "epoch": 7.63, "grad_norm": 3.5798909664154053, "learning_rate": 1.1990179431419863e-06, "loss": 0.0388, "step": 775900 }, { "epoch": 7.63, "grad_norm": 0.13848638534545898, "learning_rate": 1.1988938206877377e-06, "loss": 0.0251, "step": 775925 }, { "epoch": 7.63, "grad_norm": 2.8824000358581543, "learning_rate": 1.1987696982334893e-06, "loss": 0.0419, "step": 775950 }, { "epoch": 7.63, "grad_norm": 0.5528566837310791, "learning_rate": 1.198645575779241e-06, "loss": 0.013, "step": 775975 }, { "epoch": 7.63, "grad_norm": 2.821040153503418, "learning_rate": 1.1985214533249924e-06, "loss": 0.0575, "step": 776000 }, { "epoch": 7.63, "grad_norm": 0.3681838810443878, "learning_rate": 1.1983973308707438e-06, "loss": 0.0251, "step": 776025 }, { "epoch": 7.63, "grad_norm": 2.969194173812866, "learning_rate": 1.1982732084164955e-06, "loss": 0.047, "step": 776050 }, { "epoch": 7.63, "grad_norm": 1.6144522428512573, "learning_rate": 1.198149085962247e-06, "loss": 0.0179, "step": 776075 }, { "epoch": 7.63, "grad_norm": 1.4268230199813843, "learning_rate": 1.1980249635079985e-06, "loss": 0.0528, "step": 776100 }, { "epoch": 7.63, "grad_norm": 9.364487648010254, "learning_rate": 1.1979008410537501e-06, "loss": 0.0351, "step": 776125 }, { "epoch": 7.63, "grad_norm": 3.3604698181152344, "learning_rate": 1.1977767185995016e-06, "loss": 0.0749, "step": 776150 }, { "epoch": 7.63, "grad_norm": 15.329638481140137, "learning_rate": 1.1976525961452532e-06, "loss": 0.0135, "step": 776175 }, { "epoch": 7.63, "grad_norm": 3.5182321071624756, "learning_rate": 1.1975284736910046e-06, "loss": 0.0711, "step": 776200 }, { "epoch": 7.63, "grad_norm": 8.297337532043457, "learning_rate": 1.1974043512367563e-06, "loss": 0.0198, "step": 776225 }, { "epoch": 7.63, "grad_norm": 5.105969429016113, "learning_rate": 1.1972802287825077e-06, "loss": 0.0403, "step": 776250 }, { "epoch": 7.63, "grad_norm": 27.53068733215332, "learning_rate": 1.1971561063282593e-06, "loss": 0.0165, "step": 776275 }, { "epoch": 7.63, "grad_norm": 3.702474355697632, "learning_rate": 1.197031983874011e-06, "loss": 0.0708, "step": 776300 }, { "epoch": 7.63, "grad_norm": 0.9637930989265442, "learning_rate": 1.1969078614197624e-06, "loss": 0.0125, "step": 776325 }, { "epoch": 7.63, "grad_norm": 4.863890647888184, "learning_rate": 1.1967837389655138e-06, "loss": 0.0757, "step": 776350 }, { "epoch": 7.63, "grad_norm": 1.6225988864898682, "learning_rate": 1.1966596165112654e-06, "loss": 0.0295, "step": 776375 }, { "epoch": 7.63, "grad_norm": 1.5968189239501953, "learning_rate": 1.196535494057017e-06, "loss": 0.0696, "step": 776400 }, { "epoch": 7.63, "grad_norm": 4.811915397644043, "learning_rate": 1.1964113716027685e-06, "loss": 0.0233, "step": 776425 }, { "epoch": 7.63, "grad_norm": 3.9215967655181885, "learning_rate": 1.19628724914852e-06, "loss": 0.0551, "step": 776450 }, { "epoch": 7.63, "grad_norm": 1.003695011138916, "learning_rate": 1.1961631266942716e-06, "loss": 0.0149, "step": 776475 }, { "epoch": 7.63, "grad_norm": 2.13801646232605, "learning_rate": 1.1960390042400232e-06, "loss": 0.0487, "step": 776500 }, { "epoch": 7.63, "grad_norm": 0.5073513984680176, "learning_rate": 1.1959148817857746e-06, "loss": 0.0189, "step": 776525 }, { "epoch": 7.64, "grad_norm": 2.0689549446105957, "learning_rate": 1.1957907593315263e-06, "loss": 0.0707, "step": 776550 }, { "epoch": 7.64, "grad_norm": 0.08090750128030777, "learning_rate": 1.1956666368772777e-06, "loss": 0.0188, "step": 776575 }, { "epoch": 7.64, "grad_norm": 5.346855163574219, "learning_rate": 1.1955425144230293e-06, "loss": 0.0598, "step": 776600 }, { "epoch": 7.64, "grad_norm": 17.869890213012695, "learning_rate": 1.1954183919687807e-06, "loss": 0.0132, "step": 776625 }, { "epoch": 7.64, "grad_norm": 3.793989658355713, "learning_rate": 1.1952942695145324e-06, "loss": 0.0638, "step": 776650 }, { "epoch": 7.64, "grad_norm": 0.3719477951526642, "learning_rate": 1.1951701470602838e-06, "loss": 0.0203, "step": 776675 }, { "epoch": 7.64, "grad_norm": 2.172879219055176, "learning_rate": 1.1950460246060354e-06, "loss": 0.0342, "step": 776700 }, { "epoch": 7.64, "grad_norm": 0.3661348521709442, "learning_rate": 1.194921902151787e-06, "loss": 0.0253, "step": 776725 }, { "epoch": 7.64, "grad_norm": 3.459624767303467, "learning_rate": 1.1947977796975385e-06, "loss": 0.0577, "step": 776750 }, { "epoch": 7.64, "grad_norm": 3.732006311416626, "learning_rate": 1.1946736572432901e-06, "loss": 0.0315, "step": 776775 }, { "epoch": 7.64, "grad_norm": 2.146345853805542, "learning_rate": 1.1945495347890415e-06, "loss": 0.0654, "step": 776800 }, { "epoch": 7.64, "grad_norm": 14.75938892364502, "learning_rate": 1.1944254123347932e-06, "loss": 0.0171, "step": 776825 }, { "epoch": 7.64, "grad_norm": 3.738429069519043, "learning_rate": 1.1943012898805446e-06, "loss": 0.0691, "step": 776850 }, { "epoch": 7.64, "grad_norm": 3.7419183254241943, "learning_rate": 1.1941771674262962e-06, "loss": 0.0155, "step": 776875 }, { "epoch": 7.64, "grad_norm": 10.361905097961426, "learning_rate": 1.1940530449720479e-06, "loss": 0.068, "step": 776900 }, { "epoch": 7.64, "grad_norm": 8.886056900024414, "learning_rate": 1.1939289225177993e-06, "loss": 0.0176, "step": 776925 }, { "epoch": 7.64, "grad_norm": 2.0749666690826416, "learning_rate": 1.1938048000635507e-06, "loss": 0.0543, "step": 776950 }, { "epoch": 7.64, "grad_norm": 5.382864475250244, "learning_rate": 1.1936806776093024e-06, "loss": 0.0236, "step": 776975 }, { "epoch": 7.64, "grad_norm": 3.1832454204559326, "learning_rate": 1.193556555155054e-06, "loss": 0.0489, "step": 777000 }, { "epoch": 7.64, "grad_norm": 4.667747497558594, "learning_rate": 1.1934324327008054e-06, "loss": 0.019, "step": 777025 }, { "epoch": 7.64, "grad_norm": 5.005824089050293, "learning_rate": 1.1933083102465568e-06, "loss": 0.0669, "step": 777050 }, { "epoch": 7.64, "grad_norm": 2.562950611114502, "learning_rate": 1.1931841877923085e-06, "loss": 0.0159, "step": 777075 }, { "epoch": 7.64, "grad_norm": 3.299713611602783, "learning_rate": 1.19306006533806e-06, "loss": 0.0648, "step": 777100 }, { "epoch": 7.64, "grad_norm": 9.323650360107422, "learning_rate": 1.1929359428838115e-06, "loss": 0.0179, "step": 777125 }, { "epoch": 7.64, "grad_norm": 2.66502046585083, "learning_rate": 1.1928118204295632e-06, "loss": 0.0613, "step": 777150 }, { "epoch": 7.64, "grad_norm": 4.846421241760254, "learning_rate": 1.1926876979753146e-06, "loss": 0.0158, "step": 777175 }, { "epoch": 7.64, "grad_norm": 2.495108127593994, "learning_rate": 1.1925635755210662e-06, "loss": 0.0578, "step": 777200 }, { "epoch": 7.64, "grad_norm": 3.1459457874298096, "learning_rate": 1.1924394530668176e-06, "loss": 0.0276, "step": 777225 }, { "epoch": 7.64, "grad_norm": 2.6938726902008057, "learning_rate": 1.1923153306125693e-06, "loss": 0.0617, "step": 777250 }, { "epoch": 7.64, "grad_norm": 0.5324139595031738, "learning_rate": 1.1921912081583207e-06, "loss": 0.0234, "step": 777275 }, { "epoch": 7.64, "grad_norm": 3.629747152328491, "learning_rate": 1.1920670857040723e-06, "loss": 0.0612, "step": 777300 }, { "epoch": 7.64, "grad_norm": 1.6678804159164429, "learning_rate": 1.191942963249824e-06, "loss": 0.0242, "step": 777325 }, { "epoch": 7.64, "grad_norm": 1.8919317722320557, "learning_rate": 1.1918188407955754e-06, "loss": 0.0409, "step": 777350 }, { "epoch": 7.64, "grad_norm": 5.298003196716309, "learning_rate": 1.1916947183413268e-06, "loss": 0.0146, "step": 777375 }, { "epoch": 7.64, "grad_norm": 1.737337350845337, "learning_rate": 1.1915705958870785e-06, "loss": 0.0497, "step": 777400 }, { "epoch": 7.64, "grad_norm": 1.849900484085083, "learning_rate": 1.19144647343283e-06, "loss": 0.0187, "step": 777425 }, { "epoch": 7.64, "grad_norm": 2.9402263164520264, "learning_rate": 1.1913223509785815e-06, "loss": 0.0342, "step": 777450 }, { "epoch": 7.64, "grad_norm": 7.1361565589904785, "learning_rate": 1.191198228524333e-06, "loss": 0.022, "step": 777475 }, { "epoch": 7.64, "grad_norm": 4.412020683288574, "learning_rate": 1.1910741060700846e-06, "loss": 0.0392, "step": 777500 }, { "epoch": 7.64, "grad_norm": 0.06870228797197342, "learning_rate": 1.1909499836158362e-06, "loss": 0.0201, "step": 777525 }, { "epoch": 7.64, "grad_norm": 4.205445289611816, "learning_rate": 1.1908258611615876e-06, "loss": 0.07, "step": 777550 }, { "epoch": 7.65, "grad_norm": 1.1469180583953857, "learning_rate": 1.1907017387073393e-06, "loss": 0.0094, "step": 777575 }, { "epoch": 7.65, "grad_norm": 4.9475321769714355, "learning_rate": 1.1905776162530907e-06, "loss": 0.05, "step": 777600 }, { "epoch": 7.65, "grad_norm": 0.2490176111459732, "learning_rate": 1.1904534937988423e-06, "loss": 0.0177, "step": 777625 }, { "epoch": 7.65, "grad_norm": 3.3541903495788574, "learning_rate": 1.1903293713445937e-06, "loss": 0.059, "step": 777650 }, { "epoch": 7.65, "grad_norm": 0.07021049410104752, "learning_rate": 1.1902052488903454e-06, "loss": 0.0263, "step": 777675 }, { "epoch": 7.65, "grad_norm": 4.750112533569336, "learning_rate": 1.1900860913342668e-06, "loss": 0.0531, "step": 777700 }, { "epoch": 7.65, "grad_norm": 11.804703712463379, "learning_rate": 1.1899619688800183e-06, "loss": 0.0195, "step": 777725 }, { "epoch": 7.65, "grad_norm": 2.5833001136779785, "learning_rate": 1.1898378464257699e-06, "loss": 0.0715, "step": 777750 }, { "epoch": 7.65, "grad_norm": 1.389719009399414, "learning_rate": 1.1897137239715215e-06, "loss": 0.0172, "step": 777775 }, { "epoch": 7.65, "grad_norm": 11.805583000183105, "learning_rate": 1.189589601517273e-06, "loss": 0.0571, "step": 777800 }, { "epoch": 7.65, "grad_norm": 0.37848490476608276, "learning_rate": 1.1894654790630244e-06, "loss": 0.0162, "step": 777825 }, { "epoch": 7.65, "grad_norm": 2.1164796352386475, "learning_rate": 1.189341356608776e-06, "loss": 0.0823, "step": 777850 }, { "epoch": 7.65, "grad_norm": 5.361013889312744, "learning_rate": 1.1892172341545276e-06, "loss": 0.0246, "step": 777875 }, { "epoch": 7.65, "grad_norm": 1.805614948272705, "learning_rate": 1.189093111700279e-06, "loss": 0.0639, "step": 777900 }, { "epoch": 7.65, "grad_norm": 0.09073048084974289, "learning_rate": 1.1889689892460307e-06, "loss": 0.0388, "step": 777925 }, { "epoch": 7.65, "grad_norm": 2.5731632709503174, "learning_rate": 1.1888448667917821e-06, "loss": 0.0611, "step": 777950 }, { "epoch": 7.65, "grad_norm": 4.657426834106445, "learning_rate": 1.1887207443375338e-06, "loss": 0.0158, "step": 777975 }, { "epoch": 7.65, "grad_norm": 4.338490962982178, "learning_rate": 1.1885966218832854e-06, "loss": 0.0546, "step": 778000 }, { "epoch": 7.65, "grad_norm": 0.18790698051452637, "learning_rate": 1.1884724994290368e-06, "loss": 0.0152, "step": 778025 }, { "epoch": 7.65, "grad_norm": 3.220877170562744, "learning_rate": 1.1883483769747882e-06, "loss": 0.0628, "step": 778050 }, { "epoch": 7.65, "grad_norm": 0.11280345916748047, "learning_rate": 1.1882242545205399e-06, "loss": 0.0125, "step": 778075 }, { "epoch": 7.65, "grad_norm": 4.3071794509887695, "learning_rate": 1.1881001320662915e-06, "loss": 0.0781, "step": 778100 }, { "epoch": 7.65, "grad_norm": 0.23275814950466156, "learning_rate": 1.187976009612043e-06, "loss": 0.0181, "step": 778125 }, { "epoch": 7.65, "grad_norm": 3.1585605144500732, "learning_rate": 1.1878518871577944e-06, "loss": 0.0651, "step": 778150 }, { "epoch": 7.65, "grad_norm": 1.4718915224075317, "learning_rate": 1.187727764703546e-06, "loss": 0.0211, "step": 778175 }, { "epoch": 7.65, "grad_norm": 3.361170768737793, "learning_rate": 1.1876036422492976e-06, "loss": 0.0723, "step": 778200 }, { "epoch": 7.65, "grad_norm": 0.04050866514444351, "learning_rate": 1.187479519795049e-06, "loss": 0.0226, "step": 778225 }, { "epoch": 7.65, "grad_norm": 3.4490835666656494, "learning_rate": 1.1873553973408007e-06, "loss": 0.0635, "step": 778250 }, { "epoch": 7.65, "grad_norm": 1.9079972505569458, "learning_rate": 1.1872312748865523e-06, "loss": 0.0134, "step": 778275 }, { "epoch": 7.65, "grad_norm": 4.105047702789307, "learning_rate": 1.1871071524323037e-06, "loss": 0.0589, "step": 778300 }, { "epoch": 7.65, "grad_norm": 0.23776645958423615, "learning_rate": 1.1869830299780552e-06, "loss": 0.0222, "step": 778325 }, { "epoch": 7.65, "grad_norm": 1.9263720512390137, "learning_rate": 1.1868589075238068e-06, "loss": 0.0582, "step": 778350 }, { "epoch": 7.65, "grad_norm": 7.169559478759766, "learning_rate": 1.1867347850695584e-06, "loss": 0.0248, "step": 778375 }, { "epoch": 7.65, "grad_norm": 5.0237860679626465, "learning_rate": 1.1866106626153099e-06, "loss": 0.0586, "step": 778400 }, { "epoch": 7.65, "grad_norm": 0.1749303936958313, "learning_rate": 1.1864865401610615e-06, "loss": 0.0156, "step": 778425 }, { "epoch": 7.65, "grad_norm": 3.850067377090454, "learning_rate": 1.186362417706813e-06, "loss": 0.0475, "step": 778450 }, { "epoch": 7.65, "grad_norm": 2.857698678970337, "learning_rate": 1.1862382952525645e-06, "loss": 0.0187, "step": 778475 }, { "epoch": 7.65, "grad_norm": 2.7389988899230957, "learning_rate": 1.186114172798316e-06, "loss": 0.0539, "step": 778500 }, { "epoch": 7.65, "grad_norm": 4.636711597442627, "learning_rate": 1.1859900503440676e-06, "loss": 0.0134, "step": 778525 }, { "epoch": 7.65, "grad_norm": 2.9729506969451904, "learning_rate": 1.185865927889819e-06, "loss": 0.0768, "step": 778550 }, { "epoch": 7.66, "grad_norm": 2.1466805934906006, "learning_rate": 1.1857418054355707e-06, "loss": 0.0237, "step": 778575 }, { "epoch": 7.66, "grad_norm": 6.932805061340332, "learning_rate": 1.1856176829813223e-06, "loss": 0.0492, "step": 778600 }, { "epoch": 7.66, "grad_norm": 1.5734182596206665, "learning_rate": 1.1854935605270737e-06, "loss": 0.015, "step": 778625 }, { "epoch": 7.66, "grad_norm": 3.4696686267852783, "learning_rate": 1.1853694380728251e-06, "loss": 0.0625, "step": 778650 }, { "epoch": 7.66, "grad_norm": 4.088016510009766, "learning_rate": 1.1852453156185768e-06, "loss": 0.0147, "step": 778675 }, { "epoch": 7.66, "grad_norm": 3.50520396232605, "learning_rate": 1.1851211931643284e-06, "loss": 0.0542, "step": 778700 }, { "epoch": 7.66, "grad_norm": 2.2524592876434326, "learning_rate": 1.1849970707100798e-06, "loss": 0.0217, "step": 778725 }, { "epoch": 7.66, "grad_norm": 3.6853437423706055, "learning_rate": 1.1848729482558313e-06, "loss": 0.0754, "step": 778750 }, { "epoch": 7.66, "grad_norm": 7.930065155029297, "learning_rate": 1.184748825801583e-06, "loss": 0.0311, "step": 778775 }, { "epoch": 7.66, "grad_norm": 2.1731345653533936, "learning_rate": 1.1846247033473345e-06, "loss": 0.0434, "step": 778800 }, { "epoch": 7.66, "grad_norm": 0.119315966963768, "learning_rate": 1.184500580893086e-06, "loss": 0.0196, "step": 778825 }, { "epoch": 7.66, "grad_norm": 1.9920991659164429, "learning_rate": 1.1843764584388376e-06, "loss": 0.0727, "step": 778850 }, { "epoch": 7.66, "grad_norm": 1.1996194124221802, "learning_rate": 1.184252335984589e-06, "loss": 0.0102, "step": 778875 }, { "epoch": 7.66, "grad_norm": 4.296806812286377, "learning_rate": 1.1841282135303406e-06, "loss": 0.0592, "step": 778900 }, { "epoch": 7.66, "grad_norm": 12.104506492614746, "learning_rate": 1.184004091076092e-06, "loss": 0.0142, "step": 778925 }, { "epoch": 7.66, "grad_norm": 2.4940006732940674, "learning_rate": 1.1838799686218437e-06, "loss": 0.044, "step": 778950 }, { "epoch": 7.66, "grad_norm": 4.248804092407227, "learning_rate": 1.1837558461675951e-06, "loss": 0.0201, "step": 778975 }, { "epoch": 7.66, "grad_norm": 3.898792028427124, "learning_rate": 1.1836317237133468e-06, "loss": 0.064, "step": 779000 }, { "epoch": 7.66, "grad_norm": 0.08667996525764465, "learning_rate": 1.1835076012590984e-06, "loss": 0.0245, "step": 779025 }, { "epoch": 7.66, "grad_norm": 2.627476930618286, "learning_rate": 1.1833834788048498e-06, "loss": 0.0578, "step": 779050 }, { "epoch": 7.66, "grad_norm": 0.9137995839118958, "learning_rate": 1.1832593563506012e-06, "loss": 0.0103, "step": 779075 }, { "epoch": 7.66, "grad_norm": 4.370953559875488, "learning_rate": 1.1831352338963529e-06, "loss": 0.0641, "step": 779100 }, { "epoch": 7.66, "grad_norm": 7.527566432952881, "learning_rate": 1.1830111114421045e-06, "loss": 0.0096, "step": 779125 }, { "epoch": 7.66, "grad_norm": 1.592880129814148, "learning_rate": 1.182886988987856e-06, "loss": 0.0524, "step": 779150 }, { "epoch": 7.66, "grad_norm": 12.021888732910156, "learning_rate": 1.1827628665336074e-06, "loss": 0.0159, "step": 779175 }, { "epoch": 7.66, "grad_norm": 2.7573111057281494, "learning_rate": 1.182638744079359e-06, "loss": 0.0474, "step": 779200 }, { "epoch": 7.66, "grad_norm": 4.1306915283203125, "learning_rate": 1.1825146216251106e-06, "loss": 0.0151, "step": 779225 }, { "epoch": 7.66, "grad_norm": 4.084059715270996, "learning_rate": 1.182390499170862e-06, "loss": 0.0568, "step": 779250 }, { "epoch": 7.66, "grad_norm": 0.13475045561790466, "learning_rate": 1.1822663767166137e-06, "loss": 0.0249, "step": 779275 }, { "epoch": 7.66, "grad_norm": 2.5991649627685547, "learning_rate": 1.1821422542623651e-06, "loss": 0.0779, "step": 779300 }, { "epoch": 7.66, "grad_norm": 0.2737372815608978, "learning_rate": 1.1820181318081167e-06, "loss": 0.019, "step": 779325 }, { "epoch": 7.66, "grad_norm": 2.9547340869903564, "learning_rate": 1.1818940093538682e-06, "loss": 0.0706, "step": 779350 }, { "epoch": 7.66, "grad_norm": 1.1376757621765137, "learning_rate": 1.1817698868996198e-06, "loss": 0.0146, "step": 779375 }, { "epoch": 7.66, "grad_norm": 3.08510684967041, "learning_rate": 1.1816457644453712e-06, "loss": 0.0805, "step": 779400 }, { "epoch": 7.66, "grad_norm": 0.2495407909154892, "learning_rate": 1.1815216419911229e-06, "loss": 0.0119, "step": 779425 }, { "epoch": 7.66, "grad_norm": 1.4509484767913818, "learning_rate": 1.1813975195368745e-06, "loss": 0.036, "step": 779450 }, { "epoch": 7.66, "grad_norm": 0.09104668349027634, "learning_rate": 1.181273397082626e-06, "loss": 0.0179, "step": 779475 }, { "epoch": 7.66, "grad_norm": 3.099745035171509, "learning_rate": 1.1811492746283773e-06, "loss": 0.0528, "step": 779500 }, { "epoch": 7.66, "grad_norm": 8.731304168701172, "learning_rate": 1.181025152174129e-06, "loss": 0.0201, "step": 779525 }, { "epoch": 7.66, "grad_norm": 4.800446033477783, "learning_rate": 1.1809010297198806e-06, "loss": 0.0714, "step": 779550 }, { "epoch": 7.66, "grad_norm": 5.368635177612305, "learning_rate": 1.180776907265632e-06, "loss": 0.0212, "step": 779575 }, { "epoch": 7.67, "grad_norm": 4.804200172424316, "learning_rate": 1.1806527848113835e-06, "loss": 0.0409, "step": 779600 }, { "epoch": 7.67, "grad_norm": 12.495939254760742, "learning_rate": 1.180528662357135e-06, "loss": 0.0376, "step": 779625 }, { "epoch": 7.67, "grad_norm": 2.928880214691162, "learning_rate": 1.1804045399028867e-06, "loss": 0.0693, "step": 779650 }, { "epoch": 7.67, "grad_norm": 0.04716262221336365, "learning_rate": 1.1802804174486382e-06, "loss": 0.0269, "step": 779675 }, { "epoch": 7.67, "grad_norm": 6.206967353820801, "learning_rate": 1.1801562949943898e-06, "loss": 0.0544, "step": 779700 }, { "epoch": 7.67, "grad_norm": 5.031124114990234, "learning_rate": 1.1800321725401414e-06, "loss": 0.019, "step": 779725 }, { "epoch": 7.67, "grad_norm": 3.0081980228424072, "learning_rate": 1.1799080500858928e-06, "loss": 0.0574, "step": 779750 }, { "epoch": 7.67, "grad_norm": 2.461806535720825, "learning_rate": 1.1797839276316443e-06, "loss": 0.0217, "step": 779775 }, { "epoch": 7.67, "grad_norm": 2.2470645904541016, "learning_rate": 1.179659805177396e-06, "loss": 0.0472, "step": 779800 }, { "epoch": 7.67, "grad_norm": 5.979519844055176, "learning_rate": 1.1795356827231475e-06, "loss": 0.0205, "step": 779825 }, { "epoch": 7.67, "grad_norm": 4.0388875007629395, "learning_rate": 1.179411560268899e-06, "loss": 0.0692, "step": 779850 }, { "epoch": 7.67, "grad_norm": 2.17805814743042, "learning_rate": 1.1792874378146506e-06, "loss": 0.0189, "step": 779875 }, { "epoch": 7.67, "grad_norm": 3.0315263271331787, "learning_rate": 1.179163315360402e-06, "loss": 0.0538, "step": 779900 }, { "epoch": 7.67, "grad_norm": 0.430902898311615, "learning_rate": 1.1790391929061537e-06, "loss": 0.0181, "step": 779925 }, { "epoch": 7.67, "grad_norm": 2.2936410903930664, "learning_rate": 1.178915070451905e-06, "loss": 0.0455, "step": 779950 }, { "epoch": 7.67, "grad_norm": 26.384428024291992, "learning_rate": 1.1787909479976567e-06, "loss": 0.0245, "step": 779975 }, { "epoch": 7.67, "grad_norm": 1.866967797279358, "learning_rate": 1.1786668255434081e-06, "loss": 0.0587, "step": 780000 }, { "epoch": 7.67, "eval_loss": 0.9106917381286621, "eval_runtime": 6124.6762, "eval_samples_per_second": 1.546, "eval_steps_per_second": 0.193, "eval_wer": 0.11219661366123737, "step": 780000 }, { "epoch": 7.67, "grad_norm": 3.9426071643829346, "learning_rate": 1.1785427030891598e-06, "loss": 0.0165, "step": 780025 }, { "epoch": 7.67, "grad_norm": 4.17617654800415, "learning_rate": 1.1784185806349114e-06, "loss": 0.0586, "step": 780050 }, { "epoch": 7.67, "grad_norm": 2.8482866287231445, "learning_rate": 1.1782944581806628e-06, "loss": 0.0297, "step": 780075 }, { "epoch": 7.67, "grad_norm": 3.536558151245117, "learning_rate": 1.1781703357264143e-06, "loss": 0.0661, "step": 780100 }, { "epoch": 7.67, "grad_norm": 1.664278507232666, "learning_rate": 1.1780462132721659e-06, "loss": 0.0274, "step": 780125 }, { "epoch": 7.67, "grad_norm": 2.7170746326446533, "learning_rate": 1.1779220908179175e-06, "loss": 0.0427, "step": 780150 }, { "epoch": 7.67, "grad_norm": 3.4411447048187256, "learning_rate": 1.177797968363669e-06, "loss": 0.012, "step": 780175 }, { "epoch": 7.67, "grad_norm": 3.8870975971221924, "learning_rate": 1.1776738459094204e-06, "loss": 0.062, "step": 780200 }, { "epoch": 7.67, "grad_norm": 0.14266805350780487, "learning_rate": 1.177549723455172e-06, "loss": 0.0214, "step": 780225 }, { "epoch": 7.67, "grad_norm": 3.7607638835906982, "learning_rate": 1.1774305658990935e-06, "loss": 0.0681, "step": 780250 }, { "epoch": 7.67, "grad_norm": 0.4504948556423187, "learning_rate": 1.177306443444845e-06, "loss": 0.0155, "step": 780275 }, { "epoch": 7.67, "grad_norm": 2.738983154296875, "learning_rate": 1.1771823209905965e-06, "loss": 0.0568, "step": 780300 }, { "epoch": 7.67, "grad_norm": 0.439557820558548, "learning_rate": 1.1770581985363482e-06, "loss": 0.0219, "step": 780325 }, { "epoch": 7.67, "grad_norm": 2.853938341140747, "learning_rate": 1.1769340760820996e-06, "loss": 0.0593, "step": 780350 }, { "epoch": 7.67, "grad_norm": 0.23539166152477264, "learning_rate": 1.1768099536278512e-06, "loss": 0.0218, "step": 780375 }, { "epoch": 7.67, "grad_norm": 1.2020108699798584, "learning_rate": 1.1766858311736028e-06, "loss": 0.0637, "step": 780400 }, { "epoch": 7.67, "grad_norm": 0.06483975052833557, "learning_rate": 1.1765617087193543e-06, "loss": 0.0247, "step": 780425 }, { "epoch": 7.67, "grad_norm": 3.4386699199676514, "learning_rate": 1.1764375862651057e-06, "loss": 0.059, "step": 780450 }, { "epoch": 7.67, "grad_norm": 0.2735222578048706, "learning_rate": 1.1763134638108573e-06, "loss": 0.0301, "step": 780475 }, { "epoch": 7.67, "grad_norm": 3.2290847301483154, "learning_rate": 1.176189341356609e-06, "loss": 0.0775, "step": 780500 }, { "epoch": 7.67, "grad_norm": 6.667654037475586, "learning_rate": 1.1760652189023604e-06, "loss": 0.0175, "step": 780525 }, { "epoch": 7.67, "grad_norm": 3.831604480743408, "learning_rate": 1.1759410964481118e-06, "loss": 0.0619, "step": 780550 }, { "epoch": 7.67, "grad_norm": 11.389601707458496, "learning_rate": 1.1758169739938634e-06, "loss": 0.0212, "step": 780575 }, { "epoch": 7.67, "grad_norm": 2.812670946121216, "learning_rate": 1.175692851539615e-06, "loss": 0.0708, "step": 780600 }, { "epoch": 7.68, "grad_norm": 1.3703477382659912, "learning_rate": 1.1755687290853665e-06, "loss": 0.0082, "step": 780625 }, { "epoch": 7.68, "grad_norm": 5.865811347961426, "learning_rate": 1.1754446066311181e-06, "loss": 0.0812, "step": 780650 }, { "epoch": 7.68, "grad_norm": 6.937664985656738, "learning_rate": 1.1753204841768696e-06, "loss": 0.0345, "step": 780675 }, { "epoch": 7.68, "grad_norm": 2.5692689418792725, "learning_rate": 1.1751963617226212e-06, "loss": 0.0408, "step": 780700 }, { "epoch": 7.68, "grad_norm": 1.013102650642395, "learning_rate": 1.1750722392683726e-06, "loss": 0.0181, "step": 780725 }, { "epoch": 7.68, "grad_norm": 2.950749635696411, "learning_rate": 1.1749481168141243e-06, "loss": 0.0701, "step": 780750 }, { "epoch": 7.68, "grad_norm": 2.0878093242645264, "learning_rate": 1.1748239943598757e-06, "loss": 0.0176, "step": 780775 }, { "epoch": 7.68, "grad_norm": 7.259602069854736, "learning_rate": 1.1746998719056273e-06, "loss": 0.0661, "step": 780800 }, { "epoch": 7.68, "grad_norm": 3.5714380741119385, "learning_rate": 1.174575749451379e-06, "loss": 0.0195, "step": 780825 }, { "epoch": 7.68, "grad_norm": 2.608469247817993, "learning_rate": 1.1744516269971304e-06, "loss": 0.0478, "step": 780850 }, { "epoch": 7.68, "grad_norm": 4.405276298522949, "learning_rate": 1.1743275045428818e-06, "loss": 0.0244, "step": 780875 }, { "epoch": 7.68, "grad_norm": 2.716904878616333, "learning_rate": 1.1742033820886334e-06, "loss": 0.0509, "step": 780900 }, { "epoch": 7.68, "grad_norm": 5.050093650817871, "learning_rate": 1.174079259634385e-06, "loss": 0.029, "step": 780925 }, { "epoch": 7.68, "grad_norm": 4.7113037109375, "learning_rate": 1.1739551371801365e-06, "loss": 0.0599, "step": 780950 }, { "epoch": 7.68, "grad_norm": 1.0416499376296997, "learning_rate": 1.173831014725888e-06, "loss": 0.0155, "step": 780975 }, { "epoch": 7.68, "grad_norm": 4.303297996520996, "learning_rate": 1.1737068922716395e-06, "loss": 0.0562, "step": 781000 }, { "epoch": 7.68, "grad_norm": 0.7029220461845398, "learning_rate": 1.1735827698173912e-06, "loss": 0.0125, "step": 781025 }, { "epoch": 7.68, "grad_norm": 4.008752822875977, "learning_rate": 1.1734586473631426e-06, "loss": 0.0666, "step": 781050 }, { "epoch": 7.68, "grad_norm": 1.7095423936843872, "learning_rate": 1.1733345249088942e-06, "loss": 0.0191, "step": 781075 }, { "epoch": 7.68, "grad_norm": 5.889688968658447, "learning_rate": 1.1732104024546457e-06, "loss": 0.0506, "step": 781100 }, { "epoch": 7.68, "grad_norm": 0.030248982831835747, "learning_rate": 1.1730862800003973e-06, "loss": 0.0202, "step": 781125 }, { "epoch": 7.68, "grad_norm": 3.869314670562744, "learning_rate": 1.1729621575461487e-06, "loss": 0.0774, "step": 781150 }, { "epoch": 7.68, "grad_norm": 9.837936401367188, "learning_rate": 1.1728380350919004e-06, "loss": 0.0175, "step": 781175 }, { "epoch": 7.68, "grad_norm": 3.626638412475586, "learning_rate": 1.172713912637652e-06, "loss": 0.048, "step": 781200 }, { "epoch": 7.68, "grad_norm": 0.7524591684341431, "learning_rate": 1.1725897901834034e-06, "loss": 0.0221, "step": 781225 }, { "epoch": 7.68, "grad_norm": 5.060701370239258, "learning_rate": 1.172465667729155e-06, "loss": 0.0572, "step": 781250 }, { "epoch": 7.68, "grad_norm": 14.538840293884277, "learning_rate": 1.1723415452749065e-06, "loss": 0.0288, "step": 781275 }, { "epoch": 7.68, "grad_norm": 2.174269676208496, "learning_rate": 1.172217422820658e-06, "loss": 0.0693, "step": 781300 }, { "epoch": 7.68, "grad_norm": 0.678615152835846, "learning_rate": 1.1720933003664097e-06, "loss": 0.0144, "step": 781325 }, { "epoch": 7.68, "grad_norm": 2.889158010482788, "learning_rate": 1.1719691779121612e-06, "loss": 0.0326, "step": 781350 }, { "epoch": 7.68, "grad_norm": 13.464287757873535, "learning_rate": 1.1718450554579126e-06, "loss": 0.0179, "step": 781375 }, { "epoch": 7.68, "grad_norm": 1.547793984413147, "learning_rate": 1.1717209330036642e-06, "loss": 0.0481, "step": 781400 }, { "epoch": 7.68, "grad_norm": 11.588822364807129, "learning_rate": 1.1715968105494159e-06, "loss": 0.0286, "step": 781425 }, { "epoch": 7.68, "grad_norm": 3.2607181072235107, "learning_rate": 1.1714726880951673e-06, "loss": 0.0741, "step": 781450 }, { "epoch": 7.68, "grad_norm": 0.3173595666885376, "learning_rate": 1.1713485656409187e-06, "loss": 0.0191, "step": 781475 }, { "epoch": 7.68, "grad_norm": 3.2544236183166504, "learning_rate": 1.1712244431866703e-06, "loss": 0.0688, "step": 781500 }, { "epoch": 7.68, "grad_norm": 9.346540451049805, "learning_rate": 1.171100320732422e-06, "loss": 0.0182, "step": 781525 }, { "epoch": 7.68, "grad_norm": 2.7955479621887207, "learning_rate": 1.1709761982781734e-06, "loss": 0.0421, "step": 781550 }, { "epoch": 7.68, "grad_norm": 11.924243927001953, "learning_rate": 1.170852075823925e-06, "loss": 0.0177, "step": 781575 }, { "epoch": 7.68, "grad_norm": 2.478041887283325, "learning_rate": 1.1707279533696765e-06, "loss": 0.043, "step": 781600 }, { "epoch": 7.69, "grad_norm": 3.199094772338867, "learning_rate": 1.170603830915428e-06, "loss": 0.0261, "step": 781625 }, { "epoch": 7.69, "grad_norm": 3.9850597381591797, "learning_rate": 1.1704797084611795e-06, "loss": 0.0595, "step": 781650 }, { "epoch": 7.69, "grad_norm": 0.5412338376045227, "learning_rate": 1.1703555860069311e-06, "loss": 0.021, "step": 781675 }, { "epoch": 7.69, "grad_norm": 3.4315757751464844, "learning_rate": 1.1702314635526826e-06, "loss": 0.0498, "step": 781700 }, { "epoch": 7.69, "grad_norm": 11.455288887023926, "learning_rate": 1.1701073410984342e-06, "loss": 0.0286, "step": 781725 }, { "epoch": 7.69, "grad_norm": 4.637490272521973, "learning_rate": 1.1699832186441858e-06, "loss": 0.0494, "step": 781750 }, { "epoch": 7.69, "grad_norm": 0.5161271095275879, "learning_rate": 1.1698590961899373e-06, "loss": 0.0139, "step": 781775 }, { "epoch": 7.69, "grad_norm": 4.990983486175537, "learning_rate": 1.1697349737356887e-06, "loss": 0.0655, "step": 781800 }, { "epoch": 7.69, "grad_norm": 4.80427360534668, "learning_rate": 1.1696108512814403e-06, "loss": 0.0253, "step": 781825 }, { "epoch": 7.69, "grad_norm": 3.4276020526885986, "learning_rate": 1.169486728827192e-06, "loss": 0.0619, "step": 781850 }, { "epoch": 7.69, "grad_norm": 0.15344490110874176, "learning_rate": 1.1693626063729434e-06, "loss": 0.0181, "step": 781875 }, { "epoch": 7.69, "grad_norm": 4.220358848571777, "learning_rate": 1.1692384839186948e-06, "loss": 0.0518, "step": 781900 }, { "epoch": 7.69, "grad_norm": 0.018689678981900215, "learning_rate": 1.1691143614644464e-06, "loss": 0.0208, "step": 781925 }, { "epoch": 7.69, "grad_norm": 2.456300735473633, "learning_rate": 1.168990239010198e-06, "loss": 0.075, "step": 781950 }, { "epoch": 7.69, "grad_norm": 7.925577163696289, "learning_rate": 1.1688661165559495e-06, "loss": 0.0196, "step": 781975 }, { "epoch": 7.69, "grad_norm": 2.6519417762756348, "learning_rate": 1.1687419941017011e-06, "loss": 0.0507, "step": 782000 }, { "epoch": 7.69, "grad_norm": 0.32480043172836304, "learning_rate": 1.1686178716474526e-06, "loss": 0.0138, "step": 782025 }, { "epoch": 7.69, "grad_norm": 2.643326997756958, "learning_rate": 1.1684937491932042e-06, "loss": 0.0392, "step": 782050 }, { "epoch": 7.69, "grad_norm": 6.8191962242126465, "learning_rate": 1.1683696267389556e-06, "loss": 0.0136, "step": 782075 }, { "epoch": 7.69, "grad_norm": 4.580610752105713, "learning_rate": 1.1682455042847072e-06, "loss": 0.0711, "step": 782100 }, { "epoch": 7.69, "grad_norm": 3.5117123126983643, "learning_rate": 1.1681213818304587e-06, "loss": 0.0235, "step": 782125 }, { "epoch": 7.69, "grad_norm": 4.267278671264648, "learning_rate": 1.1679972593762103e-06, "loss": 0.0485, "step": 782150 }, { "epoch": 7.69, "grad_norm": 6.983972072601318, "learning_rate": 1.167873136921962e-06, "loss": 0.0209, "step": 782175 }, { "epoch": 7.69, "grad_norm": 2.5776145458221436, "learning_rate": 1.1677490144677134e-06, "loss": 0.0573, "step": 782200 }, { "epoch": 7.69, "grad_norm": 0.972111165523529, "learning_rate": 1.1676248920134648e-06, "loss": 0.0203, "step": 782225 }, { "epoch": 7.69, "grad_norm": 3.50723934173584, "learning_rate": 1.1675007695592164e-06, "loss": 0.0637, "step": 782250 }, { "epoch": 7.69, "grad_norm": 0.6258694529533386, "learning_rate": 1.167376647104968e-06, "loss": 0.0148, "step": 782275 }, { "epoch": 7.69, "grad_norm": 3.784745216369629, "learning_rate": 1.1672525246507195e-06, "loss": 0.0757, "step": 782300 }, { "epoch": 7.69, "grad_norm": 5.9637885093688965, "learning_rate": 1.167128402196471e-06, "loss": 0.0313, "step": 782325 }, { "epoch": 7.69, "grad_norm": 2.020831346511841, "learning_rate": 1.1670042797422225e-06, "loss": 0.0581, "step": 782350 }, { "epoch": 7.69, "grad_norm": 0.17744246125221252, "learning_rate": 1.1668801572879742e-06, "loss": 0.0309, "step": 782375 }, { "epoch": 7.69, "grad_norm": 2.3531248569488525, "learning_rate": 1.1667560348337256e-06, "loss": 0.046, "step": 782400 }, { "epoch": 7.69, "grad_norm": 0.09859424829483032, "learning_rate": 1.1666319123794772e-06, "loss": 0.0201, "step": 782425 }, { "epoch": 7.69, "grad_norm": 4.346620559692383, "learning_rate": 1.1665127548233987e-06, "loss": 0.0841, "step": 782450 }, { "epoch": 7.69, "grad_norm": 1.5835566520690918, "learning_rate": 1.16638863236915e-06, "loss": 0.0175, "step": 782475 }, { "epoch": 7.69, "grad_norm": 3.010564088821411, "learning_rate": 1.1662645099149017e-06, "loss": 0.0755, "step": 782500 }, { "epoch": 7.69, "grad_norm": 1.9150530099868774, "learning_rate": 1.1661403874606534e-06, "loss": 0.0169, "step": 782525 }, { "epoch": 7.69, "grad_norm": 3.020989418029785, "learning_rate": 1.1660162650064048e-06, "loss": 0.0681, "step": 782550 }, { "epoch": 7.69, "grad_norm": 0.317031592130661, "learning_rate": 1.1658921425521562e-06, "loss": 0.0247, "step": 782575 }, { "epoch": 7.69, "grad_norm": 2.745105504989624, "learning_rate": 1.1657680200979079e-06, "loss": 0.0714, "step": 782600 }, { "epoch": 7.69, "grad_norm": 1.8357751369476318, "learning_rate": 1.1656438976436595e-06, "loss": 0.0146, "step": 782625 }, { "epoch": 7.7, "grad_norm": 3.2087600231170654, "learning_rate": 1.165519775189411e-06, "loss": 0.0306, "step": 782650 }, { "epoch": 7.7, "grad_norm": 2.2839295864105225, "learning_rate": 1.1653956527351625e-06, "loss": 0.0195, "step": 782675 }, { "epoch": 7.7, "grad_norm": 4.156822204589844, "learning_rate": 1.1652715302809142e-06, "loss": 0.0469, "step": 782700 }, { "epoch": 7.7, "grad_norm": 6.386406898498535, "learning_rate": 1.1651474078266656e-06, "loss": 0.0126, "step": 782725 }, { "epoch": 7.7, "grad_norm": 1.5183011293411255, "learning_rate": 1.165023285372417e-06, "loss": 0.0604, "step": 782750 }, { "epoch": 7.7, "grad_norm": 0.16710180044174194, "learning_rate": 1.1648991629181687e-06, "loss": 0.0181, "step": 782775 }, { "epoch": 7.7, "grad_norm": 3.1288397312164307, "learning_rate": 1.1647750404639203e-06, "loss": 0.0574, "step": 782800 }, { "epoch": 7.7, "grad_norm": 5.83577823638916, "learning_rate": 1.1646509180096717e-06, "loss": 0.0225, "step": 782825 }, { "epoch": 7.7, "grad_norm": 3.9998908042907715, "learning_rate": 1.1645267955554231e-06, "loss": 0.0576, "step": 782850 }, { "epoch": 7.7, "grad_norm": 4.158405780792236, "learning_rate": 1.1644026731011748e-06, "loss": 0.0135, "step": 782875 }, { "epoch": 7.7, "grad_norm": 3.5018067359924316, "learning_rate": 1.1642785506469264e-06, "loss": 0.0568, "step": 782900 }, { "epoch": 7.7, "grad_norm": 7.334343433380127, "learning_rate": 1.1641544281926778e-06, "loss": 0.0211, "step": 782925 }, { "epoch": 7.7, "grad_norm": 3.859403371810913, "learning_rate": 1.1640303057384295e-06, "loss": 0.0588, "step": 782950 }, { "epoch": 7.7, "grad_norm": 9.20486068725586, "learning_rate": 1.163906183284181e-06, "loss": 0.0234, "step": 782975 }, { "epoch": 7.7, "grad_norm": 1.8276636600494385, "learning_rate": 1.1637820608299325e-06, "loss": 0.0504, "step": 783000 }, { "epoch": 7.7, "grad_norm": 1.1642144918441772, "learning_rate": 1.163657938375684e-06, "loss": 0.0321, "step": 783025 }, { "epoch": 7.7, "grad_norm": 3.230914354324341, "learning_rate": 1.1635338159214356e-06, "loss": 0.0707, "step": 783050 }, { "epoch": 7.7, "grad_norm": 1.9670177698135376, "learning_rate": 1.163409693467187e-06, "loss": 0.0175, "step": 783075 }, { "epoch": 7.7, "grad_norm": 3.837764024734497, "learning_rate": 1.1632855710129387e-06, "loss": 0.0634, "step": 783100 }, { "epoch": 7.7, "grad_norm": 22.854333877563477, "learning_rate": 1.1631614485586903e-06, "loss": 0.0267, "step": 783125 }, { "epoch": 7.7, "grad_norm": 4.011175632476807, "learning_rate": 1.1630373261044417e-06, "loss": 0.0526, "step": 783150 }, { "epoch": 7.7, "grad_norm": 3.0684614181518555, "learning_rate": 1.1629132036501931e-06, "loss": 0.0149, "step": 783175 }, { "epoch": 7.7, "grad_norm": 4.735243797302246, "learning_rate": 1.1627890811959448e-06, "loss": 0.0648, "step": 783200 }, { "epoch": 7.7, "grad_norm": 0.5506986975669861, "learning_rate": 1.1626649587416964e-06, "loss": 0.0214, "step": 783225 }, { "epoch": 7.7, "grad_norm": 3.8680574893951416, "learning_rate": 1.1625408362874478e-06, "loss": 0.0495, "step": 783250 }, { "epoch": 7.7, "grad_norm": 5.975026607513428, "learning_rate": 1.1624167138331992e-06, "loss": 0.0156, "step": 783275 }, { "epoch": 7.7, "grad_norm": 3.9445252418518066, "learning_rate": 1.1622925913789509e-06, "loss": 0.0605, "step": 783300 }, { "epoch": 7.7, "grad_norm": 7.005942344665527, "learning_rate": 1.1621684689247025e-06, "loss": 0.0116, "step": 783325 }, { "epoch": 7.7, "grad_norm": 2.4811437129974365, "learning_rate": 1.162044346470454e-06, "loss": 0.0499, "step": 783350 }, { "epoch": 7.7, "grad_norm": 6.033621788024902, "learning_rate": 1.1619202240162056e-06, "loss": 0.0175, "step": 783375 }, { "epoch": 7.7, "grad_norm": 3.5062801837921143, "learning_rate": 1.161796101561957e-06, "loss": 0.056, "step": 783400 }, { "epoch": 7.7, "grad_norm": 3.276007652282715, "learning_rate": 1.1616719791077086e-06, "loss": 0.0268, "step": 783425 }, { "epoch": 7.7, "grad_norm": 3.9609031677246094, "learning_rate": 1.16154785665346e-06, "loss": 0.0552, "step": 783450 }, { "epoch": 7.7, "grad_norm": 0.08264151960611343, "learning_rate": 1.1614237341992117e-06, "loss": 0.0094, "step": 783475 }, { "epoch": 7.7, "grad_norm": 5.314615249633789, "learning_rate": 1.1612996117449631e-06, "loss": 0.053, "step": 783500 }, { "epoch": 7.7, "grad_norm": 0.2003990262746811, "learning_rate": 1.1611754892907148e-06, "loss": 0.0156, "step": 783525 }, { "epoch": 7.7, "grad_norm": 3.810175657272339, "learning_rate": 1.1610513668364664e-06, "loss": 0.0612, "step": 783550 }, { "epoch": 7.7, "grad_norm": 0.6782510280609131, "learning_rate": 1.1609272443822178e-06, "loss": 0.0259, "step": 783575 }, { "epoch": 7.7, "grad_norm": 1.620620846748352, "learning_rate": 1.1608031219279692e-06, "loss": 0.0383, "step": 783600 }, { "epoch": 7.7, "grad_norm": 2.005037546157837, "learning_rate": 1.1606789994737209e-06, "loss": 0.0237, "step": 783625 }, { "epoch": 7.7, "grad_norm": 17.788692474365234, "learning_rate": 1.1605548770194725e-06, "loss": 0.0699, "step": 783650 }, { "epoch": 7.71, "grad_norm": 1.159655213356018, "learning_rate": 1.160430754565224e-06, "loss": 0.0114, "step": 783675 }, { "epoch": 7.71, "grad_norm": 3.6730477809906006, "learning_rate": 1.1603066321109753e-06, "loss": 0.0502, "step": 783700 }, { "epoch": 7.71, "grad_norm": 0.03728780522942543, "learning_rate": 1.160182509656727e-06, "loss": 0.0156, "step": 783725 }, { "epoch": 7.71, "grad_norm": 3.1236279010772705, "learning_rate": 1.1600583872024786e-06, "loss": 0.0537, "step": 783750 }, { "epoch": 7.71, "grad_norm": 3.6643595695495605, "learning_rate": 1.15993426474823e-06, "loss": 0.0216, "step": 783775 }, { "epoch": 7.71, "grad_norm": 2.3647189140319824, "learning_rate": 1.1598101422939817e-06, "loss": 0.0642, "step": 783800 }, { "epoch": 7.71, "grad_norm": 0.04623144492506981, "learning_rate": 1.159686019839733e-06, "loss": 0.0198, "step": 783825 }, { "epoch": 7.71, "grad_norm": 2.270220994949341, "learning_rate": 1.1595618973854847e-06, "loss": 0.0435, "step": 783850 }, { "epoch": 7.71, "grad_norm": 0.25206583738327026, "learning_rate": 1.1594377749312362e-06, "loss": 0.0156, "step": 783875 }, { "epoch": 7.71, "grad_norm": 4.492191791534424, "learning_rate": 1.1593136524769878e-06, "loss": 0.0514, "step": 783900 }, { "epoch": 7.71, "grad_norm": 1.1278884410858154, "learning_rate": 1.1591895300227392e-06, "loss": 0.0128, "step": 783925 }, { "epoch": 7.71, "grad_norm": 2.802832841873169, "learning_rate": 1.1590654075684909e-06, "loss": 0.0833, "step": 783950 }, { "epoch": 7.71, "grad_norm": 6.869307041168213, "learning_rate": 1.1589412851142425e-06, "loss": 0.0228, "step": 783975 }, { "epoch": 7.71, "grad_norm": 2.7910616397857666, "learning_rate": 1.158817162659994e-06, "loss": 0.053, "step": 784000 }, { "epoch": 7.71, "grad_norm": 0.02601091004908085, "learning_rate": 1.1586930402057455e-06, "loss": 0.0199, "step": 784025 }, { "epoch": 7.71, "grad_norm": 3.5176212787628174, "learning_rate": 1.1585689177514972e-06, "loss": 0.0672, "step": 784050 }, { "epoch": 7.71, "grad_norm": 0.2721477746963501, "learning_rate": 1.1584447952972486e-06, "loss": 0.0219, "step": 784075 }, { "epoch": 7.71, "grad_norm": 4.480744361877441, "learning_rate": 1.158320672843e-06, "loss": 0.0613, "step": 784100 }, { "epoch": 7.71, "grad_norm": 5.824295520782471, "learning_rate": 1.1581965503887517e-06, "loss": 0.021, "step": 784125 }, { "epoch": 7.71, "grad_norm": 10.05301570892334, "learning_rate": 1.1580724279345033e-06, "loss": 0.0471, "step": 784150 }, { "epoch": 7.71, "grad_norm": 1.4665086269378662, "learning_rate": 1.1579483054802547e-06, "loss": 0.0253, "step": 784175 }, { "epoch": 7.71, "grad_norm": 2.89304256439209, "learning_rate": 1.1578241830260061e-06, "loss": 0.0561, "step": 784200 }, { "epoch": 7.71, "grad_norm": 6.608104705810547, "learning_rate": 1.1577000605717578e-06, "loss": 0.0207, "step": 784225 }, { "epoch": 7.71, "grad_norm": 4.385521411895752, "learning_rate": 1.1575759381175094e-06, "loss": 0.0554, "step": 784250 }, { "epoch": 7.71, "grad_norm": 10.73200511932373, "learning_rate": 1.1574518156632608e-06, "loss": 0.0216, "step": 784275 }, { "epoch": 7.71, "grad_norm": 3.995906114578247, "learning_rate": 1.1573276932090123e-06, "loss": 0.0458, "step": 784300 }, { "epoch": 7.71, "grad_norm": 1.4922418594360352, "learning_rate": 1.1572035707547639e-06, "loss": 0.0211, "step": 784325 }, { "epoch": 7.71, "grad_norm": 3.678501844406128, "learning_rate": 1.1570794483005155e-06, "loss": 0.0776, "step": 784350 }, { "epoch": 7.71, "grad_norm": 10.620623588562012, "learning_rate": 1.156955325846267e-06, "loss": 0.0179, "step": 784375 }, { "epoch": 7.71, "grad_norm": 4.963958263397217, "learning_rate": 1.1568312033920186e-06, "loss": 0.0286, "step": 784400 }, { "epoch": 7.71, "grad_norm": 0.04471145570278168, "learning_rate": 1.15670708093777e-06, "loss": 0.0218, "step": 784425 }, { "epoch": 7.71, "grad_norm": 2.6126928329467773, "learning_rate": 1.1565829584835216e-06, "loss": 0.0572, "step": 784450 }, { "epoch": 7.71, "grad_norm": 0.29281923174858093, "learning_rate": 1.1564588360292733e-06, "loss": 0.0203, "step": 784475 }, { "epoch": 7.71, "grad_norm": 2.424988269805908, "learning_rate": 1.1563347135750247e-06, "loss": 0.0546, "step": 784500 }, { "epoch": 7.71, "grad_norm": 0.38213521242141724, "learning_rate": 1.1562105911207761e-06, "loss": 0.0287, "step": 784525 }, { "epoch": 7.71, "grad_norm": 1.9143201112747192, "learning_rate": 1.1560864686665278e-06, "loss": 0.0547, "step": 784550 }, { "epoch": 7.71, "grad_norm": 8.182722091674805, "learning_rate": 1.1559623462122794e-06, "loss": 0.0262, "step": 784575 }, { "epoch": 7.71, "grad_norm": 6.451606750488281, "learning_rate": 1.1558382237580308e-06, "loss": 0.0705, "step": 784600 }, { "epoch": 7.71, "grad_norm": 7.327608585357666, "learning_rate": 1.1557141013037822e-06, "loss": 0.0225, "step": 784625 }, { "epoch": 7.71, "grad_norm": 2.5885493755340576, "learning_rate": 1.155594943747704e-06, "loss": 0.0543, "step": 784650 }, { "epoch": 7.72, "grad_norm": 5.016245365142822, "learning_rate": 1.1554708212934553e-06, "loss": 0.0135, "step": 784675 }, { "epoch": 7.72, "grad_norm": 2.0968880653381348, "learning_rate": 1.155346698839207e-06, "loss": 0.0616, "step": 784700 }, { "epoch": 7.72, "grad_norm": 4.678051948547363, "learning_rate": 1.1552225763849584e-06, "loss": 0.0177, "step": 784725 }, { "epoch": 7.72, "grad_norm": 5.448367118835449, "learning_rate": 1.15509845393071e-06, "loss": 0.0584, "step": 784750 }, { "epoch": 7.72, "grad_norm": 0.053339242935180664, "learning_rate": 1.1549743314764614e-06, "loss": 0.0223, "step": 784775 }, { "epoch": 7.72, "grad_norm": 4.02352237701416, "learning_rate": 1.154850209022213e-06, "loss": 0.0536, "step": 784800 }, { "epoch": 7.72, "grad_norm": 3.1194097995758057, "learning_rate": 1.1547260865679647e-06, "loss": 0.0214, "step": 784825 }, { "epoch": 7.72, "grad_norm": 2.2524707317352295, "learning_rate": 1.1546019641137161e-06, "loss": 0.0549, "step": 784850 }, { "epoch": 7.72, "grad_norm": 0.7841212749481201, "learning_rate": 1.1544778416594676e-06, "loss": 0.0162, "step": 784875 }, { "epoch": 7.72, "grad_norm": 7.001913070678711, "learning_rate": 1.1543537192052192e-06, "loss": 0.0405, "step": 784900 }, { "epoch": 7.72, "grad_norm": 1.265974521636963, "learning_rate": 1.1542295967509708e-06, "loss": 0.0218, "step": 784925 }, { "epoch": 7.72, "grad_norm": 5.3518476486206055, "learning_rate": 1.1541054742967223e-06, "loss": 0.0596, "step": 784950 }, { "epoch": 7.72, "grad_norm": 0.13840322196483612, "learning_rate": 1.1539813518424737e-06, "loss": 0.0274, "step": 784975 }, { "epoch": 7.72, "grad_norm": 1.8686590194702148, "learning_rate": 1.1538572293882253e-06, "loss": 0.0384, "step": 785000 }, { "epoch": 7.72, "grad_norm": 2.494381904602051, "learning_rate": 1.153733106933977e-06, "loss": 0.0292, "step": 785025 }, { "epoch": 7.72, "grad_norm": 3.4285032749176025, "learning_rate": 1.1536089844797284e-06, "loss": 0.0529, "step": 785050 }, { "epoch": 7.72, "grad_norm": 2.0026865005493164, "learning_rate": 1.15348486202548e-06, "loss": 0.0242, "step": 785075 }, { "epoch": 7.72, "grad_norm": 4.28462553024292, "learning_rate": 1.1533607395712314e-06, "loss": 0.0568, "step": 785100 }, { "epoch": 7.72, "grad_norm": 0.20946776866912842, "learning_rate": 1.153236617116983e-06, "loss": 0.02, "step": 785125 }, { "epoch": 7.72, "grad_norm": 2.1684653759002686, "learning_rate": 1.1531124946627345e-06, "loss": 0.0553, "step": 785150 }, { "epoch": 7.72, "grad_norm": 0.9963067173957825, "learning_rate": 1.1529883722084861e-06, "loss": 0.0209, "step": 785175 }, { "epoch": 7.72, "grad_norm": 4.497232913970947, "learning_rate": 1.1528642497542375e-06, "loss": 0.052, "step": 785200 }, { "epoch": 7.72, "grad_norm": 1.569596290588379, "learning_rate": 1.1527401272999892e-06, "loss": 0.0136, "step": 785225 }, { "epoch": 7.72, "grad_norm": 4.805146217346191, "learning_rate": 1.1526160048457408e-06, "loss": 0.0654, "step": 785250 }, { "epoch": 7.72, "grad_norm": 11.738907814025879, "learning_rate": 1.1524918823914922e-06, "loss": 0.0165, "step": 785275 }, { "epoch": 7.72, "grad_norm": 1.4978678226470947, "learning_rate": 1.1523677599372437e-06, "loss": 0.0583, "step": 785300 }, { "epoch": 7.72, "grad_norm": 7.609116077423096, "learning_rate": 1.1522436374829953e-06, "loss": 0.0157, "step": 785325 }, { "epoch": 7.72, "grad_norm": 2.4269185066223145, "learning_rate": 1.152119515028747e-06, "loss": 0.0531, "step": 785350 }, { "epoch": 7.72, "grad_norm": 6.329926490783691, "learning_rate": 1.1519953925744984e-06, "loss": 0.022, "step": 785375 }, { "epoch": 7.72, "grad_norm": 4.7403717041015625, "learning_rate": 1.1518712701202498e-06, "loss": 0.0492, "step": 785400 }, { "epoch": 7.72, "grad_norm": 0.4831811785697937, "learning_rate": 1.1517471476660014e-06, "loss": 0.0164, "step": 785425 }, { "epoch": 7.72, "grad_norm": 3.051121711730957, "learning_rate": 1.151623025211753e-06, "loss": 0.0927, "step": 785450 }, { "epoch": 7.72, "grad_norm": 3.921285629272461, "learning_rate": 1.1514989027575045e-06, "loss": 0.0206, "step": 785475 }, { "epoch": 7.72, "grad_norm": 2.460697889328003, "learning_rate": 1.1513747803032561e-06, "loss": 0.0447, "step": 785500 }, { "epoch": 7.72, "grad_norm": 1.3068619966506958, "learning_rate": 1.1512506578490077e-06, "loss": 0.0325, "step": 785525 }, { "epoch": 7.72, "grad_norm": 1.9595848321914673, "learning_rate": 1.1511265353947592e-06, "loss": 0.057, "step": 785550 }, { "epoch": 7.72, "grad_norm": 3.7022225856781006, "learning_rate": 1.1510024129405106e-06, "loss": 0.0323, "step": 785575 }, { "epoch": 7.72, "grad_norm": 1.947924256324768, "learning_rate": 1.1508782904862622e-06, "loss": 0.0572, "step": 785600 }, { "epoch": 7.72, "grad_norm": 1.113822340965271, "learning_rate": 1.1507541680320139e-06, "loss": 0.0222, "step": 785625 }, { "epoch": 7.72, "grad_norm": 2.796895742416382, "learning_rate": 1.1506300455777653e-06, "loss": 0.055, "step": 785650 }, { "epoch": 7.72, "grad_norm": 15.058694839477539, "learning_rate": 1.150505923123517e-06, "loss": 0.0302, "step": 785675 }, { "epoch": 7.73, "grad_norm": 1.6917946338653564, "learning_rate": 1.1503818006692683e-06, "loss": 0.0482, "step": 785700 }, { "epoch": 7.73, "grad_norm": 0.9092026352882385, "learning_rate": 1.15025767821502e-06, "loss": 0.0178, "step": 785725 }, { "epoch": 7.73, "grad_norm": 2.844672441482544, "learning_rate": 1.1501335557607714e-06, "loss": 0.0462, "step": 785750 }, { "epoch": 7.73, "grad_norm": 0.2483922839164734, "learning_rate": 1.150009433306523e-06, "loss": 0.0247, "step": 785775 }, { "epoch": 7.73, "grad_norm": 4.236270904541016, "learning_rate": 1.1498853108522745e-06, "loss": 0.0746, "step": 785800 }, { "epoch": 7.73, "grad_norm": 6.187793254852295, "learning_rate": 1.149761188398026e-06, "loss": 0.0208, "step": 785825 }, { "epoch": 7.73, "grad_norm": 5.5383806228637695, "learning_rate": 1.1496370659437777e-06, "loss": 0.0652, "step": 785850 }, { "epoch": 7.73, "grad_norm": 3.3317713737487793, "learning_rate": 1.1495129434895291e-06, "loss": 0.0183, "step": 785875 }, { "epoch": 7.73, "grad_norm": 4.239128589630127, "learning_rate": 1.1493888210352806e-06, "loss": 0.07, "step": 785900 }, { "epoch": 7.73, "grad_norm": 1.9449024200439453, "learning_rate": 1.1492646985810322e-06, "loss": 0.0198, "step": 785925 }, { "epoch": 7.73, "grad_norm": 3.568100929260254, "learning_rate": 1.1491405761267838e-06, "loss": 0.047, "step": 785950 }, { "epoch": 7.73, "grad_norm": 2.350764274597168, "learning_rate": 1.1490164536725353e-06, "loss": 0.0184, "step": 785975 }, { "epoch": 7.73, "grad_norm": 2.4158356189727783, "learning_rate": 1.1488923312182867e-06, "loss": 0.0597, "step": 786000 }, { "epoch": 7.73, "grad_norm": 1.459487795829773, "learning_rate": 1.1487682087640383e-06, "loss": 0.0181, "step": 786025 }, { "epoch": 7.73, "grad_norm": 2.7046821117401123, "learning_rate": 1.14864408630979e-06, "loss": 0.0525, "step": 786050 }, { "epoch": 7.73, "grad_norm": 2.602687120437622, "learning_rate": 1.1485199638555414e-06, "loss": 0.0155, "step": 786075 }, { "epoch": 7.73, "grad_norm": 2.831979274749756, "learning_rate": 1.148395841401293e-06, "loss": 0.0443, "step": 786100 }, { "epoch": 7.73, "grad_norm": 1.5158365964889526, "learning_rate": 1.1482717189470444e-06, "loss": 0.0194, "step": 786125 }, { "epoch": 7.73, "grad_norm": 3.4699037075042725, "learning_rate": 1.148147596492796e-06, "loss": 0.0414, "step": 786150 }, { "epoch": 7.73, "grad_norm": 1.0274124145507812, "learning_rate": 1.1480234740385475e-06, "loss": 0.0264, "step": 786175 }, { "epoch": 7.73, "grad_norm": 4.442610740661621, "learning_rate": 1.1478993515842991e-06, "loss": 0.0624, "step": 786200 }, { "epoch": 7.73, "grad_norm": 1.8336036205291748, "learning_rate": 1.1477752291300506e-06, "loss": 0.0193, "step": 786225 }, { "epoch": 7.73, "grad_norm": 3.718870162963867, "learning_rate": 1.1476511066758022e-06, "loss": 0.057, "step": 786250 }, { "epoch": 7.73, "grad_norm": 0.19243136048316956, "learning_rate": 1.1475269842215538e-06, "loss": 0.0158, "step": 786275 }, { "epoch": 7.73, "grad_norm": 4.604497909545898, "learning_rate": 1.1474028617673052e-06, "loss": 0.05, "step": 786300 }, { "epoch": 7.73, "grad_norm": 1.0071247816085815, "learning_rate": 1.1472787393130567e-06, "loss": 0.0099, "step": 786325 }, { "epoch": 7.73, "grad_norm": 3.983203887939453, "learning_rate": 1.1471546168588083e-06, "loss": 0.0587, "step": 786350 }, { "epoch": 7.73, "grad_norm": 10.823671340942383, "learning_rate": 1.14703049440456e-06, "loss": 0.0163, "step": 786375 }, { "epoch": 7.73, "grad_norm": 5.8192667961120605, "learning_rate": 1.1469063719503114e-06, "loss": 0.0502, "step": 786400 }, { "epoch": 7.73, "grad_norm": 1.3489470481872559, "learning_rate": 1.1467822494960628e-06, "loss": 0.0241, "step": 786425 }, { "epoch": 7.73, "grad_norm": 3.570798635482788, "learning_rate": 1.1466581270418144e-06, "loss": 0.0673, "step": 786450 }, { "epoch": 7.73, "grad_norm": 1.229040503501892, "learning_rate": 1.146534004587566e-06, "loss": 0.0148, "step": 786475 }, { "epoch": 7.73, "grad_norm": 2.6764535903930664, "learning_rate": 1.1464098821333175e-06, "loss": 0.0619, "step": 786500 }, { "epoch": 7.73, "grad_norm": 3.0980029106140137, "learning_rate": 1.1462857596790691e-06, "loss": 0.0236, "step": 786525 }, { "epoch": 7.73, "grad_norm": 2.141679048538208, "learning_rate": 1.1461616372248205e-06, "loss": 0.0443, "step": 786550 }, { "epoch": 7.73, "grad_norm": 8.050567626953125, "learning_rate": 1.1460375147705722e-06, "loss": 0.0107, "step": 786575 }, { "epoch": 7.73, "grad_norm": 2.6153645515441895, "learning_rate": 1.1459133923163236e-06, "loss": 0.0374, "step": 786600 }, { "epoch": 7.73, "grad_norm": 0.1625921130180359, "learning_rate": 1.1457892698620752e-06, "loss": 0.0187, "step": 786625 }, { "epoch": 7.73, "grad_norm": 4.129515171051025, "learning_rate": 1.1456651474078267e-06, "loss": 0.0605, "step": 786650 }, { "epoch": 7.73, "grad_norm": 1.9616708755493164, "learning_rate": 1.1455410249535783e-06, "loss": 0.0138, "step": 786675 }, { "epoch": 7.73, "grad_norm": 3.644454002380371, "learning_rate": 1.14541690249933e-06, "loss": 0.0497, "step": 786700 }, { "epoch": 7.74, "grad_norm": 5.082226753234863, "learning_rate": 1.1452927800450813e-06, "loss": 0.0269, "step": 786725 }, { "epoch": 7.74, "grad_norm": 3.8409616947174072, "learning_rate": 1.1451686575908328e-06, "loss": 0.0505, "step": 786750 }, { "epoch": 7.74, "grad_norm": 0.7141600847244263, "learning_rate": 1.1450445351365844e-06, "loss": 0.0313, "step": 786775 }, { "epoch": 7.74, "grad_norm": 3.6832001209259033, "learning_rate": 1.144920412682336e-06, "loss": 0.0491, "step": 786800 }, { "epoch": 7.74, "grad_norm": 8.863874435424805, "learning_rate": 1.1447962902280875e-06, "loss": 0.0168, "step": 786825 }, { "epoch": 7.74, "grad_norm": 4.540604591369629, "learning_rate": 1.1446721677738389e-06, "loss": 0.0443, "step": 786850 }, { "epoch": 7.74, "grad_norm": 6.019964694976807, "learning_rate": 1.1445480453195905e-06, "loss": 0.0234, "step": 786875 }, { "epoch": 7.74, "grad_norm": 3.1962265968322754, "learning_rate": 1.1444239228653422e-06, "loss": 0.0618, "step": 786900 }, { "epoch": 7.74, "grad_norm": 1.175948143005371, "learning_rate": 1.1442998004110936e-06, "loss": 0.0168, "step": 786925 }, { "epoch": 7.74, "grad_norm": 3.411039352416992, "learning_rate": 1.1441806428550152e-06, "loss": 0.0697, "step": 786950 }, { "epoch": 7.74, "grad_norm": 4.046091079711914, "learning_rate": 1.1440565204007667e-06, "loss": 0.0221, "step": 786975 }, { "epoch": 7.74, "grad_norm": 2.723663806915283, "learning_rate": 1.143932397946518e-06, "loss": 0.0459, "step": 787000 }, { "epoch": 7.74, "grad_norm": 5.159246444702148, "learning_rate": 1.1438082754922697e-06, "loss": 0.0097, "step": 787025 }, { "epoch": 7.74, "grad_norm": 2.8494327068328857, "learning_rate": 1.1436841530380214e-06, "loss": 0.0482, "step": 787050 }, { "epoch": 7.74, "grad_norm": 1.177086353302002, "learning_rate": 1.1435600305837728e-06, "loss": 0.0154, "step": 787075 }, { "epoch": 7.74, "grad_norm": 3.5683047771453857, "learning_rate": 1.1434359081295244e-06, "loss": 0.0501, "step": 787100 }, { "epoch": 7.74, "grad_norm": 0.24387922883033752, "learning_rate": 1.143311785675276e-06, "loss": 0.0177, "step": 787125 }, { "epoch": 7.74, "grad_norm": 2.5344014167785645, "learning_rate": 1.1431876632210275e-06, "loss": 0.0626, "step": 787150 }, { "epoch": 7.74, "grad_norm": 2.4402904510498047, "learning_rate": 1.143063540766779e-06, "loss": 0.0163, "step": 787175 }, { "epoch": 7.74, "grad_norm": 4.244140148162842, "learning_rate": 1.1429394183125305e-06, "loss": 0.0455, "step": 787200 }, { "epoch": 7.74, "grad_norm": 10.303545951843262, "learning_rate": 1.1428152958582822e-06, "loss": 0.0119, "step": 787225 }, { "epoch": 7.74, "grad_norm": 4.504813194274902, "learning_rate": 1.1426911734040336e-06, "loss": 0.0536, "step": 787250 }, { "epoch": 7.74, "grad_norm": 1.945650339126587, "learning_rate": 1.142567050949785e-06, "loss": 0.019, "step": 787275 }, { "epoch": 7.74, "grad_norm": 3.4414687156677246, "learning_rate": 1.1424429284955367e-06, "loss": 0.0506, "step": 787300 }, { "epoch": 7.74, "grad_norm": 0.7506770491600037, "learning_rate": 1.1423188060412883e-06, "loss": 0.0221, "step": 787325 }, { "epoch": 7.74, "grad_norm": 1.8305814266204834, "learning_rate": 1.1421946835870397e-06, "loss": 0.0672, "step": 787350 }, { "epoch": 7.74, "grad_norm": 0.7400593161582947, "learning_rate": 1.1420705611327913e-06, "loss": 0.011, "step": 787375 }, { "epoch": 7.74, "grad_norm": 3.194221258163452, "learning_rate": 1.1419464386785428e-06, "loss": 0.045, "step": 787400 }, { "epoch": 7.74, "grad_norm": 0.3046432137489319, "learning_rate": 1.1418223162242944e-06, "loss": 0.0265, "step": 787425 }, { "epoch": 7.74, "grad_norm": 3.5298678874969482, "learning_rate": 1.1416981937700458e-06, "loss": 0.0733, "step": 787450 }, { "epoch": 7.74, "grad_norm": 0.034112054854631424, "learning_rate": 1.1415740713157975e-06, "loss": 0.0205, "step": 787475 }, { "epoch": 7.74, "grad_norm": 4.627866744995117, "learning_rate": 1.1414499488615489e-06, "loss": 0.0656, "step": 787500 }, { "epoch": 7.74, "grad_norm": 7.761382102966309, "learning_rate": 1.1413258264073005e-06, "loss": 0.0124, "step": 787525 }, { "epoch": 7.74, "grad_norm": 3.433631658554077, "learning_rate": 1.1412017039530522e-06, "loss": 0.0411, "step": 787550 }, { "epoch": 7.74, "grad_norm": 4.652944087982178, "learning_rate": 1.1410775814988036e-06, "loss": 0.0197, "step": 787575 }, { "epoch": 7.74, "grad_norm": 5.0328521728515625, "learning_rate": 1.140953459044555e-06, "loss": 0.0706, "step": 787600 }, { "epoch": 7.74, "grad_norm": 1.0239077806472778, "learning_rate": 1.1408293365903066e-06, "loss": 0.014, "step": 787625 }, { "epoch": 7.74, "grad_norm": 3.186033010482788, "learning_rate": 1.1407052141360583e-06, "loss": 0.0335, "step": 787650 }, { "epoch": 7.74, "grad_norm": 7.931560516357422, "learning_rate": 1.1405810916818097e-06, "loss": 0.0183, "step": 787675 }, { "epoch": 7.74, "grad_norm": 3.8535499572753906, "learning_rate": 1.1404569692275611e-06, "loss": 0.0415, "step": 787700 }, { "epoch": 7.75, "grad_norm": 0.35192543268203735, "learning_rate": 1.1403328467733128e-06, "loss": 0.0194, "step": 787725 }, { "epoch": 7.75, "grad_norm": 4.604798316955566, "learning_rate": 1.1402087243190644e-06, "loss": 0.0723, "step": 787750 }, { "epoch": 7.75, "grad_norm": 5.564565181732178, "learning_rate": 1.1400846018648158e-06, "loss": 0.0189, "step": 787775 }, { "epoch": 7.75, "grad_norm": 4.676783561706543, "learning_rate": 1.1399604794105674e-06, "loss": 0.0471, "step": 787800 }, { "epoch": 7.75, "grad_norm": 0.1351134330034256, "learning_rate": 1.1398363569563189e-06, "loss": 0.0203, "step": 787825 }, { "epoch": 7.75, "grad_norm": 4.601737976074219, "learning_rate": 1.1397122345020705e-06, "loss": 0.0553, "step": 787850 }, { "epoch": 7.75, "grad_norm": 0.7238503694534302, "learning_rate": 1.139588112047822e-06, "loss": 0.0257, "step": 787875 }, { "epoch": 7.75, "grad_norm": 2.241705894470215, "learning_rate": 1.1394639895935736e-06, "loss": 0.0582, "step": 787900 }, { "epoch": 7.75, "grad_norm": 0.05069785565137863, "learning_rate": 1.139339867139325e-06, "loss": 0.0187, "step": 787925 }, { "epoch": 7.75, "grad_norm": 3.2654716968536377, "learning_rate": 1.1392157446850766e-06, "loss": 0.0817, "step": 787950 }, { "epoch": 7.75, "grad_norm": 1.9372788667678833, "learning_rate": 1.1390916222308283e-06, "loss": 0.0235, "step": 787975 }, { "epoch": 7.75, "grad_norm": 1.992488145828247, "learning_rate": 1.1389674997765797e-06, "loss": 0.0731, "step": 788000 }, { "epoch": 7.75, "grad_norm": 4.933709144592285, "learning_rate": 1.138843377322331e-06, "loss": 0.0214, "step": 788025 }, { "epoch": 7.75, "grad_norm": 2.661851167678833, "learning_rate": 1.1387192548680827e-06, "loss": 0.042, "step": 788050 }, { "epoch": 7.75, "grad_norm": 6.0389814376831055, "learning_rate": 1.1385951324138344e-06, "loss": 0.0195, "step": 788075 }, { "epoch": 7.75, "grad_norm": 2.836965322494507, "learning_rate": 1.1384710099595858e-06, "loss": 0.0439, "step": 788100 }, { "epoch": 7.75, "grad_norm": 0.40809544920921326, "learning_rate": 1.1383468875053372e-06, "loss": 0.0267, "step": 788125 }, { "epoch": 7.75, "grad_norm": 4.679774284362793, "learning_rate": 1.1382227650510889e-06, "loss": 0.0696, "step": 788150 }, { "epoch": 7.75, "grad_norm": 2.7425904273986816, "learning_rate": 1.1380986425968405e-06, "loss": 0.0136, "step": 788175 }, { "epoch": 7.75, "grad_norm": 1.8152602910995483, "learning_rate": 1.137974520142592e-06, "loss": 0.0575, "step": 788200 }, { "epoch": 7.75, "grad_norm": 6.581360816955566, "learning_rate": 1.1378503976883435e-06, "loss": 0.0195, "step": 788225 }, { "epoch": 7.75, "grad_norm": 2.649336576461792, "learning_rate": 1.137726275234095e-06, "loss": 0.0532, "step": 788250 }, { "epoch": 7.75, "grad_norm": 5.303762435913086, "learning_rate": 1.1376021527798466e-06, "loss": 0.0208, "step": 788275 }, { "epoch": 7.75, "grad_norm": 2.080252170562744, "learning_rate": 1.137478030325598e-06, "loss": 0.0473, "step": 788300 }, { "epoch": 7.75, "grad_norm": 8.26233959197998, "learning_rate": 1.1373539078713497e-06, "loss": 0.0164, "step": 788325 }, { "epoch": 7.75, "grad_norm": 3.108351469039917, "learning_rate": 1.137229785417101e-06, "loss": 0.0719, "step": 788350 }, { "epoch": 7.75, "grad_norm": 0.1899559199810028, "learning_rate": 1.1371056629628527e-06, "loss": 0.024, "step": 788375 }, { "epoch": 7.75, "grad_norm": 4.0857696533203125, "learning_rate": 1.1369815405086044e-06, "loss": 0.0714, "step": 788400 }, { "epoch": 7.75, "grad_norm": 3.2210609912872314, "learning_rate": 1.1368574180543558e-06, "loss": 0.0192, "step": 788425 }, { "epoch": 7.75, "grad_norm": 3.364804983139038, "learning_rate": 1.1367332956001074e-06, "loss": 0.0624, "step": 788450 }, { "epoch": 7.75, "grad_norm": 6.973985195159912, "learning_rate": 1.1366091731458588e-06, "loss": 0.0248, "step": 788475 }, { "epoch": 7.75, "grad_norm": 2.6230404376983643, "learning_rate": 1.1364850506916105e-06, "loss": 0.0787, "step": 788500 }, { "epoch": 7.75, "grad_norm": 7.093630790710449, "learning_rate": 1.136360928237362e-06, "loss": 0.0191, "step": 788525 }, { "epoch": 7.75, "grad_norm": 3.057291030883789, "learning_rate": 1.1362368057831135e-06, "loss": 0.0637, "step": 788550 }, { "epoch": 7.75, "grad_norm": 5.147065162658691, "learning_rate": 1.1361126833288652e-06, "loss": 0.0123, "step": 788575 }, { "epoch": 7.75, "grad_norm": 1.6323432922363281, "learning_rate": 1.1359885608746166e-06, "loss": 0.0392, "step": 788600 }, { "epoch": 7.75, "grad_norm": 19.71169090270996, "learning_rate": 1.135864438420368e-06, "loss": 0.0228, "step": 788625 }, { "epoch": 7.75, "grad_norm": 4.149465560913086, "learning_rate": 1.1357403159661196e-06, "loss": 0.0683, "step": 788650 }, { "epoch": 7.75, "grad_norm": 0.2617858350276947, "learning_rate": 1.1356161935118713e-06, "loss": 0.0241, "step": 788675 }, { "epoch": 7.75, "grad_norm": 2.2491648197174072, "learning_rate": 1.1354920710576227e-06, "loss": 0.072, "step": 788700 }, { "epoch": 7.75, "grad_norm": 0.5165414810180664, "learning_rate": 1.1353679486033741e-06, "loss": 0.0162, "step": 788725 }, { "epoch": 7.76, "grad_norm": 2.90293550491333, "learning_rate": 1.1352438261491258e-06, "loss": 0.0737, "step": 788750 }, { "epoch": 7.76, "grad_norm": 8.948760032653809, "learning_rate": 1.1351197036948774e-06, "loss": 0.0293, "step": 788775 }, { "epoch": 7.76, "grad_norm": 4.681279182434082, "learning_rate": 1.1349955812406288e-06, "loss": 0.0566, "step": 788800 }, { "epoch": 7.76, "grad_norm": 0.3843097388744354, "learning_rate": 1.1348714587863805e-06, "loss": 0.0268, "step": 788825 }, { "epoch": 7.76, "grad_norm": 4.650067329406738, "learning_rate": 1.1347473363321319e-06, "loss": 0.0634, "step": 788850 }, { "epoch": 7.76, "grad_norm": 0.8577110767364502, "learning_rate": 1.1346232138778835e-06, "loss": 0.0093, "step": 788875 }, { "epoch": 7.76, "grad_norm": 3.4571640491485596, "learning_rate": 1.134499091423635e-06, "loss": 0.0523, "step": 788900 }, { "epoch": 7.76, "grad_norm": 0.4459703862667084, "learning_rate": 1.1343749689693866e-06, "loss": 0.0179, "step": 788925 }, { "epoch": 7.76, "grad_norm": 13.853998184204102, "learning_rate": 1.134250846515138e-06, "loss": 0.0455, "step": 788950 }, { "epoch": 7.76, "grad_norm": 1.4780070781707764, "learning_rate": 1.1341267240608896e-06, "loss": 0.0271, "step": 788975 }, { "epoch": 7.76, "grad_norm": 5.0397539138793945, "learning_rate": 1.1340026016066413e-06, "loss": 0.0509, "step": 789000 }, { "epoch": 7.76, "grad_norm": 5.629538059234619, "learning_rate": 1.1338784791523927e-06, "loss": 0.0267, "step": 789025 }, { "epoch": 7.76, "grad_norm": 3.5257840156555176, "learning_rate": 1.1337543566981441e-06, "loss": 0.0515, "step": 789050 }, { "epoch": 7.76, "grad_norm": 0.34801363945007324, "learning_rate": 1.1336302342438957e-06, "loss": 0.0164, "step": 789075 }, { "epoch": 7.76, "grad_norm": 4.49445104598999, "learning_rate": 1.1335110766878172e-06, "loss": 0.0606, "step": 789100 }, { "epoch": 7.76, "grad_norm": 9.372818946838379, "learning_rate": 1.1333869542335688e-06, "loss": 0.0213, "step": 789125 }, { "epoch": 7.76, "grad_norm": 4.05828857421875, "learning_rate": 1.1332628317793203e-06, "loss": 0.0688, "step": 789150 }, { "epoch": 7.76, "grad_norm": 3.9572010040283203, "learning_rate": 1.1331387093250719e-06, "loss": 0.0166, "step": 789175 }, { "epoch": 7.76, "grad_norm": 5.11647891998291, "learning_rate": 1.1330145868708233e-06, "loss": 0.0471, "step": 789200 }, { "epoch": 7.76, "grad_norm": 3.7737624645233154, "learning_rate": 1.132890464416575e-06, "loss": 0.0155, "step": 789225 }, { "epoch": 7.76, "grad_norm": 2.1334848403930664, "learning_rate": 1.1327663419623264e-06, "loss": 0.0479, "step": 789250 }, { "epoch": 7.76, "grad_norm": 3.4064183235168457, "learning_rate": 1.132642219508078e-06, "loss": 0.0187, "step": 789275 }, { "epoch": 7.76, "grad_norm": 4.0379462242126465, "learning_rate": 1.1325180970538294e-06, "loss": 0.0712, "step": 789300 }, { "epoch": 7.76, "grad_norm": 7.886664867401123, "learning_rate": 1.132393974599581e-06, "loss": 0.0165, "step": 789325 }, { "epoch": 7.76, "grad_norm": 1.7921421527862549, "learning_rate": 1.1322698521453327e-06, "loss": 0.0477, "step": 789350 }, { "epoch": 7.76, "grad_norm": 12.19594955444336, "learning_rate": 1.1321457296910841e-06, "loss": 0.0248, "step": 789375 }, { "epoch": 7.76, "grad_norm": 2.724379062652588, "learning_rate": 1.1320216072368355e-06, "loss": 0.0412, "step": 789400 }, { "epoch": 7.76, "grad_norm": 13.764360427856445, "learning_rate": 1.1318974847825872e-06, "loss": 0.025, "step": 789425 }, { "epoch": 7.76, "grad_norm": 4.076315879821777, "learning_rate": 1.1317733623283388e-06, "loss": 0.054, "step": 789450 }, { "epoch": 7.76, "grad_norm": 0.010735243558883667, "learning_rate": 1.1316492398740902e-06, "loss": 0.0195, "step": 789475 }, { "epoch": 7.76, "grad_norm": 2.9690661430358887, "learning_rate": 1.1315251174198417e-06, "loss": 0.0311, "step": 789500 }, { "epoch": 7.76, "grad_norm": 3.4956300258636475, "learning_rate": 1.1314009949655933e-06, "loss": 0.0149, "step": 789525 }, { "epoch": 7.76, "grad_norm": 2.6821916103363037, "learning_rate": 1.131276872511345e-06, "loss": 0.0462, "step": 789550 }, { "epoch": 7.76, "grad_norm": 0.32327333092689514, "learning_rate": 1.1311527500570964e-06, "loss": 0.0143, "step": 789575 }, { "epoch": 7.76, "grad_norm": 2.95654559135437, "learning_rate": 1.131028627602848e-06, "loss": 0.0825, "step": 789600 }, { "epoch": 7.76, "grad_norm": 1.39537513256073, "learning_rate": 1.1309045051485994e-06, "loss": 0.0235, "step": 789625 }, { "epoch": 7.76, "grad_norm": 3.8035500049591064, "learning_rate": 1.130780382694351e-06, "loss": 0.0601, "step": 789650 }, { "epoch": 7.76, "grad_norm": 0.8628261685371399, "learning_rate": 1.1306562602401025e-06, "loss": 0.0228, "step": 789675 }, { "epoch": 7.76, "grad_norm": 2.3990724086761475, "learning_rate": 1.1305321377858541e-06, "loss": 0.0649, "step": 789700 }, { "epoch": 7.76, "grad_norm": 3.6554653644561768, "learning_rate": 1.1304080153316055e-06, "loss": 0.0154, "step": 789725 }, { "epoch": 7.76, "grad_norm": 5.7777628898620605, "learning_rate": 1.1302838928773572e-06, "loss": 0.0836, "step": 789750 }, { "epoch": 7.77, "grad_norm": 3.680135726928711, "learning_rate": 1.1301597704231088e-06, "loss": 0.0109, "step": 789775 }, { "epoch": 7.77, "grad_norm": 2.481743097305298, "learning_rate": 1.1300356479688602e-06, "loss": 0.0328, "step": 789800 }, { "epoch": 7.77, "grad_norm": 0.24144384264945984, "learning_rate": 1.1299115255146116e-06, "loss": 0.0264, "step": 789825 }, { "epoch": 7.77, "grad_norm": 3.010061502456665, "learning_rate": 1.1297874030603633e-06, "loss": 0.0667, "step": 789850 }, { "epoch": 7.77, "grad_norm": 15.2194185256958, "learning_rate": 1.129663280606115e-06, "loss": 0.0113, "step": 789875 }, { "epoch": 7.77, "grad_norm": 2.271897792816162, "learning_rate": 1.1295391581518663e-06, "loss": 0.0464, "step": 789900 }, { "epoch": 7.77, "grad_norm": 4.679391860961914, "learning_rate": 1.129415035697618e-06, "loss": 0.0184, "step": 789925 }, { "epoch": 7.77, "grad_norm": 3.0023670196533203, "learning_rate": 1.1292909132433696e-06, "loss": 0.054, "step": 789950 }, { "epoch": 7.77, "grad_norm": 0.18293409049510956, "learning_rate": 1.129166790789121e-06, "loss": 0.0247, "step": 789975 }, { "epoch": 7.77, "grad_norm": 3.9818472862243652, "learning_rate": 1.1290426683348725e-06, "loss": 0.0628, "step": 790000 }, { "epoch": 7.77, "grad_norm": 9.155858993530273, "learning_rate": 1.128918545880624e-06, "loss": 0.0159, "step": 790025 }, { "epoch": 7.77, "grad_norm": 3.081221580505371, "learning_rate": 1.1287944234263757e-06, "loss": 0.0622, "step": 790050 }, { "epoch": 7.77, "grad_norm": 1.8892215490341187, "learning_rate": 1.1286703009721272e-06, "loss": 0.0137, "step": 790075 }, { "epoch": 7.77, "grad_norm": 1.9778097867965698, "learning_rate": 1.1285461785178788e-06, "loss": 0.0554, "step": 790100 }, { "epoch": 7.77, "grad_norm": 2.9974353313446045, "learning_rate": 1.1284220560636302e-06, "loss": 0.0148, "step": 790125 }, { "epoch": 7.77, "grad_norm": 5.466217517852783, "learning_rate": 1.1282979336093818e-06, "loss": 0.0542, "step": 790150 }, { "epoch": 7.77, "grad_norm": 1.5237942934036255, "learning_rate": 1.1281738111551333e-06, "loss": 0.0135, "step": 790175 }, { "epoch": 7.77, "grad_norm": 1.827189564704895, "learning_rate": 1.128049688700885e-06, "loss": 0.0661, "step": 790200 }, { "epoch": 7.77, "grad_norm": 10.60622787475586, "learning_rate": 1.1279255662466363e-06, "loss": 0.0171, "step": 790225 }, { "epoch": 7.77, "grad_norm": 3.723212957382202, "learning_rate": 1.127801443792388e-06, "loss": 0.0545, "step": 790250 }, { "epoch": 7.77, "grad_norm": 7.918400764465332, "learning_rate": 1.1276773213381396e-06, "loss": 0.0156, "step": 790275 }, { "epoch": 7.77, "grad_norm": 1.7682467699050903, "learning_rate": 1.127553198883891e-06, "loss": 0.055, "step": 790300 }, { "epoch": 7.77, "grad_norm": 0.270343154668808, "learning_rate": 1.1274290764296424e-06, "loss": 0.0162, "step": 790325 }, { "epoch": 7.77, "grad_norm": 2.6879913806915283, "learning_rate": 1.127304953975394e-06, "loss": 0.0424, "step": 790350 }, { "epoch": 7.77, "grad_norm": 2.7064321041107178, "learning_rate": 1.1271808315211457e-06, "loss": 0.0245, "step": 790375 }, { "epoch": 7.77, "grad_norm": 9.183542251586914, "learning_rate": 1.1270567090668971e-06, "loss": 0.0531, "step": 790400 }, { "epoch": 7.77, "grad_norm": 0.9467649459838867, "learning_rate": 1.1269325866126486e-06, "loss": 0.0171, "step": 790425 }, { "epoch": 7.77, "grad_norm": 3.907003402709961, "learning_rate": 1.1268084641584002e-06, "loss": 0.0495, "step": 790450 }, { "epoch": 7.77, "grad_norm": 1.1345500946044922, "learning_rate": 1.1266843417041518e-06, "loss": 0.02, "step": 790475 }, { "epoch": 7.77, "grad_norm": 3.668567180633545, "learning_rate": 1.1265602192499033e-06, "loss": 0.0514, "step": 790500 }, { "epoch": 7.77, "grad_norm": 2.2966859340667725, "learning_rate": 1.1264360967956549e-06, "loss": 0.0174, "step": 790525 }, { "epoch": 7.77, "grad_norm": 4.854270935058594, "learning_rate": 1.1263119743414063e-06, "loss": 0.0713, "step": 790550 }, { "epoch": 7.77, "grad_norm": 1.7729097604751587, "learning_rate": 1.126187851887158e-06, "loss": 0.0172, "step": 790575 }, { "epoch": 7.77, "grad_norm": 3.212559700012207, "learning_rate": 1.1260637294329094e-06, "loss": 0.0568, "step": 790600 }, { "epoch": 7.77, "grad_norm": 3.923053503036499, "learning_rate": 1.125939606978661e-06, "loss": 0.0276, "step": 790625 }, { "epoch": 7.77, "grad_norm": 3.9069697856903076, "learning_rate": 1.1258154845244124e-06, "loss": 0.0382, "step": 790650 }, { "epoch": 7.77, "grad_norm": 2.014802932739258, "learning_rate": 1.125691362070164e-06, "loss": 0.0205, "step": 790675 }, { "epoch": 7.77, "grad_norm": 3.1841564178466797, "learning_rate": 1.1255672396159157e-06, "loss": 0.046, "step": 790700 }, { "epoch": 7.77, "grad_norm": 1.0896215438842773, "learning_rate": 1.1254431171616671e-06, "loss": 0.0112, "step": 790725 }, { "epoch": 7.77, "grad_norm": 2.3871846199035645, "learning_rate": 1.1253189947074185e-06, "loss": 0.0547, "step": 790750 }, { "epoch": 7.78, "grad_norm": 3.7104296684265137, "learning_rate": 1.1251948722531702e-06, "loss": 0.0126, "step": 790775 }, { "epoch": 7.78, "grad_norm": 1.7482099533081055, "learning_rate": 1.1250707497989218e-06, "loss": 0.0571, "step": 790800 }, { "epoch": 7.78, "grad_norm": 5.239782333374023, "learning_rate": 1.1249466273446732e-06, "loss": 0.0272, "step": 790825 }, { "epoch": 7.78, "grad_norm": 3.1268222332000732, "learning_rate": 1.1248225048904247e-06, "loss": 0.0616, "step": 790850 }, { "epoch": 7.78, "grad_norm": 4.347519397735596, "learning_rate": 1.1246983824361763e-06, "loss": 0.0234, "step": 790875 }, { "epoch": 7.78, "grad_norm": 4.541003704071045, "learning_rate": 1.124574259981928e-06, "loss": 0.0798, "step": 790900 }, { "epoch": 7.78, "grad_norm": 0.7336350679397583, "learning_rate": 1.1244501375276794e-06, "loss": 0.0179, "step": 790925 }, { "epoch": 7.78, "grad_norm": 5.383853435516357, "learning_rate": 1.124326015073431e-06, "loss": 0.0694, "step": 790950 }, { "epoch": 7.78, "grad_norm": 13.550761222839355, "learning_rate": 1.1242018926191824e-06, "loss": 0.0333, "step": 790975 }, { "epoch": 7.78, "grad_norm": 5.110873699188232, "learning_rate": 1.124077770164934e-06, "loss": 0.0452, "step": 791000 }, { "epoch": 7.78, "grad_norm": 0.13788875937461853, "learning_rate": 1.1239536477106855e-06, "loss": 0.0206, "step": 791025 }, { "epoch": 7.78, "grad_norm": 4.156344890594482, "learning_rate": 1.123829525256437e-06, "loss": 0.0617, "step": 791050 }, { "epoch": 7.78, "grad_norm": 7.9245758056640625, "learning_rate": 1.1237054028021885e-06, "loss": 0.0178, "step": 791075 }, { "epoch": 7.78, "grad_norm": 10.87452507019043, "learning_rate": 1.1235812803479402e-06, "loss": 0.0589, "step": 791100 }, { "epoch": 7.78, "grad_norm": 0.5354119539260864, "learning_rate": 1.1234571578936918e-06, "loss": 0.0159, "step": 791125 }, { "epoch": 7.78, "grad_norm": 1.79828679561615, "learning_rate": 1.1233330354394432e-06, "loss": 0.0484, "step": 791150 }, { "epoch": 7.78, "grad_norm": 0.17676714062690735, "learning_rate": 1.1232089129851946e-06, "loss": 0.0187, "step": 791175 }, { "epoch": 7.78, "grad_norm": 4.839673042297363, "learning_rate": 1.1230847905309463e-06, "loss": 0.054, "step": 791200 }, { "epoch": 7.78, "grad_norm": 0.5157690048217773, "learning_rate": 1.122960668076698e-06, "loss": 0.0225, "step": 791225 }, { "epoch": 7.78, "grad_norm": 3.6555941104888916, "learning_rate": 1.1228365456224493e-06, "loss": 0.0612, "step": 791250 }, { "epoch": 7.78, "grad_norm": 3.0098159313201904, "learning_rate": 1.122712423168201e-06, "loss": 0.0249, "step": 791275 }, { "epoch": 7.78, "grad_norm": 2.900766372680664, "learning_rate": 1.1225883007139524e-06, "loss": 0.0654, "step": 791300 }, { "epoch": 7.78, "grad_norm": 2.0662858486175537, "learning_rate": 1.122464178259704e-06, "loss": 0.007, "step": 791325 }, { "epoch": 7.78, "grad_norm": 3.2164855003356934, "learning_rate": 1.1223400558054555e-06, "loss": 0.0715, "step": 791350 }, { "epoch": 7.78, "grad_norm": 0.4598686397075653, "learning_rate": 1.122215933351207e-06, "loss": 0.0277, "step": 791375 }, { "epoch": 7.78, "grad_norm": 3.337120771408081, "learning_rate": 1.1220918108969587e-06, "loss": 0.0549, "step": 791400 }, { "epoch": 7.78, "grad_norm": 16.17186737060547, "learning_rate": 1.1219676884427101e-06, "loss": 0.0178, "step": 791425 }, { "epoch": 7.78, "grad_norm": 4.24808931350708, "learning_rate": 1.1218435659884616e-06, "loss": 0.0397, "step": 791450 }, { "epoch": 7.78, "grad_norm": 0.609489917755127, "learning_rate": 1.1217194435342132e-06, "loss": 0.0041, "step": 791475 }, { "epoch": 7.78, "grad_norm": 4.563695430755615, "learning_rate": 1.1215953210799648e-06, "loss": 0.0622, "step": 791500 }, { "epoch": 7.78, "grad_norm": 1.0340172052383423, "learning_rate": 1.1214711986257163e-06, "loss": 0.0247, "step": 791525 }, { "epoch": 7.78, "grad_norm": 2.0844852924346924, "learning_rate": 1.121347076171468e-06, "loss": 0.054, "step": 791550 }, { "epoch": 7.78, "grad_norm": 5.076680660247803, "learning_rate": 1.1212229537172193e-06, "loss": 0.0227, "step": 791575 }, { "epoch": 7.78, "grad_norm": 4.126434326171875, "learning_rate": 1.121098831262971e-06, "loss": 0.0614, "step": 791600 }, { "epoch": 7.78, "grad_norm": 0.028857257217168808, "learning_rate": 1.1209747088087224e-06, "loss": 0.0154, "step": 791625 }, { "epoch": 7.78, "grad_norm": 17.075237274169922, "learning_rate": 1.120850586354474e-06, "loss": 0.0487, "step": 791650 }, { "epoch": 7.78, "grad_norm": 0.24508216977119446, "learning_rate": 1.1207264639002254e-06, "loss": 0.0229, "step": 791675 }, { "epoch": 7.78, "grad_norm": 1.6986123323440552, "learning_rate": 1.120602341445977e-06, "loss": 0.0425, "step": 791700 }, { "epoch": 7.78, "grad_norm": 18.782644271850586, "learning_rate": 1.1204782189917287e-06, "loss": 0.0144, "step": 791725 }, { "epoch": 7.78, "grad_norm": 1.9708184003829956, "learning_rate": 1.1203540965374801e-06, "loss": 0.0595, "step": 791750 }, { "epoch": 7.78, "grad_norm": 0.02328350394964218, "learning_rate": 1.1202299740832316e-06, "loss": 0.0182, "step": 791775 }, { "epoch": 7.79, "grad_norm": 3.1710567474365234, "learning_rate": 1.1201058516289832e-06, "loss": 0.0513, "step": 791800 }, { "epoch": 7.79, "grad_norm": 0.1436261683702469, "learning_rate": 1.1199817291747348e-06, "loss": 0.0191, "step": 791825 }, { "epoch": 7.79, "grad_norm": 3.1981475353240967, "learning_rate": 1.1198576067204862e-06, "loss": 0.0733, "step": 791850 }, { "epoch": 7.79, "grad_norm": 0.312002032995224, "learning_rate": 1.1197334842662377e-06, "loss": 0.0113, "step": 791875 }, { "epoch": 7.79, "grad_norm": 4.673643112182617, "learning_rate": 1.1196093618119893e-06, "loss": 0.0338, "step": 791900 }, { "epoch": 7.79, "grad_norm": 0.1442631483078003, "learning_rate": 1.119485239357741e-06, "loss": 0.0183, "step": 791925 }, { "epoch": 7.79, "grad_norm": 3.357452630996704, "learning_rate": 1.1193611169034924e-06, "loss": 0.0461, "step": 791950 }, { "epoch": 7.79, "grad_norm": 0.5871738791465759, "learning_rate": 1.119236994449244e-06, "loss": 0.0244, "step": 791975 }, { "epoch": 7.79, "grad_norm": 4.503090858459473, "learning_rate": 1.1191128719949954e-06, "loss": 0.0488, "step": 792000 }, { "epoch": 7.79, "grad_norm": 5.107701778411865, "learning_rate": 1.118988749540747e-06, "loss": 0.0103, "step": 792025 }, { "epoch": 7.79, "grad_norm": 4.557156562805176, "learning_rate": 1.1188646270864985e-06, "loss": 0.0616, "step": 792050 }, { "epoch": 7.79, "grad_norm": 1.6661112308502197, "learning_rate": 1.1187405046322501e-06, "loss": 0.0204, "step": 792075 }, { "epoch": 7.79, "grad_norm": 3.4680283069610596, "learning_rate": 1.1186163821780015e-06, "loss": 0.0617, "step": 792100 }, { "epoch": 7.79, "grad_norm": 4.702353000640869, "learning_rate": 1.1184922597237532e-06, "loss": 0.0257, "step": 792125 }, { "epoch": 7.79, "grad_norm": 2.935854434967041, "learning_rate": 1.1183681372695048e-06, "loss": 0.0583, "step": 792150 }, { "epoch": 7.79, "grad_norm": 2.1425564289093018, "learning_rate": 1.1182440148152562e-06, "loss": 0.0131, "step": 792175 }, { "epoch": 7.79, "grad_norm": 2.2630836963653564, "learning_rate": 1.1181198923610077e-06, "loss": 0.0654, "step": 792200 }, { "epoch": 7.79, "grad_norm": 6.540763854980469, "learning_rate": 1.1179957699067593e-06, "loss": 0.0267, "step": 792225 }, { "epoch": 7.79, "grad_norm": 2.117414712905884, "learning_rate": 1.117871647452511e-06, "loss": 0.0404, "step": 792250 }, { "epoch": 7.79, "grad_norm": 0.08741921186447144, "learning_rate": 1.1177475249982623e-06, "loss": 0.0211, "step": 792275 }, { "epoch": 7.79, "grad_norm": 2.83001446723938, "learning_rate": 1.1176234025440138e-06, "loss": 0.0269, "step": 792300 }, { "epoch": 7.79, "grad_norm": 4.986306667327881, "learning_rate": 1.1174992800897654e-06, "loss": 0.0171, "step": 792325 }, { "epoch": 7.79, "grad_norm": 3.907627820968628, "learning_rate": 1.117375157635517e-06, "loss": 0.067, "step": 792350 }, { "epoch": 7.79, "grad_norm": 0.3711646795272827, "learning_rate": 1.1172510351812685e-06, "loss": 0.0142, "step": 792375 }, { "epoch": 7.79, "grad_norm": 4.5583977699279785, "learning_rate": 1.11712691272702e-06, "loss": 0.0763, "step": 792400 }, { "epoch": 7.79, "grad_norm": 7.180679798126221, "learning_rate": 1.1170027902727715e-06, "loss": 0.0236, "step": 792425 }, { "epoch": 7.79, "grad_norm": 2.452343702316284, "learning_rate": 1.1168786678185232e-06, "loss": 0.0536, "step": 792450 }, { "epoch": 7.79, "grad_norm": 0.12489202618598938, "learning_rate": 1.1167545453642746e-06, "loss": 0.0181, "step": 792475 }, { "epoch": 7.79, "grad_norm": 2.2752573490142822, "learning_rate": 1.1166304229100262e-06, "loss": 0.0428, "step": 792500 }, { "epoch": 7.79, "grad_norm": 1.0475072860717773, "learning_rate": 1.1165063004557776e-06, "loss": 0.022, "step": 792525 }, { "epoch": 7.79, "grad_norm": 4.501756191253662, "learning_rate": 1.1163821780015293e-06, "loss": 0.0587, "step": 792550 }, { "epoch": 7.79, "grad_norm": 0.02387017011642456, "learning_rate": 1.116258055547281e-06, "loss": 0.0124, "step": 792575 }, { "epoch": 7.79, "grad_norm": 4.36020565032959, "learning_rate": 1.1161339330930323e-06, "loss": 0.0544, "step": 792600 }, { "epoch": 7.79, "grad_norm": 5.164957523345947, "learning_rate": 1.1160098106387838e-06, "loss": 0.0244, "step": 792625 }, { "epoch": 7.79, "grad_norm": 2.9952127933502197, "learning_rate": 1.1158856881845354e-06, "loss": 0.0538, "step": 792650 }, { "epoch": 7.79, "grad_norm": 0.27693432569503784, "learning_rate": 1.115761565730287e-06, "loss": 0.0213, "step": 792675 }, { "epoch": 7.79, "grad_norm": 3.0762717723846436, "learning_rate": 1.1156374432760384e-06, "loss": 0.0575, "step": 792700 }, { "epoch": 7.79, "grad_norm": 3.3896892070770264, "learning_rate": 1.11551332082179e-06, "loss": 0.0175, "step": 792725 }, { "epoch": 7.79, "grad_norm": 3.50334095954895, "learning_rate": 1.1153891983675417e-06, "loss": 0.0514, "step": 792750 }, { "epoch": 7.79, "grad_norm": 1.6731514930725098, "learning_rate": 1.1152650759132931e-06, "loss": 0.0149, "step": 792775 }, { "epoch": 7.79, "grad_norm": 3.8882603645324707, "learning_rate": 1.1151409534590446e-06, "loss": 0.0386, "step": 792800 }, { "epoch": 7.8, "grad_norm": 7.425607204437256, "learning_rate": 1.1150168310047962e-06, "loss": 0.0243, "step": 792825 }, { "epoch": 7.8, "grad_norm": 3.335636854171753, "learning_rate": 1.1148927085505478e-06, "loss": 0.0563, "step": 792850 }, { "epoch": 7.8, "grad_norm": 7.9532341957092285, "learning_rate": 1.1147685860962993e-06, "loss": 0.0105, "step": 792875 }, { "epoch": 7.8, "grad_norm": 3.9019935131073, "learning_rate": 1.1146494285402207e-06, "loss": 0.0637, "step": 792900 }, { "epoch": 7.8, "grad_norm": 0.15620917081832886, "learning_rate": 1.1145253060859723e-06, "loss": 0.0261, "step": 792925 }, { "epoch": 7.8, "grad_norm": 5.087230682373047, "learning_rate": 1.1144011836317238e-06, "loss": 0.0529, "step": 792950 }, { "epoch": 7.8, "grad_norm": 4.226147651672363, "learning_rate": 1.1142770611774754e-06, "loss": 0.0187, "step": 792975 }, { "epoch": 7.8, "grad_norm": 1.9026094675064087, "learning_rate": 1.114152938723227e-06, "loss": 0.0574, "step": 793000 }, { "epoch": 7.8, "grad_norm": 1.0335774421691895, "learning_rate": 1.1140288162689785e-06, "loss": 0.027, "step": 793025 }, { "epoch": 7.8, "grad_norm": 3.409811496734619, "learning_rate": 1.1139046938147299e-06, "loss": 0.0572, "step": 793050 }, { "epoch": 7.8, "grad_norm": 5.861052989959717, "learning_rate": 1.1137805713604815e-06, "loss": 0.0229, "step": 793075 }, { "epoch": 7.8, "grad_norm": 3.104412078857422, "learning_rate": 1.1136564489062332e-06, "loss": 0.0645, "step": 793100 }, { "epoch": 7.8, "grad_norm": 0.1853516846895218, "learning_rate": 1.1135323264519846e-06, "loss": 0.0088, "step": 793125 }, { "epoch": 7.8, "grad_norm": 3.970503091812134, "learning_rate": 1.113408203997736e-06, "loss": 0.08, "step": 793150 }, { "epoch": 7.8, "grad_norm": 0.2473968267440796, "learning_rate": 1.1132840815434876e-06, "loss": 0.0221, "step": 793175 }, { "epoch": 7.8, "grad_norm": 4.2572855949401855, "learning_rate": 1.1131599590892393e-06, "loss": 0.059, "step": 793200 }, { "epoch": 7.8, "grad_norm": 2.3723695278167725, "learning_rate": 1.1130358366349907e-06, "loss": 0.0231, "step": 793225 }, { "epoch": 7.8, "grad_norm": 3.7186810970306396, "learning_rate": 1.1129117141807421e-06, "loss": 0.0621, "step": 793250 }, { "epoch": 7.8, "grad_norm": 7.254424095153809, "learning_rate": 1.1127875917264938e-06, "loss": 0.0222, "step": 793275 }, { "epoch": 7.8, "grad_norm": 3.3356499671936035, "learning_rate": 1.1126634692722454e-06, "loss": 0.0514, "step": 793300 }, { "epoch": 7.8, "grad_norm": 1.0445195436477661, "learning_rate": 1.1125393468179968e-06, "loss": 0.0133, "step": 793325 }, { "epoch": 7.8, "grad_norm": 4.633252143859863, "learning_rate": 1.1124152243637484e-06, "loss": 0.0471, "step": 793350 }, { "epoch": 7.8, "grad_norm": 0.20173382759094238, "learning_rate": 1.1122911019094999e-06, "loss": 0.0216, "step": 793375 }, { "epoch": 7.8, "grad_norm": 3.1544840335845947, "learning_rate": 1.1121669794552515e-06, "loss": 0.0577, "step": 793400 }, { "epoch": 7.8, "grad_norm": 3.7115025520324707, "learning_rate": 1.1120428570010031e-06, "loss": 0.0239, "step": 793425 }, { "epoch": 7.8, "grad_norm": 4.3439040184021, "learning_rate": 1.1119187345467546e-06, "loss": 0.0664, "step": 793450 }, { "epoch": 7.8, "grad_norm": 2.5428037643432617, "learning_rate": 1.111794612092506e-06, "loss": 0.0199, "step": 793475 }, { "epoch": 7.8, "grad_norm": 2.481153726577759, "learning_rate": 1.1116704896382576e-06, "loss": 0.0539, "step": 793500 }, { "epoch": 7.8, "grad_norm": 0.09035170823335648, "learning_rate": 1.1115463671840093e-06, "loss": 0.0187, "step": 793525 }, { "epoch": 7.8, "grad_norm": 2.181122303009033, "learning_rate": 1.1114222447297607e-06, "loss": 0.0462, "step": 793550 }, { "epoch": 7.8, "grad_norm": 6.253968238830566, "learning_rate": 1.111298122275512e-06, "loss": 0.0132, "step": 793575 }, { "epoch": 7.8, "grad_norm": 2.192253351211548, "learning_rate": 1.1111739998212637e-06, "loss": 0.0316, "step": 793600 }, { "epoch": 7.8, "grad_norm": 0.25224539637565613, "learning_rate": 1.1110498773670154e-06, "loss": 0.0146, "step": 793625 }, { "epoch": 7.8, "grad_norm": 3.8185808658599854, "learning_rate": 1.1109257549127668e-06, "loss": 0.047, "step": 793650 }, { "epoch": 7.8, "grad_norm": 13.179649353027344, "learning_rate": 1.1108016324585182e-06, "loss": 0.0183, "step": 793675 }, { "epoch": 7.8, "grad_norm": 3.6456618309020996, "learning_rate": 1.1106775100042699e-06, "loss": 0.0378, "step": 793700 }, { "epoch": 7.8, "grad_norm": 5.262162208557129, "learning_rate": 1.1105533875500215e-06, "loss": 0.0219, "step": 793725 }, { "epoch": 7.8, "grad_norm": 1.9582946300506592, "learning_rate": 1.110429265095773e-06, "loss": 0.0543, "step": 793750 }, { "epoch": 7.8, "grad_norm": 0.21052555739879608, "learning_rate": 1.1103051426415245e-06, "loss": 0.0182, "step": 793775 }, { "epoch": 7.8, "grad_norm": 3.068315029144287, "learning_rate": 1.110181020187276e-06, "loss": 0.077, "step": 793800 }, { "epoch": 7.81, "grad_norm": 0.2092253863811493, "learning_rate": 1.1100568977330276e-06, "loss": 0.0215, "step": 793825 }, { "epoch": 7.81, "grad_norm": 4.048862457275391, "learning_rate": 1.1099327752787792e-06, "loss": 0.0478, "step": 793850 }, { "epoch": 7.81, "grad_norm": 0.22090105712413788, "learning_rate": 1.1098086528245307e-06, "loss": 0.0179, "step": 793875 }, { "epoch": 7.81, "grad_norm": 2.463618040084839, "learning_rate": 1.109684530370282e-06, "loss": 0.0757, "step": 793900 }, { "epoch": 7.81, "grad_norm": 14.511125564575195, "learning_rate": 1.1095604079160337e-06, "loss": 0.0242, "step": 793925 }, { "epoch": 7.81, "grad_norm": 3.82666015625, "learning_rate": 1.1094362854617854e-06, "loss": 0.0414, "step": 793950 }, { "epoch": 7.81, "grad_norm": 0.027003688737750053, "learning_rate": 1.1093121630075368e-06, "loss": 0.0089, "step": 793975 }, { "epoch": 7.81, "grad_norm": 2.4514434337615967, "learning_rate": 1.1091880405532882e-06, "loss": 0.0493, "step": 794000 }, { "epoch": 7.81, "grad_norm": 6.3064799308776855, "learning_rate": 1.1090639180990398e-06, "loss": 0.0143, "step": 794025 }, { "epoch": 7.81, "grad_norm": 4.927177429199219, "learning_rate": 1.1089397956447915e-06, "loss": 0.0457, "step": 794050 }, { "epoch": 7.81, "grad_norm": 1.5110121965408325, "learning_rate": 1.1088156731905429e-06, "loss": 0.0229, "step": 794075 }, { "epoch": 7.81, "grad_norm": 3.2305235862731934, "learning_rate": 1.1086915507362945e-06, "loss": 0.0493, "step": 794100 }, { "epoch": 7.81, "grad_norm": 8.92856216430664, "learning_rate": 1.108567428282046e-06, "loss": 0.0255, "step": 794125 }, { "epoch": 7.81, "grad_norm": 2.9367756843566895, "learning_rate": 1.1084433058277976e-06, "loss": 0.0495, "step": 794150 }, { "epoch": 7.81, "grad_norm": 0.6089922189712524, "learning_rate": 1.108319183373549e-06, "loss": 0.0105, "step": 794175 }, { "epoch": 7.81, "grad_norm": 3.8120615482330322, "learning_rate": 1.1081950609193006e-06, "loss": 0.0649, "step": 794200 }, { "epoch": 7.81, "grad_norm": 1.1741727590560913, "learning_rate": 1.1080709384650523e-06, "loss": 0.0242, "step": 794225 }, { "epoch": 7.81, "grad_norm": 4.678829669952393, "learning_rate": 1.1079468160108037e-06, "loss": 0.0541, "step": 794250 }, { "epoch": 7.81, "grad_norm": 13.663060188293457, "learning_rate": 1.1078226935565553e-06, "loss": 0.0145, "step": 794275 }, { "epoch": 7.81, "grad_norm": 2.0616602897644043, "learning_rate": 1.1076985711023068e-06, "loss": 0.0554, "step": 794300 }, { "epoch": 7.81, "grad_norm": 3.6094977855682373, "learning_rate": 1.1075744486480584e-06, "loss": 0.0157, "step": 794325 }, { "epoch": 7.81, "grad_norm": 3.7330894470214844, "learning_rate": 1.1074503261938098e-06, "loss": 0.052, "step": 794350 }, { "epoch": 7.81, "grad_norm": 11.788978576660156, "learning_rate": 1.1073262037395615e-06, "loss": 0.0203, "step": 794375 }, { "epoch": 7.81, "grad_norm": 3.232294797897339, "learning_rate": 1.1072020812853129e-06, "loss": 0.0577, "step": 794400 }, { "epoch": 7.81, "grad_norm": 0.027369899675250053, "learning_rate": 1.1070779588310645e-06, "loss": 0.018, "step": 794425 }, { "epoch": 7.81, "grad_norm": 4.774018287658691, "learning_rate": 1.1069538363768161e-06, "loss": 0.06, "step": 794450 }, { "epoch": 7.81, "grad_norm": 5.502200126647949, "learning_rate": 1.1068297139225676e-06, "loss": 0.0193, "step": 794475 }, { "epoch": 7.81, "grad_norm": 3.320582151412964, "learning_rate": 1.106705591468319e-06, "loss": 0.047, "step": 794500 }, { "epoch": 7.81, "grad_norm": 0.6239957213401794, "learning_rate": 1.1065814690140706e-06, "loss": 0.0156, "step": 794525 }, { "epoch": 7.81, "grad_norm": 2.4898650646209717, "learning_rate": 1.1064573465598223e-06, "loss": 0.0826, "step": 794550 }, { "epoch": 7.81, "grad_norm": 10.763228416442871, "learning_rate": 1.1063332241055737e-06, "loss": 0.0213, "step": 794575 }, { "epoch": 7.81, "grad_norm": 3.321458101272583, "learning_rate": 1.1062091016513251e-06, "loss": 0.0429, "step": 794600 }, { "epoch": 7.81, "grad_norm": 0.14152079820632935, "learning_rate": 1.1060849791970767e-06, "loss": 0.0095, "step": 794625 }, { "epoch": 7.81, "grad_norm": 2.7633132934570312, "learning_rate": 1.1059608567428284e-06, "loss": 0.0386, "step": 794650 }, { "epoch": 7.81, "grad_norm": 0.22419357299804688, "learning_rate": 1.1058367342885798e-06, "loss": 0.0134, "step": 794675 }, { "epoch": 7.81, "grad_norm": 3.7319371700286865, "learning_rate": 1.1057126118343314e-06, "loss": 0.06, "step": 794700 }, { "epoch": 7.81, "grad_norm": 3.306600332260132, "learning_rate": 1.1055884893800829e-06, "loss": 0.0201, "step": 794725 }, { "epoch": 7.81, "grad_norm": 2.7358603477478027, "learning_rate": 1.1054643669258345e-06, "loss": 0.0525, "step": 794750 }, { "epoch": 7.81, "grad_norm": 8.655839920043945, "learning_rate": 1.105340244471586e-06, "loss": 0.013, "step": 794775 }, { "epoch": 7.81, "grad_norm": 10.865409851074219, "learning_rate": 1.1052161220173376e-06, "loss": 0.0482, "step": 794800 }, { "epoch": 7.81, "grad_norm": 2.453794240951538, "learning_rate": 1.105091999563089e-06, "loss": 0.0272, "step": 794825 }, { "epoch": 7.82, "grad_norm": 1.7546459436416626, "learning_rate": 1.1049678771088406e-06, "loss": 0.0697, "step": 794850 }, { "epoch": 7.82, "grad_norm": 15.223298072814941, "learning_rate": 1.1048437546545922e-06, "loss": 0.0194, "step": 794875 }, { "epoch": 7.82, "grad_norm": 2.1658577919006348, "learning_rate": 1.1047196322003437e-06, "loss": 0.0523, "step": 794900 }, { "epoch": 7.82, "grad_norm": 7.929093360900879, "learning_rate": 1.104595509746095e-06, "loss": 0.0268, "step": 794925 }, { "epoch": 7.82, "grad_norm": 3.364339828491211, "learning_rate": 1.1044713872918467e-06, "loss": 0.0483, "step": 794950 }, { "epoch": 7.82, "grad_norm": 0.13177801668643951, "learning_rate": 1.1043472648375984e-06, "loss": 0.0205, "step": 794975 }, { "epoch": 7.82, "grad_norm": 2.9131383895874023, "learning_rate": 1.1042231423833498e-06, "loss": 0.0386, "step": 795000 }, { "epoch": 7.82, "grad_norm": 2.1889429092407227, "learning_rate": 1.1040990199291012e-06, "loss": 0.0245, "step": 795025 }, { "epoch": 7.82, "grad_norm": 3.2322301864624023, "learning_rate": 1.1039748974748528e-06, "loss": 0.05, "step": 795050 }, { "epoch": 7.82, "grad_norm": 0.3981228172779083, "learning_rate": 1.1038507750206045e-06, "loss": 0.0186, "step": 795075 }, { "epoch": 7.82, "grad_norm": 2.4601759910583496, "learning_rate": 1.103726652566356e-06, "loss": 0.0459, "step": 795100 }, { "epoch": 7.82, "grad_norm": 0.16037438809871674, "learning_rate": 1.1036025301121075e-06, "loss": 0.0224, "step": 795125 }, { "epoch": 7.82, "grad_norm": 2.208859920501709, "learning_rate": 1.103478407657859e-06, "loss": 0.0843, "step": 795150 }, { "epoch": 7.82, "grad_norm": 1.4769278764724731, "learning_rate": 1.1033542852036106e-06, "loss": 0.0314, "step": 795175 }, { "epoch": 7.82, "grad_norm": 2.6351687908172607, "learning_rate": 1.103235127647532e-06, "loss": 0.0708, "step": 795200 }, { "epoch": 7.82, "grad_norm": 8.19378662109375, "learning_rate": 1.1031110051932837e-06, "loss": 0.0245, "step": 795225 }, { "epoch": 7.82, "grad_norm": 2.522662401199341, "learning_rate": 1.102986882739035e-06, "loss": 0.0517, "step": 795250 }, { "epoch": 7.82, "grad_norm": 1.3104593753814697, "learning_rate": 1.1028627602847865e-06, "loss": 0.0159, "step": 795275 }, { "epoch": 7.82, "grad_norm": 2.6054298877716064, "learning_rate": 1.1027386378305382e-06, "loss": 0.0513, "step": 795300 }, { "epoch": 7.82, "grad_norm": 0.015979403629899025, "learning_rate": 1.1026145153762898e-06, "loss": 0.0125, "step": 795325 }, { "epoch": 7.82, "grad_norm": 3.968200922012329, "learning_rate": 1.1024903929220412e-06, "loss": 0.0844, "step": 795350 }, { "epoch": 7.82, "grad_norm": 6.052824974060059, "learning_rate": 1.1023662704677926e-06, "loss": 0.019, "step": 795375 }, { "epoch": 7.82, "grad_norm": 2.4422237873077393, "learning_rate": 1.1022421480135443e-06, "loss": 0.0724, "step": 795400 }, { "epoch": 7.82, "grad_norm": 0.24186179041862488, "learning_rate": 1.102118025559296e-06, "loss": 0.0158, "step": 795425 }, { "epoch": 7.82, "grad_norm": 4.394886016845703, "learning_rate": 1.1019939031050473e-06, "loss": 0.0445, "step": 795450 }, { "epoch": 7.82, "grad_norm": 0.09193848073482513, "learning_rate": 1.101869780650799e-06, "loss": 0.0092, "step": 795475 }, { "epoch": 7.82, "grad_norm": 2.594266414642334, "learning_rate": 1.1017456581965504e-06, "loss": 0.046, "step": 795500 }, { "epoch": 7.82, "grad_norm": 4.040196895599365, "learning_rate": 1.101621535742302e-06, "loss": 0.0204, "step": 795525 }, { "epoch": 7.82, "grad_norm": 3.4981329441070557, "learning_rate": 1.1014974132880535e-06, "loss": 0.0591, "step": 795550 }, { "epoch": 7.82, "grad_norm": 0.5739818215370178, "learning_rate": 1.101373290833805e-06, "loss": 0.0196, "step": 795575 }, { "epoch": 7.82, "grad_norm": 2.0961053371429443, "learning_rate": 1.1012491683795565e-06, "loss": 0.075, "step": 795600 }, { "epoch": 7.82, "grad_norm": 0.2072296291589737, "learning_rate": 1.1011250459253081e-06, "loss": 0.0164, "step": 795625 }, { "epoch": 7.82, "grad_norm": 1.9801348447799683, "learning_rate": 1.1010009234710598e-06, "loss": 0.0513, "step": 795650 }, { "epoch": 7.82, "grad_norm": 0.08928566426038742, "learning_rate": 1.1008768010168112e-06, "loss": 0.0138, "step": 795675 }, { "epoch": 7.82, "grad_norm": 5.0799560546875, "learning_rate": 1.1007526785625628e-06, "loss": 0.0418, "step": 795700 }, { "epoch": 7.82, "grad_norm": 1.8320260047912598, "learning_rate": 1.1006285561083145e-06, "loss": 0.0182, "step": 795725 }, { "epoch": 7.82, "grad_norm": 4.141543865203857, "learning_rate": 1.100504433654066e-06, "loss": 0.0842, "step": 795750 }, { "epoch": 7.82, "grad_norm": 10.978754043579102, "learning_rate": 1.1003803111998173e-06, "loss": 0.0251, "step": 795775 }, { "epoch": 7.82, "grad_norm": 2.864675998687744, "learning_rate": 1.100256188745569e-06, "loss": 0.0656, "step": 795800 }, { "epoch": 7.82, "grad_norm": 0.06477632373571396, "learning_rate": 1.1001320662913206e-06, "loss": 0.0278, "step": 795825 }, { "epoch": 7.82, "grad_norm": 2.0620298385620117, "learning_rate": 1.100007943837072e-06, "loss": 0.0636, "step": 795850 }, { "epoch": 7.83, "grad_norm": 1.0815632343292236, "learning_rate": 1.0998838213828234e-06, "loss": 0.0134, "step": 795875 }, { "epoch": 7.83, "grad_norm": 1.7258968353271484, "learning_rate": 1.099759698928575e-06, "loss": 0.0435, "step": 795900 }, { "epoch": 7.83, "grad_norm": 0.09959863871335983, "learning_rate": 1.0996355764743267e-06, "loss": 0.0188, "step": 795925 }, { "epoch": 7.83, "grad_norm": 2.3993940353393555, "learning_rate": 1.0995114540200781e-06, "loss": 0.0528, "step": 795950 }, { "epoch": 7.83, "grad_norm": 9.7086820602417, "learning_rate": 1.0993873315658296e-06, "loss": 0.0299, "step": 795975 }, { "epoch": 7.83, "grad_norm": 4.919755935668945, "learning_rate": 1.0992632091115812e-06, "loss": 0.0654, "step": 796000 }, { "epoch": 7.83, "grad_norm": 0.3483085334300995, "learning_rate": 1.0991390866573328e-06, "loss": 0.0211, "step": 796025 }, { "epoch": 7.83, "grad_norm": 4.0866498947143555, "learning_rate": 1.0990149642030842e-06, "loss": 0.0387, "step": 796050 }, { "epoch": 7.83, "grad_norm": 16.556556701660156, "learning_rate": 1.0988908417488359e-06, "loss": 0.0252, "step": 796075 }, { "epoch": 7.83, "grad_norm": 1.6339406967163086, "learning_rate": 1.0987667192945873e-06, "loss": 0.056, "step": 796100 }, { "epoch": 7.83, "grad_norm": 9.60170841217041, "learning_rate": 1.098642596840339e-06, "loss": 0.0152, "step": 796125 }, { "epoch": 7.83, "grad_norm": 1.8886250257492065, "learning_rate": 1.0985184743860906e-06, "loss": 0.0567, "step": 796150 }, { "epoch": 7.83, "grad_norm": 6.120396137237549, "learning_rate": 1.098394351931842e-06, "loss": 0.0165, "step": 796175 }, { "epoch": 7.83, "grad_norm": 4.17319393157959, "learning_rate": 1.0982702294775934e-06, "loss": 0.0376, "step": 796200 }, { "epoch": 7.83, "grad_norm": 0.17401085793972015, "learning_rate": 1.098146107023345e-06, "loss": 0.0148, "step": 796225 }, { "epoch": 7.83, "grad_norm": 2.4918642044067383, "learning_rate": 1.0980219845690967e-06, "loss": 0.0597, "step": 796250 }, { "epoch": 7.83, "grad_norm": 15.563528060913086, "learning_rate": 1.0978978621148481e-06, "loss": 0.0197, "step": 796275 }, { "epoch": 7.83, "grad_norm": 2.797131061553955, "learning_rate": 1.0977737396605995e-06, "loss": 0.0669, "step": 796300 }, { "epoch": 7.83, "grad_norm": 1.7196357250213623, "learning_rate": 1.0976496172063512e-06, "loss": 0.0125, "step": 796325 }, { "epoch": 7.83, "grad_norm": 4.266289234161377, "learning_rate": 1.0975254947521028e-06, "loss": 0.0742, "step": 796350 }, { "epoch": 7.83, "grad_norm": 12.688620567321777, "learning_rate": 1.0974013722978542e-06, "loss": 0.028, "step": 796375 }, { "epoch": 7.83, "grad_norm": 3.87129282951355, "learning_rate": 1.0972772498436057e-06, "loss": 0.0938, "step": 796400 }, { "epoch": 7.83, "grad_norm": 2.216198205947876, "learning_rate": 1.0971531273893573e-06, "loss": 0.0227, "step": 796425 }, { "epoch": 7.83, "grad_norm": 5.747567176818848, "learning_rate": 1.097029004935109e-06, "loss": 0.0626, "step": 796450 }, { "epoch": 7.83, "grad_norm": 6.471340179443359, "learning_rate": 1.0969048824808603e-06, "loss": 0.03, "step": 796475 }, { "epoch": 7.83, "grad_norm": 10.8690185546875, "learning_rate": 1.096780760026612e-06, "loss": 0.0707, "step": 796500 }, { "epoch": 7.83, "grad_norm": 0.10111987590789795, "learning_rate": 1.0966566375723634e-06, "loss": 0.0103, "step": 796525 }, { "epoch": 7.83, "grad_norm": 4.197123050689697, "learning_rate": 1.096532515118115e-06, "loss": 0.051, "step": 796550 }, { "epoch": 7.83, "grad_norm": 1.641063928604126, "learning_rate": 1.0964083926638667e-06, "loss": 0.0252, "step": 796575 }, { "epoch": 7.83, "grad_norm": 2.8868937492370605, "learning_rate": 1.096284270209618e-06, "loss": 0.0583, "step": 796600 }, { "epoch": 7.83, "grad_norm": 3.4563188552856445, "learning_rate": 1.0961601477553695e-06, "loss": 0.0198, "step": 796625 }, { "epoch": 7.83, "grad_norm": 3.1495447158813477, "learning_rate": 1.0960360253011212e-06, "loss": 0.0515, "step": 796650 }, { "epoch": 7.83, "grad_norm": 7.862451076507568, "learning_rate": 1.0959119028468728e-06, "loss": 0.0245, "step": 796675 }, { "epoch": 7.83, "grad_norm": 2.655604600906372, "learning_rate": 1.0957877803926242e-06, "loss": 0.0521, "step": 796700 }, { "epoch": 7.83, "grad_norm": 1.2714061737060547, "learning_rate": 1.0956636579383756e-06, "loss": 0.0257, "step": 796725 }, { "epoch": 7.83, "grad_norm": 3.0813674926757812, "learning_rate": 1.0955395354841273e-06, "loss": 0.0618, "step": 796750 }, { "epoch": 7.83, "grad_norm": 4.211644172668457, "learning_rate": 1.095415413029879e-06, "loss": 0.0212, "step": 796775 }, { "epoch": 7.83, "grad_norm": 2.096189260482788, "learning_rate": 1.0952912905756303e-06, "loss": 0.0659, "step": 796800 }, { "epoch": 7.83, "grad_norm": 0.48366689682006836, "learning_rate": 1.0951671681213818e-06, "loss": 0.0233, "step": 796825 }, { "epoch": 7.83, "grad_norm": 3.4706339836120605, "learning_rate": 1.0950430456671334e-06, "loss": 0.0532, "step": 796850 }, { "epoch": 7.84, "grad_norm": 4.253726482391357, "learning_rate": 1.094918923212885e-06, "loss": 0.0203, "step": 796875 }, { "epoch": 7.84, "grad_norm": 4.277647495269775, "learning_rate": 1.0947948007586364e-06, "loss": 0.0292, "step": 796900 }, { "epoch": 7.84, "grad_norm": 1.4527885913848877, "learning_rate": 1.094670678304388e-06, "loss": 0.0211, "step": 796925 }, { "epoch": 7.84, "grad_norm": 3.1745264530181885, "learning_rate": 1.0945465558501395e-06, "loss": 0.0508, "step": 796950 }, { "epoch": 7.84, "grad_norm": 0.7989071011543274, "learning_rate": 1.0944224333958911e-06, "loss": 0.0237, "step": 796975 }, { "epoch": 7.84, "grad_norm": 3.4814608097076416, "learning_rate": 1.0942983109416428e-06, "loss": 0.0496, "step": 797000 }, { "epoch": 7.84, "grad_norm": 0.040432192385196686, "learning_rate": 1.0941741884873942e-06, "loss": 0.0157, "step": 797025 }, { "epoch": 7.84, "grad_norm": 3.9836974143981934, "learning_rate": 1.0940500660331458e-06, "loss": 0.0743, "step": 797050 }, { "epoch": 7.84, "grad_norm": 1.1857264041900635, "learning_rate": 1.0939259435788973e-06, "loss": 0.02, "step": 797075 }, { "epoch": 7.84, "grad_norm": 2.1674089431762695, "learning_rate": 1.0938018211246489e-06, "loss": 0.0351, "step": 797100 }, { "epoch": 7.84, "grad_norm": 2.4011776447296143, "learning_rate": 1.0936776986704003e-06, "loss": 0.0075, "step": 797125 }, { "epoch": 7.84, "grad_norm": 4.785344123840332, "learning_rate": 1.093553576216152e-06, "loss": 0.0744, "step": 797150 }, { "epoch": 7.84, "grad_norm": 0.18346530199050903, "learning_rate": 1.0934294537619036e-06, "loss": 0.017, "step": 797175 }, { "epoch": 7.84, "grad_norm": 2.7291419506073, "learning_rate": 1.093305331307655e-06, "loss": 0.053, "step": 797200 }, { "epoch": 7.84, "grad_norm": 1.3900386095046997, "learning_rate": 1.0931812088534064e-06, "loss": 0.0141, "step": 797225 }, { "epoch": 7.84, "grad_norm": 3.3599040508270264, "learning_rate": 1.093057086399158e-06, "loss": 0.0502, "step": 797250 }, { "epoch": 7.84, "grad_norm": 4.727325439453125, "learning_rate": 1.0929329639449097e-06, "loss": 0.0273, "step": 797275 }, { "epoch": 7.84, "grad_norm": 3.334531545639038, "learning_rate": 1.0928088414906611e-06, "loss": 0.0345, "step": 797300 }, { "epoch": 7.84, "grad_norm": 8.945487976074219, "learning_rate": 1.0926847190364126e-06, "loss": 0.0243, "step": 797325 }, { "epoch": 7.84, "grad_norm": 4.1536664962768555, "learning_rate": 1.0925605965821642e-06, "loss": 0.0467, "step": 797350 }, { "epoch": 7.84, "grad_norm": 0.6225927472114563, "learning_rate": 1.0924364741279158e-06, "loss": 0.016, "step": 797375 }, { "epoch": 7.84, "grad_norm": 3.7116830348968506, "learning_rate": 1.0923123516736672e-06, "loss": 0.0731, "step": 797400 }, { "epoch": 7.84, "grad_norm": 1.7906402349472046, "learning_rate": 1.0921882292194189e-06, "loss": 0.0257, "step": 797425 }, { "epoch": 7.84, "grad_norm": 2.799489974975586, "learning_rate": 1.0920641067651703e-06, "loss": 0.0562, "step": 797450 }, { "epoch": 7.84, "grad_norm": 0.6958701610565186, "learning_rate": 1.091939984310922e-06, "loss": 0.0138, "step": 797475 }, { "epoch": 7.84, "grad_norm": 4.614925861358643, "learning_rate": 1.0918158618566734e-06, "loss": 0.0463, "step": 797500 }, { "epoch": 7.84, "grad_norm": 2.038463830947876, "learning_rate": 1.091691739402425e-06, "loss": 0.0082, "step": 797525 }, { "epoch": 7.84, "grad_norm": 3.5252668857574463, "learning_rate": 1.0915676169481764e-06, "loss": 0.0473, "step": 797550 }, { "epoch": 7.84, "grad_norm": 0.7922297716140747, "learning_rate": 1.091443494493928e-06, "loss": 0.017, "step": 797575 }, { "epoch": 7.84, "grad_norm": 2.6778459548950195, "learning_rate": 1.0913193720396797e-06, "loss": 0.0637, "step": 797600 }, { "epoch": 7.84, "grad_norm": 0.5343368649482727, "learning_rate": 1.0911952495854311e-06, "loss": 0.0118, "step": 797625 }, { "epoch": 7.84, "grad_norm": 2.238157033920288, "learning_rate": 1.0910711271311825e-06, "loss": 0.0669, "step": 797650 }, { "epoch": 7.84, "grad_norm": 5.527296543121338, "learning_rate": 1.0909470046769342e-06, "loss": 0.0167, "step": 797675 }, { "epoch": 7.84, "grad_norm": 2.222323179244995, "learning_rate": 1.0908228822226858e-06, "loss": 0.0438, "step": 797700 }, { "epoch": 7.84, "grad_norm": 0.25648584961891174, "learning_rate": 1.0906987597684372e-06, "loss": 0.0131, "step": 797725 }, { "epoch": 7.84, "grad_norm": 3.420788288116455, "learning_rate": 1.0905746373141887e-06, "loss": 0.0614, "step": 797750 }, { "epoch": 7.84, "grad_norm": 0.4138597249984741, "learning_rate": 1.0904505148599403e-06, "loss": 0.0202, "step": 797775 }, { "epoch": 7.84, "grad_norm": 3.953188180923462, "learning_rate": 1.090326392405692e-06, "loss": 0.0538, "step": 797800 }, { "epoch": 7.84, "grad_norm": 0.07253990322351456, "learning_rate": 1.0902022699514433e-06, "loss": 0.0191, "step": 797825 }, { "epoch": 7.84, "grad_norm": 3.7627780437469482, "learning_rate": 1.090078147497195e-06, "loss": 0.0591, "step": 797850 }, { "epoch": 7.84, "grad_norm": 2.095644235610962, "learning_rate": 1.0899540250429464e-06, "loss": 0.0165, "step": 797875 }, { "epoch": 7.85, "grad_norm": 3.034993886947632, "learning_rate": 1.089829902588698e-06, "loss": 0.0603, "step": 797900 }, { "epoch": 7.85, "grad_norm": 17.03717803955078, "learning_rate": 1.0897057801344495e-06, "loss": 0.0235, "step": 797925 }, { "epoch": 7.85, "grad_norm": 2.3626279830932617, "learning_rate": 1.089581657680201e-06, "loss": 0.0598, "step": 797950 }, { "epoch": 7.85, "grad_norm": 6.3340067863464355, "learning_rate": 1.0894575352259525e-06, "loss": 0.0239, "step": 797975 }, { "epoch": 7.85, "grad_norm": 3.356738567352295, "learning_rate": 1.0893334127717042e-06, "loss": 0.0378, "step": 798000 }, { "epoch": 7.85, "grad_norm": 8.048087120056152, "learning_rate": 1.0892092903174558e-06, "loss": 0.0223, "step": 798025 }, { "epoch": 7.85, "grad_norm": 3.5986251831054688, "learning_rate": 1.0890851678632072e-06, "loss": 0.0362, "step": 798050 }, { "epoch": 7.85, "grad_norm": 3.220731258392334, "learning_rate": 1.0889610454089586e-06, "loss": 0.0311, "step": 798075 }, { "epoch": 7.85, "grad_norm": 3.003861427307129, "learning_rate": 1.0888369229547103e-06, "loss": 0.0615, "step": 798100 }, { "epoch": 7.85, "grad_norm": 2.0161643028259277, "learning_rate": 1.088712800500462e-06, "loss": 0.0143, "step": 798125 }, { "epoch": 7.85, "grad_norm": 2.7954633235931396, "learning_rate": 1.0885886780462133e-06, "loss": 0.0467, "step": 798150 }, { "epoch": 7.85, "grad_norm": 0.11540629714727402, "learning_rate": 1.0884645555919648e-06, "loss": 0.023, "step": 798175 }, { "epoch": 7.85, "grad_norm": 3.6739094257354736, "learning_rate": 1.0883404331377164e-06, "loss": 0.0443, "step": 798200 }, { "epoch": 7.85, "grad_norm": 4.770630359649658, "learning_rate": 1.088216310683468e-06, "loss": 0.0184, "step": 798225 }, { "epoch": 7.85, "grad_norm": 5.316059112548828, "learning_rate": 1.0880921882292194e-06, "loss": 0.0721, "step": 798250 }, { "epoch": 7.85, "grad_norm": 8.08474349975586, "learning_rate": 1.087968065774971e-06, "loss": 0.0213, "step": 798275 }, { "epoch": 7.85, "grad_norm": 3.561228036880493, "learning_rate": 1.0878439433207225e-06, "loss": 0.0571, "step": 798300 }, { "epoch": 7.85, "grad_norm": 14.455503463745117, "learning_rate": 1.0877198208664741e-06, "loss": 0.0308, "step": 798325 }, { "epoch": 7.85, "grad_norm": 5.442323207855225, "learning_rate": 1.0875956984122256e-06, "loss": 0.0525, "step": 798350 }, { "epoch": 7.85, "grad_norm": 2.38761568069458, "learning_rate": 1.0874715759579772e-06, "loss": 0.0192, "step": 798375 }, { "epoch": 7.85, "grad_norm": 4.178064823150635, "learning_rate": 1.0873474535037286e-06, "loss": 0.0563, "step": 798400 }, { "epoch": 7.85, "grad_norm": 9.040637969970703, "learning_rate": 1.0872233310494803e-06, "loss": 0.0202, "step": 798425 }, { "epoch": 7.85, "grad_norm": 2.1465232372283936, "learning_rate": 1.0870992085952319e-06, "loss": 0.0343, "step": 798450 }, { "epoch": 7.85, "grad_norm": 10.067225456237793, "learning_rate": 1.0869750861409833e-06, "loss": 0.0247, "step": 798475 }, { "epoch": 7.85, "grad_norm": 2.738560914993286, "learning_rate": 1.086850963686735e-06, "loss": 0.0504, "step": 798500 }, { "epoch": 7.85, "grad_norm": 5.775944232940674, "learning_rate": 1.0867268412324866e-06, "loss": 0.0207, "step": 798525 }, { "epoch": 7.85, "grad_norm": 2.920914649963379, "learning_rate": 1.086602718778238e-06, "loss": 0.0705, "step": 798550 }, { "epoch": 7.85, "grad_norm": 5.723874568939209, "learning_rate": 1.0864785963239894e-06, "loss": 0.012, "step": 798575 }, { "epoch": 7.85, "grad_norm": 5.004447937011719, "learning_rate": 1.086354473869741e-06, "loss": 0.0647, "step": 798600 }, { "epoch": 7.85, "grad_norm": 5.846856594085693, "learning_rate": 1.0862303514154927e-06, "loss": 0.0319, "step": 798625 }, { "epoch": 7.85, "grad_norm": 3.7126455307006836, "learning_rate": 1.0861062289612441e-06, "loss": 0.0568, "step": 798650 }, { "epoch": 7.85, "grad_norm": 10.671330451965332, "learning_rate": 1.0859821065069955e-06, "loss": 0.022, "step": 798675 }, { "epoch": 7.85, "grad_norm": 3.290480136871338, "learning_rate": 1.0858579840527472e-06, "loss": 0.0755, "step": 798700 }, { "epoch": 7.85, "grad_norm": 0.3907349109649658, "learning_rate": 1.0857338615984988e-06, "loss": 0.0229, "step": 798725 }, { "epoch": 7.85, "grad_norm": 3.431631088256836, "learning_rate": 1.0856097391442502e-06, "loss": 0.0618, "step": 798750 }, { "epoch": 7.85, "grad_norm": 1.6972882747650146, "learning_rate": 1.0854856166900017e-06, "loss": 0.0108, "step": 798775 }, { "epoch": 7.85, "grad_norm": 3.028376579284668, "learning_rate": 1.0853614942357533e-06, "loss": 0.0466, "step": 798800 }, { "epoch": 7.85, "grad_norm": 0.3761840760707855, "learning_rate": 1.085237371781505e-06, "loss": 0.0185, "step": 798825 }, { "epoch": 7.85, "grad_norm": 4.663626670837402, "learning_rate": 1.0851132493272564e-06, "loss": 0.0449, "step": 798850 }, { "epoch": 7.85, "grad_norm": 6.847384929656982, "learning_rate": 1.084989126873008e-06, "loss": 0.0339, "step": 798875 }, { "epoch": 7.85, "grad_norm": 4.367579460144043, "learning_rate": 1.0848650044187594e-06, "loss": 0.0604, "step": 798900 }, { "epoch": 7.86, "grad_norm": 3.244192600250244, "learning_rate": 1.084740881964511e-06, "loss": 0.0118, "step": 798925 }, { "epoch": 7.86, "grad_norm": 3.643726348876953, "learning_rate": 1.0846167595102627e-06, "loss": 0.0946, "step": 798950 }, { "epoch": 7.86, "grad_norm": 6.560542583465576, "learning_rate": 1.084492637056014e-06, "loss": 0.0229, "step": 798975 }, { "epoch": 7.86, "grad_norm": 3.0995423793792725, "learning_rate": 1.0843734794999356e-06, "loss": 0.0564, "step": 799000 }, { "epoch": 7.86, "grad_norm": 0.27676916122436523, "learning_rate": 1.084249357045687e-06, "loss": 0.0141, "step": 799025 }, { "epoch": 7.86, "grad_norm": 0.9734936356544495, "learning_rate": 1.0841252345914386e-06, "loss": 0.0474, "step": 799050 }, { "epoch": 7.86, "grad_norm": 0.02944021299481392, "learning_rate": 1.0840011121371902e-06, "loss": 0.0145, "step": 799075 }, { "epoch": 7.86, "grad_norm": 3.669633626937866, "learning_rate": 1.0838769896829417e-06, "loss": 0.0617, "step": 799100 }, { "epoch": 7.86, "grad_norm": 1.307680606842041, "learning_rate": 1.083752867228693e-06, "loss": 0.0249, "step": 799125 }, { "epoch": 7.86, "grad_norm": 3.225062370300293, "learning_rate": 1.0836287447744447e-06, "loss": 0.0536, "step": 799150 }, { "epoch": 7.86, "grad_norm": 8.031790733337402, "learning_rate": 1.0835046223201964e-06, "loss": 0.0196, "step": 799175 }, { "epoch": 7.86, "grad_norm": 4.584626197814941, "learning_rate": 1.0833804998659478e-06, "loss": 0.0522, "step": 799200 }, { "epoch": 7.86, "grad_norm": 0.02708553895354271, "learning_rate": 1.0832563774116994e-06, "loss": 0.0198, "step": 799225 }, { "epoch": 7.86, "grad_norm": 1.6849292516708374, "learning_rate": 1.0831322549574508e-06, "loss": 0.0723, "step": 799250 }, { "epoch": 7.86, "grad_norm": 6.8874406814575195, "learning_rate": 1.0830081325032025e-06, "loss": 0.0151, "step": 799275 }, { "epoch": 7.86, "grad_norm": 3.5348551273345947, "learning_rate": 1.0828840100489541e-06, "loss": 0.0565, "step": 799300 }, { "epoch": 7.86, "grad_norm": 2.625105381011963, "learning_rate": 1.0827598875947055e-06, "loss": 0.0213, "step": 799325 }, { "epoch": 7.86, "grad_norm": 2.748854398727417, "learning_rate": 1.082635765140457e-06, "loss": 0.078, "step": 799350 }, { "epoch": 7.86, "grad_norm": 0.028522852808237076, "learning_rate": 1.0825116426862086e-06, "loss": 0.0138, "step": 799375 }, { "epoch": 7.86, "grad_norm": 1.8434908390045166, "learning_rate": 1.0823875202319602e-06, "loss": 0.0543, "step": 799400 }, { "epoch": 7.86, "grad_norm": 5.474796295166016, "learning_rate": 1.0822633977777117e-06, "loss": 0.0273, "step": 799425 }, { "epoch": 7.86, "grad_norm": 2.050673484802246, "learning_rate": 1.082139275323463e-06, "loss": 0.0652, "step": 799450 }, { "epoch": 7.86, "grad_norm": 1.3938679695129395, "learning_rate": 1.0820151528692147e-06, "loss": 0.0218, "step": 799475 }, { "epoch": 7.86, "grad_norm": 3.1731603145599365, "learning_rate": 1.0818910304149664e-06, "loss": 0.0511, "step": 799500 }, { "epoch": 7.86, "grad_norm": 9.163243293762207, "learning_rate": 1.0817669079607178e-06, "loss": 0.0153, "step": 799525 }, { "epoch": 7.86, "grad_norm": 4.816813945770264, "learning_rate": 1.0816427855064692e-06, "loss": 0.0427, "step": 799550 }, { "epoch": 7.86, "grad_norm": 0.6563768982887268, "learning_rate": 1.0815186630522208e-06, "loss": 0.0112, "step": 799575 }, { "epoch": 7.86, "grad_norm": 2.544304847717285, "learning_rate": 1.0813945405979725e-06, "loss": 0.0591, "step": 799600 }, { "epoch": 7.86, "grad_norm": 1.0104175806045532, "learning_rate": 1.0812704181437239e-06, "loss": 0.0177, "step": 799625 }, { "epoch": 7.86, "grad_norm": 2.8910908699035645, "learning_rate": 1.0811462956894755e-06, "loss": 0.0464, "step": 799650 }, { "epoch": 7.86, "grad_norm": 0.4187540113925934, "learning_rate": 1.081022173235227e-06, "loss": 0.0277, "step": 799675 }, { "epoch": 7.86, "grad_norm": 7.214015483856201, "learning_rate": 1.0808980507809786e-06, "loss": 0.0754, "step": 799700 }, { "epoch": 7.86, "grad_norm": 6.376769542694092, "learning_rate": 1.0807739283267302e-06, "loss": 0.021, "step": 799725 }, { "epoch": 7.86, "grad_norm": 3.648817539215088, "learning_rate": 1.0806498058724816e-06, "loss": 0.0477, "step": 799750 }, { "epoch": 7.86, "grad_norm": 0.07678840309381485, "learning_rate": 1.080525683418233e-06, "loss": 0.0184, "step": 799775 }, { "epoch": 7.86, "grad_norm": 4.1083173751831055, "learning_rate": 1.0804015609639847e-06, "loss": 0.0445, "step": 799800 }, { "epoch": 7.86, "grad_norm": 3.8881072998046875, "learning_rate": 1.0802774385097363e-06, "loss": 0.017, "step": 799825 }, { "epoch": 7.86, "grad_norm": 5.836214065551758, "learning_rate": 1.0801533160554878e-06, "loss": 0.0692, "step": 799850 }, { "epoch": 7.86, "grad_norm": 3.1261022090911865, "learning_rate": 1.0800291936012392e-06, "loss": 0.0266, "step": 799875 }, { "epoch": 7.86, "grad_norm": 2.460012435913086, "learning_rate": 1.0799050711469908e-06, "loss": 0.0486, "step": 799900 }, { "epoch": 7.86, "grad_norm": 0.11614478379487991, "learning_rate": 1.0797809486927425e-06, "loss": 0.0289, "step": 799925 }, { "epoch": 7.87, "grad_norm": 3.2043211460113525, "learning_rate": 1.0796568262384939e-06, "loss": 0.0821, "step": 799950 }, { "epoch": 7.87, "grad_norm": 2.3874030113220215, "learning_rate": 1.0795327037842455e-06, "loss": 0.0114, "step": 799975 }, { "epoch": 7.87, "grad_norm": 1.929667353630066, "learning_rate": 1.0794085813299971e-06, "loss": 0.0578, "step": 800000 }, { "epoch": 7.87, "eval_loss": 0.9059613943099976, "eval_runtime": 6085.7386, "eval_samples_per_second": 1.556, "eval_steps_per_second": 0.195, "eval_wer": 0.1124058456190047, "step": 800000 }, { "epoch": 7.87, "grad_norm": 2.901763677597046, "learning_rate": 1.0792844588757486e-06, "loss": 0.0231, "step": 800025 }, { "epoch": 7.87, "grad_norm": 4.052356719970703, "learning_rate": 1.0791603364215e-06, "loss": 0.0653, "step": 800050 }, { "epoch": 7.87, "grad_norm": 1.7805085182189941, "learning_rate": 1.0790362139672516e-06, "loss": 0.0213, "step": 800075 }, { "epoch": 7.87, "grad_norm": 3.6594884395599365, "learning_rate": 1.0789120915130033e-06, "loss": 0.0479, "step": 800100 }, { "epoch": 7.87, "grad_norm": 1.4006999731063843, "learning_rate": 1.0787879690587547e-06, "loss": 0.0152, "step": 800125 }, { "epoch": 7.87, "grad_norm": 3.4238739013671875, "learning_rate": 1.0786638466045063e-06, "loss": 0.0473, "step": 800150 }, { "epoch": 7.87, "grad_norm": 0.005256311036646366, "learning_rate": 1.0785397241502577e-06, "loss": 0.0188, "step": 800175 }, { "epoch": 7.87, "grad_norm": 3.527697801589966, "learning_rate": 1.0784156016960094e-06, "loss": 0.0413, "step": 800200 }, { "epoch": 7.87, "grad_norm": 0.9310088753700256, "learning_rate": 1.0782914792417608e-06, "loss": 0.0111, "step": 800225 }, { "epoch": 7.87, "grad_norm": 2.592250347137451, "learning_rate": 1.0781673567875124e-06, "loss": 0.0657, "step": 800250 }, { "epoch": 7.87, "grad_norm": 0.24566270411014557, "learning_rate": 1.0780432343332639e-06, "loss": 0.0214, "step": 800275 }, { "epoch": 7.87, "grad_norm": 8.719167709350586, "learning_rate": 1.0779191118790155e-06, "loss": 0.0721, "step": 800300 }, { "epoch": 7.87, "grad_norm": 5.991993427276611, "learning_rate": 1.0777949894247671e-06, "loss": 0.0247, "step": 800325 }, { "epoch": 7.87, "grad_norm": 3.003155469894409, "learning_rate": 1.0776708669705186e-06, "loss": 0.0462, "step": 800350 }, { "epoch": 7.87, "grad_norm": 3.4026939868927, "learning_rate": 1.07754674451627e-06, "loss": 0.0126, "step": 800375 }, { "epoch": 7.87, "grad_norm": 5.0663862228393555, "learning_rate": 1.0774226220620216e-06, "loss": 0.054, "step": 800400 }, { "epoch": 7.87, "grad_norm": 12.472102165222168, "learning_rate": 1.0772984996077732e-06, "loss": 0.0136, "step": 800425 }, { "epoch": 7.87, "grad_norm": 3.167726755142212, "learning_rate": 1.0771743771535247e-06, "loss": 0.0447, "step": 800450 }, { "epoch": 7.87, "grad_norm": 5.422735691070557, "learning_rate": 1.077050254699276e-06, "loss": 0.0181, "step": 800475 }, { "epoch": 7.87, "grad_norm": 4.3985419273376465, "learning_rate": 1.0769261322450277e-06, "loss": 0.0529, "step": 800500 }, { "epoch": 7.87, "grad_norm": 2.311220169067383, "learning_rate": 1.0768020097907794e-06, "loss": 0.0155, "step": 800525 }, { "epoch": 7.87, "grad_norm": 2.8518621921539307, "learning_rate": 1.0766778873365308e-06, "loss": 0.0664, "step": 800550 }, { "epoch": 7.87, "grad_norm": 3.8237175941467285, "learning_rate": 1.0765537648822824e-06, "loss": 0.0134, "step": 800575 }, { "epoch": 7.87, "grad_norm": 2.46844220161438, "learning_rate": 1.0764296424280338e-06, "loss": 0.0603, "step": 800600 }, { "epoch": 7.87, "grad_norm": 1.8698493242263794, "learning_rate": 1.0763055199737855e-06, "loss": 0.02, "step": 800625 }, { "epoch": 7.87, "grad_norm": 4.489534854888916, "learning_rate": 1.076181397519537e-06, "loss": 0.0609, "step": 800650 }, { "epoch": 7.87, "grad_norm": 0.23697534203529358, "learning_rate": 1.0760572750652885e-06, "loss": 0.023, "step": 800675 }, { "epoch": 7.87, "grad_norm": 2.821990728378296, "learning_rate": 1.07593315261104e-06, "loss": 0.0859, "step": 800700 }, { "epoch": 7.87, "grad_norm": 0.05667561665177345, "learning_rate": 1.0758090301567916e-06, "loss": 0.0141, "step": 800725 }, { "epoch": 7.87, "grad_norm": 3.431089162826538, "learning_rate": 1.0756849077025432e-06, "loss": 0.0617, "step": 800750 }, { "epoch": 7.87, "grad_norm": 15.574113845825195, "learning_rate": 1.0755607852482947e-06, "loss": 0.0188, "step": 800775 }, { "epoch": 7.87, "grad_norm": 3.0855724811553955, "learning_rate": 1.075436662794046e-06, "loss": 0.0687, "step": 800800 }, { "epoch": 7.87, "grad_norm": 0.7022401690483093, "learning_rate": 1.0753125403397977e-06, "loss": 0.035, "step": 800825 }, { "epoch": 7.87, "grad_norm": 3.5264410972595215, "learning_rate": 1.0751884178855493e-06, "loss": 0.0968, "step": 800850 }, { "epoch": 7.87, "grad_norm": 2.6642515659332275, "learning_rate": 1.0750642954313008e-06, "loss": 0.0245, "step": 800875 }, { "epoch": 7.87, "grad_norm": 4.101685523986816, "learning_rate": 1.0749401729770522e-06, "loss": 0.0573, "step": 800900 }, { "epoch": 7.87, "grad_norm": 3.7435402870178223, "learning_rate": 1.0748160505228038e-06, "loss": 0.0181, "step": 800925 }, { "epoch": 7.88, "grad_norm": 4.305859565734863, "learning_rate": 1.0746919280685555e-06, "loss": 0.0476, "step": 800950 }, { "epoch": 7.88, "grad_norm": 3.710970163345337, "learning_rate": 1.0745678056143069e-06, "loss": 0.0265, "step": 800975 }, { "epoch": 7.88, "grad_norm": 2.5537519454956055, "learning_rate": 1.0744436831600585e-06, "loss": 0.0616, "step": 801000 }, { "epoch": 7.88, "grad_norm": 8.907282829284668, "learning_rate": 1.07431956070581e-06, "loss": 0.0175, "step": 801025 }, { "epoch": 7.88, "grad_norm": 4.309966564178467, "learning_rate": 1.0741954382515616e-06, "loss": 0.0629, "step": 801050 }, { "epoch": 7.88, "grad_norm": 5.915927410125732, "learning_rate": 1.074071315797313e-06, "loss": 0.0263, "step": 801075 }, { "epoch": 7.88, "grad_norm": 3.638904333114624, "learning_rate": 1.0739471933430646e-06, "loss": 0.0743, "step": 801100 }, { "epoch": 7.88, "grad_norm": 3.232783079147339, "learning_rate": 1.073823070888816e-06, "loss": 0.0224, "step": 801125 }, { "epoch": 7.88, "grad_norm": 4.279238700866699, "learning_rate": 1.0736989484345677e-06, "loss": 0.0485, "step": 801150 }, { "epoch": 7.88, "grad_norm": 1.8278570175170898, "learning_rate": 1.0735748259803193e-06, "loss": 0.0152, "step": 801175 }, { "epoch": 7.88, "grad_norm": 2.1933515071868896, "learning_rate": 1.0734507035260708e-06, "loss": 0.0326, "step": 801200 }, { "epoch": 7.88, "grad_norm": 0.2923356592655182, "learning_rate": 1.0733265810718222e-06, "loss": 0.0254, "step": 801225 }, { "epoch": 7.88, "grad_norm": 2.7220003604888916, "learning_rate": 1.0732024586175738e-06, "loss": 0.0531, "step": 801250 }, { "epoch": 7.88, "grad_norm": 9.891385078430176, "learning_rate": 1.0730783361633254e-06, "loss": 0.0243, "step": 801275 }, { "epoch": 7.88, "grad_norm": 3.046422004699707, "learning_rate": 1.0729542137090769e-06, "loss": 0.0848, "step": 801300 }, { "epoch": 7.88, "grad_norm": 1.752748727798462, "learning_rate": 1.0728300912548285e-06, "loss": 0.0127, "step": 801325 }, { "epoch": 7.88, "grad_norm": 3.784536123275757, "learning_rate": 1.0727059688005801e-06, "loss": 0.0788, "step": 801350 }, { "epoch": 7.88, "grad_norm": 67.96814727783203, "learning_rate": 1.0725818463463316e-06, "loss": 0.0309, "step": 801375 }, { "epoch": 7.88, "grad_norm": 4.218774318695068, "learning_rate": 1.072457723892083e-06, "loss": 0.0754, "step": 801400 }, { "epoch": 7.88, "grad_norm": 0.22022745013237, "learning_rate": 1.0723336014378346e-06, "loss": 0.0135, "step": 801425 }, { "epoch": 7.88, "grad_norm": 3.913485050201416, "learning_rate": 1.0722094789835863e-06, "loss": 0.0522, "step": 801450 }, { "epoch": 7.88, "grad_norm": 0.032321229577064514, "learning_rate": 1.0720853565293377e-06, "loss": 0.03, "step": 801475 }, { "epoch": 7.88, "grad_norm": 2.359602689743042, "learning_rate": 1.071961234075089e-06, "loss": 0.0738, "step": 801500 }, { "epoch": 7.88, "grad_norm": 0.49556809663772583, "learning_rate": 1.0718371116208407e-06, "loss": 0.0172, "step": 801525 }, { "epoch": 7.88, "grad_norm": 2.618471384048462, "learning_rate": 1.0717129891665924e-06, "loss": 0.0465, "step": 801550 }, { "epoch": 7.88, "grad_norm": 0.20368479192256927, "learning_rate": 1.0715888667123438e-06, "loss": 0.0197, "step": 801575 }, { "epoch": 7.88, "grad_norm": 2.9464988708496094, "learning_rate": 1.0714647442580954e-06, "loss": 0.0494, "step": 801600 }, { "epoch": 7.88, "grad_norm": 0.10662752389907837, "learning_rate": 1.0713406218038469e-06, "loss": 0.0166, "step": 801625 }, { "epoch": 7.88, "grad_norm": 3.8695061206817627, "learning_rate": 1.0712164993495985e-06, "loss": 0.0609, "step": 801650 }, { "epoch": 7.88, "grad_norm": 0.0679759606719017, "learning_rate": 1.0710923768953501e-06, "loss": 0.0149, "step": 801675 }, { "epoch": 7.88, "grad_norm": 2.604902982711792, "learning_rate": 1.0709682544411015e-06, "loss": 0.0653, "step": 801700 }, { "epoch": 7.88, "grad_norm": 0.43494078516960144, "learning_rate": 1.070844131986853e-06, "loss": 0.02, "step": 801725 }, { "epoch": 7.88, "grad_norm": 2.6687796115875244, "learning_rate": 1.0707200095326046e-06, "loss": 0.0698, "step": 801750 }, { "epoch": 7.88, "grad_norm": 0.08293961733579636, "learning_rate": 1.0705958870783562e-06, "loss": 0.015, "step": 801775 }, { "epoch": 7.88, "grad_norm": 3.6660239696502686, "learning_rate": 1.0704717646241077e-06, "loss": 0.0467, "step": 801800 }, { "epoch": 7.88, "grad_norm": 1.8526993989944458, "learning_rate": 1.070347642169859e-06, "loss": 0.0275, "step": 801825 }, { "epoch": 7.88, "grad_norm": 3.330799102783203, "learning_rate": 1.0702235197156107e-06, "loss": 0.061, "step": 801850 }, { "epoch": 7.88, "grad_norm": 2.546415328979492, "learning_rate": 1.0700993972613624e-06, "loss": 0.0243, "step": 801875 }, { "epoch": 7.88, "grad_norm": 2.355246067047119, "learning_rate": 1.0699752748071138e-06, "loss": 0.0415, "step": 801900 }, { "epoch": 7.88, "grad_norm": 0.07246409356594086, "learning_rate": 1.0698511523528652e-06, "loss": 0.0308, "step": 801925 }, { "epoch": 7.88, "grad_norm": 3.0323026180267334, "learning_rate": 1.0697270298986168e-06, "loss": 0.0598, "step": 801950 }, { "epoch": 7.89, "grad_norm": 0.018379835411906242, "learning_rate": 1.0696029074443685e-06, "loss": 0.0126, "step": 801975 }, { "epoch": 7.89, "grad_norm": 3.9386863708496094, "learning_rate": 1.06947878499012e-06, "loss": 0.0395, "step": 802000 }, { "epoch": 7.89, "grad_norm": 4.693242073059082, "learning_rate": 1.0693546625358715e-06, "loss": 0.0142, "step": 802025 }, { "epoch": 7.89, "grad_norm": 2.4459004402160645, "learning_rate": 1.069230540081623e-06, "loss": 0.0483, "step": 802050 }, { "epoch": 7.89, "grad_norm": 1.2894587516784668, "learning_rate": 1.0691064176273746e-06, "loss": 0.0118, "step": 802075 }, { "epoch": 7.89, "grad_norm": 2.4437079429626465, "learning_rate": 1.0689822951731262e-06, "loss": 0.0394, "step": 802100 }, { "epoch": 7.89, "grad_norm": 1.0275006294250488, "learning_rate": 1.0688581727188776e-06, "loss": 0.0139, "step": 802125 }, { "epoch": 7.89, "grad_norm": 3.8764302730560303, "learning_rate": 1.068734050264629e-06, "loss": 0.0648, "step": 802150 }, { "epoch": 7.89, "grad_norm": 17.7020320892334, "learning_rate": 1.0686099278103807e-06, "loss": 0.0211, "step": 802175 }, { "epoch": 7.89, "grad_norm": 2.8558621406555176, "learning_rate": 1.0684858053561323e-06, "loss": 0.0612, "step": 802200 }, { "epoch": 7.89, "grad_norm": 0.043698009103536606, "learning_rate": 1.0683616829018838e-06, "loss": 0.0142, "step": 802225 }, { "epoch": 7.89, "grad_norm": 4.002659320831299, "learning_rate": 1.0682375604476352e-06, "loss": 0.0492, "step": 802250 }, { "epoch": 7.89, "grad_norm": 0.38905155658721924, "learning_rate": 1.0681134379933868e-06, "loss": 0.0133, "step": 802275 }, { "epoch": 7.89, "grad_norm": 4.290280818939209, "learning_rate": 1.0679893155391385e-06, "loss": 0.0732, "step": 802300 }, { "epoch": 7.89, "grad_norm": 0.10687465220689774, "learning_rate": 1.0678651930848899e-06, "loss": 0.0217, "step": 802325 }, { "epoch": 7.89, "grad_norm": 5.1895904541015625, "learning_rate": 1.0677410706306413e-06, "loss": 0.0727, "step": 802350 }, { "epoch": 7.89, "grad_norm": 2.007319927215576, "learning_rate": 1.067616948176393e-06, "loss": 0.0205, "step": 802375 }, { "epoch": 7.89, "grad_norm": 4.277202606201172, "learning_rate": 1.0674928257221446e-06, "loss": 0.0497, "step": 802400 }, { "epoch": 7.89, "grad_norm": 5.924382209777832, "learning_rate": 1.067368703267896e-06, "loss": 0.0256, "step": 802425 }, { "epoch": 7.89, "grad_norm": 2.0480027198791504, "learning_rate": 1.0672445808136476e-06, "loss": 0.0554, "step": 802450 }, { "epoch": 7.89, "grad_norm": 6.919567108154297, "learning_rate": 1.067120458359399e-06, "loss": 0.0293, "step": 802475 }, { "epoch": 7.89, "grad_norm": 2.8762364387512207, "learning_rate": 1.0669963359051507e-06, "loss": 0.0585, "step": 802500 }, { "epoch": 7.89, "grad_norm": 6.296191215515137, "learning_rate": 1.0668722134509023e-06, "loss": 0.0171, "step": 802525 }, { "epoch": 7.89, "grad_norm": 3.294011116027832, "learning_rate": 1.0667480909966537e-06, "loss": 0.0434, "step": 802550 }, { "epoch": 7.89, "grad_norm": 0.4519745111465454, "learning_rate": 1.0666239685424052e-06, "loss": 0.0205, "step": 802575 }, { "epoch": 7.89, "grad_norm": 3.165708065032959, "learning_rate": 1.0664998460881568e-06, "loss": 0.0606, "step": 802600 }, { "epoch": 7.89, "grad_norm": 5.099981784820557, "learning_rate": 1.0663757236339084e-06, "loss": 0.0163, "step": 802625 }, { "epoch": 7.89, "grad_norm": 3.908278465270996, "learning_rate": 1.0662516011796599e-06, "loss": 0.0609, "step": 802650 }, { "epoch": 7.89, "grad_norm": 1.2896970510482788, "learning_rate": 1.0661274787254113e-06, "loss": 0.0169, "step": 802675 }, { "epoch": 7.89, "grad_norm": 1.672661542892456, "learning_rate": 1.066003356271163e-06, "loss": 0.0656, "step": 802700 }, { "epoch": 7.89, "grad_norm": 5.520538806915283, "learning_rate": 1.0658792338169146e-06, "loss": 0.0152, "step": 802725 }, { "epoch": 7.89, "grad_norm": 3.1687936782836914, "learning_rate": 1.065755111362666e-06, "loss": 0.0447, "step": 802750 }, { "epoch": 7.89, "grad_norm": 0.2785768508911133, "learning_rate": 1.0656309889084176e-06, "loss": 0.0204, "step": 802775 }, { "epoch": 7.89, "grad_norm": 2.8269081115722656, "learning_rate": 1.0655068664541692e-06, "loss": 0.0466, "step": 802800 }, { "epoch": 7.89, "grad_norm": 0.013825230300426483, "learning_rate": 1.0653827439999207e-06, "loss": 0.0242, "step": 802825 }, { "epoch": 7.89, "grad_norm": 3.35353422164917, "learning_rate": 1.065258621545672e-06, "loss": 0.0401, "step": 802850 }, { "epoch": 7.89, "grad_norm": 4.957696437835693, "learning_rate": 1.0651344990914237e-06, "loss": 0.0238, "step": 802875 }, { "epoch": 7.89, "grad_norm": 7.688169479370117, "learning_rate": 1.0650103766371754e-06, "loss": 0.0816, "step": 802900 }, { "epoch": 7.89, "grad_norm": 4.885686874389648, "learning_rate": 1.0648862541829268e-06, "loss": 0.0209, "step": 802925 }, { "epoch": 7.89, "grad_norm": 2.548765182495117, "learning_rate": 1.0647621317286784e-06, "loss": 0.0681, "step": 802950 }, { "epoch": 7.89, "grad_norm": 1.086226224899292, "learning_rate": 1.0646380092744298e-06, "loss": 0.0152, "step": 802975 }, { "epoch": 7.9, "grad_norm": 4.255760669708252, "learning_rate": 1.0645188517183513e-06, "loss": 0.0302, "step": 803000 }, { "epoch": 7.9, "grad_norm": 0.6344292163848877, "learning_rate": 1.064394729264103e-06, "loss": 0.0249, "step": 803025 }, { "epoch": 7.9, "grad_norm": 4.013428211212158, "learning_rate": 1.0642706068098546e-06, "loss": 0.0469, "step": 803050 }, { "epoch": 7.9, "grad_norm": 16.020204544067383, "learning_rate": 1.064146484355606e-06, "loss": 0.0168, "step": 803075 }, { "epoch": 7.9, "grad_norm": 3.7657313346862793, "learning_rate": 1.0640223619013574e-06, "loss": 0.055, "step": 803100 }, { "epoch": 7.9, "grad_norm": 6.792675495147705, "learning_rate": 1.063898239447109e-06, "loss": 0.0227, "step": 803125 }, { "epoch": 7.9, "grad_norm": 2.632601737976074, "learning_rate": 1.0637790818910305e-06, "loss": 0.0595, "step": 803150 }, { "epoch": 7.9, "grad_norm": 7.754266738891602, "learning_rate": 1.0636549594367821e-06, "loss": 0.0131, "step": 803175 }, { "epoch": 7.9, "grad_norm": 3.6595747470855713, "learning_rate": 1.0635308369825336e-06, "loss": 0.0711, "step": 803200 }, { "epoch": 7.9, "grad_norm": 0.6208100914955139, "learning_rate": 1.0634067145282852e-06, "loss": 0.0287, "step": 803225 }, { "epoch": 7.9, "grad_norm": 2.3655292987823486, "learning_rate": 1.0632825920740366e-06, "loss": 0.0505, "step": 803250 }, { "epoch": 7.9, "grad_norm": 9.716523170471191, "learning_rate": 1.0631584696197883e-06, "loss": 0.0303, "step": 803275 }, { "epoch": 7.9, "grad_norm": 2.385734796524048, "learning_rate": 1.0630343471655397e-06, "loss": 0.062, "step": 803300 }, { "epoch": 7.9, "grad_norm": 19.791772842407227, "learning_rate": 1.0629102247112913e-06, "loss": 0.0242, "step": 803325 }, { "epoch": 7.9, "grad_norm": 5.1514105796813965, "learning_rate": 1.0627861022570427e-06, "loss": 0.0491, "step": 803350 }, { "epoch": 7.9, "grad_norm": 7.5930256843566895, "learning_rate": 1.0626619798027944e-06, "loss": 0.0244, "step": 803375 }, { "epoch": 7.9, "grad_norm": 5.346989631652832, "learning_rate": 1.062537857348546e-06, "loss": 0.0472, "step": 803400 }, { "epoch": 7.9, "grad_norm": 4.882645606994629, "learning_rate": 1.0624137348942974e-06, "loss": 0.0109, "step": 803425 }, { "epoch": 7.9, "grad_norm": 4.6720428466796875, "learning_rate": 1.0622896124400488e-06, "loss": 0.0586, "step": 803450 }, { "epoch": 7.9, "grad_norm": 21.562030792236328, "learning_rate": 1.0621654899858005e-06, "loss": 0.0254, "step": 803475 }, { "epoch": 7.9, "grad_norm": 6.537650108337402, "learning_rate": 1.0620413675315521e-06, "loss": 0.0593, "step": 803500 }, { "epoch": 7.9, "grad_norm": 3.5706517696380615, "learning_rate": 1.0619172450773035e-06, "loss": 0.0252, "step": 803525 }, { "epoch": 7.9, "grad_norm": 4.782974720001221, "learning_rate": 1.061793122623055e-06, "loss": 0.0551, "step": 803550 }, { "epoch": 7.9, "grad_norm": 0.9620619416236877, "learning_rate": 1.0616690001688066e-06, "loss": 0.0239, "step": 803575 }, { "epoch": 7.9, "grad_norm": 4.289209842681885, "learning_rate": 1.0615448777145582e-06, "loss": 0.0421, "step": 803600 }, { "epoch": 7.9, "grad_norm": 6.115896701812744, "learning_rate": 1.0614207552603097e-06, "loss": 0.0236, "step": 803625 }, { "epoch": 7.9, "grad_norm": 3.40922474861145, "learning_rate": 1.0612966328060613e-06, "loss": 0.0452, "step": 803650 }, { "epoch": 7.9, "grad_norm": 2.1941773891448975, "learning_rate": 1.0611725103518127e-06, "loss": 0.0242, "step": 803675 }, { "epoch": 7.9, "grad_norm": 3.5611917972564697, "learning_rate": 1.0610483878975644e-06, "loss": 0.0575, "step": 803700 }, { "epoch": 7.9, "grad_norm": 4.684050559997559, "learning_rate": 1.0609242654433158e-06, "loss": 0.0143, "step": 803725 }, { "epoch": 7.9, "grad_norm": 2.0665416717529297, "learning_rate": 1.0608001429890674e-06, "loss": 0.0754, "step": 803750 }, { "epoch": 7.9, "grad_norm": 1.1255013942718506, "learning_rate": 1.0606760205348188e-06, "loss": 0.0209, "step": 803775 }, { "epoch": 7.9, "grad_norm": 5.271154880523682, "learning_rate": 1.0605518980805705e-06, "loss": 0.0708, "step": 803800 }, { "epoch": 7.9, "grad_norm": 0.09368180483579636, "learning_rate": 1.060427775626322e-06, "loss": 0.0212, "step": 803825 }, { "epoch": 7.9, "grad_norm": 4.114503860473633, "learning_rate": 1.0603036531720735e-06, "loss": 0.0385, "step": 803850 }, { "epoch": 7.9, "grad_norm": 0.15934841334819794, "learning_rate": 1.060179530717825e-06, "loss": 0.032, "step": 803875 }, { "epoch": 7.9, "grad_norm": 3.9085917472839355, "learning_rate": 1.0600554082635766e-06, "loss": 0.043, "step": 803900 }, { "epoch": 7.9, "grad_norm": 1.5482717752456665, "learning_rate": 1.0599312858093282e-06, "loss": 0.0151, "step": 803925 }, { "epoch": 7.9, "grad_norm": 2.216986894607544, "learning_rate": 1.0598071633550796e-06, "loss": 0.0535, "step": 803950 }, { "epoch": 7.9, "grad_norm": 1.258021593093872, "learning_rate": 1.059683040900831e-06, "loss": 0.02, "step": 803975 }, { "epoch": 7.91, "grad_norm": 4.704383850097656, "learning_rate": 1.0595589184465827e-06, "loss": 0.056, "step": 804000 }, { "epoch": 7.91, "grad_norm": 14.50514030456543, "learning_rate": 1.0594347959923343e-06, "loss": 0.0256, "step": 804025 }, { "epoch": 7.91, "grad_norm": 5.282994270324707, "learning_rate": 1.0593106735380858e-06, "loss": 0.0521, "step": 804050 }, { "epoch": 7.91, "grad_norm": 0.2786892354488373, "learning_rate": 1.0591865510838374e-06, "loss": 0.0317, "step": 804075 }, { "epoch": 7.91, "grad_norm": 3.5460023880004883, "learning_rate": 1.0590624286295888e-06, "loss": 0.0655, "step": 804100 }, { "epoch": 7.91, "grad_norm": 5.890127182006836, "learning_rate": 1.0589383061753405e-06, "loss": 0.0168, "step": 804125 }, { "epoch": 7.91, "grad_norm": 2.4288177490234375, "learning_rate": 1.0588141837210919e-06, "loss": 0.064, "step": 804150 }, { "epoch": 7.91, "grad_norm": 7.007326602935791, "learning_rate": 1.0586900612668435e-06, "loss": 0.0154, "step": 804175 }, { "epoch": 7.91, "grad_norm": 1.3534574508666992, "learning_rate": 1.058565938812595e-06, "loss": 0.0466, "step": 804200 }, { "epoch": 7.91, "grad_norm": 0.047209009528160095, "learning_rate": 1.0584418163583466e-06, "loss": 0.0157, "step": 804225 }, { "epoch": 7.91, "grad_norm": 4.6465654373168945, "learning_rate": 1.0583176939040982e-06, "loss": 0.0494, "step": 804250 }, { "epoch": 7.91, "grad_norm": 0.3934216797351837, "learning_rate": 1.0581935714498496e-06, "loss": 0.0172, "step": 804275 }, { "epoch": 7.91, "grad_norm": 2.5416810512542725, "learning_rate": 1.058069448995601e-06, "loss": 0.0423, "step": 804300 }, { "epoch": 7.91, "grad_norm": 1.8413513898849487, "learning_rate": 1.0579453265413527e-06, "loss": 0.0206, "step": 804325 }, { "epoch": 7.91, "grad_norm": 3.970027446746826, "learning_rate": 1.0578212040871043e-06, "loss": 0.0505, "step": 804350 }, { "epoch": 7.91, "grad_norm": 5.701254844665527, "learning_rate": 1.0576970816328557e-06, "loss": 0.0196, "step": 804375 }, { "epoch": 7.91, "grad_norm": 5.735814571380615, "learning_rate": 1.0575729591786074e-06, "loss": 0.0497, "step": 804400 }, { "epoch": 7.91, "grad_norm": 1.8441963195800781, "learning_rate": 1.057448836724359e-06, "loss": 0.0345, "step": 804425 }, { "epoch": 7.91, "grad_norm": 1.4744055271148682, "learning_rate": 1.0573247142701104e-06, "loss": 0.0456, "step": 804450 }, { "epoch": 7.91, "grad_norm": 3.5380210876464844, "learning_rate": 1.0572005918158619e-06, "loss": 0.0245, "step": 804475 }, { "epoch": 7.91, "grad_norm": 3.8757920265197754, "learning_rate": 1.0570764693616135e-06, "loss": 0.0776, "step": 804500 }, { "epoch": 7.91, "grad_norm": 0.04539521411061287, "learning_rate": 1.0569523469073651e-06, "loss": 0.0193, "step": 804525 }, { "epoch": 7.91, "grad_norm": 3.266098737716675, "learning_rate": 1.0568282244531166e-06, "loss": 0.0521, "step": 804550 }, { "epoch": 7.91, "grad_norm": 1.9703384637832642, "learning_rate": 1.056704101998868e-06, "loss": 0.0213, "step": 804575 }, { "epoch": 7.91, "grad_norm": 2.392401695251465, "learning_rate": 1.0565799795446196e-06, "loss": 0.0557, "step": 804600 }, { "epoch": 7.91, "grad_norm": 1.0144455432891846, "learning_rate": 1.0564558570903712e-06, "loss": 0.0184, "step": 804625 }, { "epoch": 7.91, "grad_norm": 1.2121798992156982, "learning_rate": 1.0563317346361227e-06, "loss": 0.0387, "step": 804650 }, { "epoch": 7.91, "grad_norm": 8.849907875061035, "learning_rate": 1.0562076121818743e-06, "loss": 0.0235, "step": 804675 }, { "epoch": 7.91, "grad_norm": 3.019132375717163, "learning_rate": 1.0560834897276257e-06, "loss": 0.0737, "step": 804700 }, { "epoch": 7.91, "grad_norm": 19.459575653076172, "learning_rate": 1.0559593672733774e-06, "loss": 0.0256, "step": 804725 }, { "epoch": 7.91, "grad_norm": 4.7769904136657715, "learning_rate": 1.055835244819129e-06, "loss": 0.0523, "step": 804750 }, { "epoch": 7.91, "grad_norm": 0.12798500061035156, "learning_rate": 1.0557111223648804e-06, "loss": 0.0222, "step": 804775 }, { "epoch": 7.91, "grad_norm": 3.3889317512512207, "learning_rate": 1.0555869999106318e-06, "loss": 0.0595, "step": 804800 }, { "epoch": 7.91, "grad_norm": 0.1839669644832611, "learning_rate": 1.0554628774563835e-06, "loss": 0.0331, "step": 804825 }, { "epoch": 7.91, "grad_norm": 3.669995069503784, "learning_rate": 1.0553387550021351e-06, "loss": 0.0441, "step": 804850 }, { "epoch": 7.91, "grad_norm": 5.6560187339782715, "learning_rate": 1.0552146325478865e-06, "loss": 0.0298, "step": 804875 }, { "epoch": 7.91, "grad_norm": 4.0771989822387695, "learning_rate": 1.055090510093638e-06, "loss": 0.0538, "step": 804900 }, { "epoch": 7.91, "grad_norm": 2.0001394748687744, "learning_rate": 1.0549663876393896e-06, "loss": 0.02, "step": 804925 }, { "epoch": 7.91, "grad_norm": 2.8644495010375977, "learning_rate": 1.0548422651851412e-06, "loss": 0.0397, "step": 804950 }, { "epoch": 7.91, "grad_norm": 0.24131396412849426, "learning_rate": 1.0547181427308927e-06, "loss": 0.0171, "step": 804975 }, { "epoch": 7.91, "grad_norm": 5.827075004577637, "learning_rate": 1.054594020276644e-06, "loss": 0.0732, "step": 805000 }, { "epoch": 7.92, "grad_norm": 8.3513822555542, "learning_rate": 1.0544698978223957e-06, "loss": 0.0219, "step": 805025 }, { "epoch": 7.92, "grad_norm": 2.9000470638275146, "learning_rate": 1.0543457753681473e-06, "loss": 0.0668, "step": 805050 }, { "epoch": 7.92, "grad_norm": 1.7006473541259766, "learning_rate": 1.0542216529138988e-06, "loss": 0.0136, "step": 805075 }, { "epoch": 7.92, "grad_norm": 3.3538801670074463, "learning_rate": 1.0540975304596504e-06, "loss": 0.049, "step": 805100 }, { "epoch": 7.92, "grad_norm": 0.560479998588562, "learning_rate": 1.0539734080054018e-06, "loss": 0.0167, "step": 805125 }, { "epoch": 7.92, "grad_norm": 2.5275752544403076, "learning_rate": 1.0538492855511535e-06, "loss": 0.0383, "step": 805150 }, { "epoch": 7.92, "grad_norm": 6.435823440551758, "learning_rate": 1.053725163096905e-06, "loss": 0.0328, "step": 805175 }, { "epoch": 7.92, "grad_norm": 5.480246067047119, "learning_rate": 1.0536010406426565e-06, "loss": 0.0652, "step": 805200 }, { "epoch": 7.92, "grad_norm": 0.892608642578125, "learning_rate": 1.053476918188408e-06, "loss": 0.0159, "step": 805225 }, { "epoch": 7.92, "grad_norm": 3.4104838371276855, "learning_rate": 1.0533527957341596e-06, "loss": 0.0638, "step": 805250 }, { "epoch": 7.92, "grad_norm": 6.062882423400879, "learning_rate": 1.0532286732799112e-06, "loss": 0.0215, "step": 805275 }, { "epoch": 7.92, "grad_norm": 3.33585262298584, "learning_rate": 1.0531045508256626e-06, "loss": 0.0663, "step": 805300 }, { "epoch": 7.92, "grad_norm": 0.8505678772926331, "learning_rate": 1.052980428371414e-06, "loss": 0.0214, "step": 805325 }, { "epoch": 7.92, "grad_norm": 3.3743321895599365, "learning_rate": 1.0528563059171657e-06, "loss": 0.0587, "step": 805350 }, { "epoch": 7.92, "grad_norm": 8.42732048034668, "learning_rate": 1.0527321834629173e-06, "loss": 0.0257, "step": 805375 }, { "epoch": 7.92, "grad_norm": 5.190219402313232, "learning_rate": 1.0526080610086688e-06, "loss": 0.0335, "step": 805400 }, { "epoch": 7.92, "grad_norm": 0.3848191201686859, "learning_rate": 1.0524839385544202e-06, "loss": 0.0189, "step": 805425 }, { "epoch": 7.92, "grad_norm": 1.8753868341445923, "learning_rate": 1.0523598161001718e-06, "loss": 0.04, "step": 805450 }, { "epoch": 7.92, "grad_norm": 3.760063409805298, "learning_rate": 1.0522356936459234e-06, "loss": 0.0207, "step": 805475 }, { "epoch": 7.92, "grad_norm": 2.91770076751709, "learning_rate": 1.0521115711916749e-06, "loss": 0.0505, "step": 805500 }, { "epoch": 7.92, "grad_norm": 10.674686431884766, "learning_rate": 1.0519874487374265e-06, "loss": 0.0214, "step": 805525 }, { "epoch": 7.92, "grad_norm": 3.413614511489868, "learning_rate": 1.051863326283178e-06, "loss": 0.0511, "step": 805550 }, { "epoch": 7.92, "grad_norm": 8.940706253051758, "learning_rate": 1.0517392038289296e-06, "loss": 0.0167, "step": 805575 }, { "epoch": 7.92, "grad_norm": 1.740699052810669, "learning_rate": 1.0516150813746812e-06, "loss": 0.0563, "step": 805600 }, { "epoch": 7.92, "grad_norm": 3.250004291534424, "learning_rate": 1.0514909589204326e-06, "loss": 0.0202, "step": 805625 }, { "epoch": 7.92, "grad_norm": 4.211057186126709, "learning_rate": 1.051366836466184e-06, "loss": 0.0599, "step": 805650 }, { "epoch": 7.92, "grad_norm": 0.8519747257232666, "learning_rate": 1.0512427140119357e-06, "loss": 0.0153, "step": 805675 }, { "epoch": 7.92, "grad_norm": 1.846890926361084, "learning_rate": 1.0511185915576873e-06, "loss": 0.0447, "step": 805700 }, { "epoch": 7.92, "grad_norm": 6.500308036804199, "learning_rate": 1.0509944691034387e-06, "loss": 0.0186, "step": 805725 }, { "epoch": 7.92, "grad_norm": 4.3661298751831055, "learning_rate": 1.0508703466491904e-06, "loss": 0.0792, "step": 805750 }, { "epoch": 7.92, "grad_norm": 0.11423962563276291, "learning_rate": 1.050746224194942e-06, "loss": 0.0234, "step": 805775 }, { "epoch": 7.92, "grad_norm": 2.4939255714416504, "learning_rate": 1.0506221017406934e-06, "loss": 0.0509, "step": 805800 }, { "epoch": 7.92, "grad_norm": 21.677574157714844, "learning_rate": 1.0504979792864449e-06, "loss": 0.019, "step": 805825 }, { "epoch": 7.92, "grad_norm": 4.149301528930664, "learning_rate": 1.0503738568321965e-06, "loss": 0.0423, "step": 805850 }, { "epoch": 7.92, "grad_norm": 0.03348154574632645, "learning_rate": 1.0502497343779481e-06, "loss": 0.0107, "step": 805875 }, { "epoch": 7.92, "grad_norm": 5.623308181762695, "learning_rate": 1.0501305768218696e-06, "loss": 0.0876, "step": 805900 }, { "epoch": 7.92, "grad_norm": 0.09237337857484818, "learning_rate": 1.050006454367621e-06, "loss": 0.02, "step": 805925 }, { "epoch": 7.92, "grad_norm": 3.839747190475464, "learning_rate": 1.0498823319133726e-06, "loss": 0.0585, "step": 805950 }, { "epoch": 7.92, "grad_norm": 0.9488422274589539, "learning_rate": 1.049758209459124e-06, "loss": 0.0143, "step": 805975 }, { "epoch": 7.92, "grad_norm": 4.267871379852295, "learning_rate": 1.0496340870048757e-06, "loss": 0.0449, "step": 806000 }, { "epoch": 7.92, "grad_norm": 5.010007381439209, "learning_rate": 1.0495099645506271e-06, "loss": 0.0254, "step": 806025 }, { "epoch": 7.93, "grad_norm": 3.0951459407806396, "learning_rate": 1.0493858420963788e-06, "loss": 0.0438, "step": 806050 }, { "epoch": 7.93, "grad_norm": 13.300023078918457, "learning_rate": 1.0492617196421302e-06, "loss": 0.0186, "step": 806075 }, { "epoch": 7.93, "grad_norm": 3.107879161834717, "learning_rate": 1.0491375971878818e-06, "loss": 0.0585, "step": 806100 }, { "epoch": 7.93, "grad_norm": 0.9094064235687256, "learning_rate": 1.0490134747336334e-06, "loss": 0.0164, "step": 806125 }, { "epoch": 7.93, "grad_norm": 3.0241525173187256, "learning_rate": 1.0488893522793849e-06, "loss": 0.0556, "step": 806150 }, { "epoch": 7.93, "grad_norm": 0.06402674317359924, "learning_rate": 1.0487652298251363e-06, "loss": 0.0131, "step": 806175 }, { "epoch": 7.93, "grad_norm": 3.9277026653289795, "learning_rate": 1.048641107370888e-06, "loss": 0.075, "step": 806200 }, { "epoch": 7.93, "grad_norm": 0.08023791760206223, "learning_rate": 1.0485169849166396e-06, "loss": 0.0292, "step": 806225 }, { "epoch": 7.93, "grad_norm": 2.7524163722991943, "learning_rate": 1.048392862462391e-06, "loss": 0.0522, "step": 806250 }, { "epoch": 7.93, "grad_norm": 6.1659932136535645, "learning_rate": 1.0482687400081424e-06, "loss": 0.0203, "step": 806275 }, { "epoch": 7.93, "grad_norm": 3.395435094833374, "learning_rate": 1.048144617553894e-06, "loss": 0.0564, "step": 806300 }, { "epoch": 7.93, "grad_norm": 0.4671217203140259, "learning_rate": 1.0480204950996457e-06, "loss": 0.0212, "step": 806325 }, { "epoch": 7.93, "grad_norm": 9.154756546020508, "learning_rate": 1.047896372645397e-06, "loss": 0.0627, "step": 806350 }, { "epoch": 7.93, "grad_norm": 0.04646418243646622, "learning_rate": 1.0477722501911487e-06, "loss": 0.0138, "step": 806375 }, { "epoch": 7.93, "grad_norm": 2.9901833534240723, "learning_rate": 1.0476481277369002e-06, "loss": 0.0646, "step": 806400 }, { "epoch": 7.93, "grad_norm": 3.856740951538086, "learning_rate": 1.0475240052826518e-06, "loss": 0.0205, "step": 806425 }, { "epoch": 7.93, "grad_norm": 3.15520977973938, "learning_rate": 1.0473998828284032e-06, "loss": 0.0569, "step": 806450 }, { "epoch": 7.93, "grad_norm": 0.01819990575313568, "learning_rate": 1.0472757603741549e-06, "loss": 0.0247, "step": 806475 }, { "epoch": 7.93, "grad_norm": 5.318183422088623, "learning_rate": 1.0471516379199063e-06, "loss": 0.0721, "step": 806500 }, { "epoch": 7.93, "grad_norm": 2.6746718883514404, "learning_rate": 1.047027515465658e-06, "loss": 0.0237, "step": 806525 }, { "epoch": 7.93, "grad_norm": 5.59511661529541, "learning_rate": 1.0469033930114095e-06, "loss": 0.0603, "step": 806550 }, { "epoch": 7.93, "grad_norm": 14.50828742980957, "learning_rate": 1.046779270557161e-06, "loss": 0.0173, "step": 806575 }, { "epoch": 7.93, "grad_norm": 4.2458977699279785, "learning_rate": 1.0466551481029124e-06, "loss": 0.0722, "step": 806600 }, { "epoch": 7.93, "grad_norm": 1.0819138288497925, "learning_rate": 1.046531025648664e-06, "loss": 0.0116, "step": 806625 }, { "epoch": 7.93, "grad_norm": 5.365462779998779, "learning_rate": 1.0464069031944157e-06, "loss": 0.0366, "step": 806650 }, { "epoch": 7.93, "grad_norm": 1.162373661994934, "learning_rate": 1.046282780740167e-06, "loss": 0.0253, "step": 806675 }, { "epoch": 7.93, "grad_norm": 3.167147159576416, "learning_rate": 1.0461586582859185e-06, "loss": 0.0518, "step": 806700 }, { "epoch": 7.93, "grad_norm": 0.014484211802482605, "learning_rate": 1.0460345358316701e-06, "loss": 0.0122, "step": 806725 }, { "epoch": 7.93, "grad_norm": 3.2173941135406494, "learning_rate": 1.0459104133774218e-06, "loss": 0.0577, "step": 806750 }, { "epoch": 7.93, "grad_norm": 5.301976680755615, "learning_rate": 1.0457862909231732e-06, "loss": 0.0157, "step": 806775 }, { "epoch": 7.93, "grad_norm": 2.475571870803833, "learning_rate": 1.0456621684689248e-06, "loss": 0.0562, "step": 806800 }, { "epoch": 7.93, "grad_norm": 0.5086121559143066, "learning_rate": 1.0455380460146763e-06, "loss": 0.0226, "step": 806825 }, { "epoch": 7.93, "grad_norm": 2.53775691986084, "learning_rate": 1.0454139235604279e-06, "loss": 0.0622, "step": 806850 }, { "epoch": 7.93, "grad_norm": 0.10143596678972244, "learning_rate": 1.0452898011061793e-06, "loss": 0.011, "step": 806875 }, { "epoch": 7.93, "grad_norm": 3.2284622192382812, "learning_rate": 1.045165678651931e-06, "loss": 0.0413, "step": 806900 }, { "epoch": 7.93, "grad_norm": 0.43720775842666626, "learning_rate": 1.0450415561976824e-06, "loss": 0.0301, "step": 806925 }, { "epoch": 7.93, "grad_norm": 2.2194228172302246, "learning_rate": 1.044917433743434e-06, "loss": 0.0535, "step": 806950 }, { "epoch": 7.93, "grad_norm": 0.8706116080284119, "learning_rate": 1.0447933112891856e-06, "loss": 0.0296, "step": 806975 }, { "epoch": 7.93, "grad_norm": 3.552109718322754, "learning_rate": 1.044669188834937e-06, "loss": 0.0622, "step": 807000 }, { "epoch": 7.93, "grad_norm": 0.9870038032531738, "learning_rate": 1.0445450663806885e-06, "loss": 0.0225, "step": 807025 }, { "epoch": 7.94, "grad_norm": 7.155928134918213, "learning_rate": 1.0444209439264401e-06, "loss": 0.053, "step": 807050 }, { "epoch": 7.94, "grad_norm": 0.2625313997268677, "learning_rate": 1.0442968214721918e-06, "loss": 0.0176, "step": 807075 }, { "epoch": 7.94, "grad_norm": 2.4538092613220215, "learning_rate": 1.0441726990179432e-06, "loss": 0.0678, "step": 807100 }, { "epoch": 7.94, "grad_norm": 20.412809371948242, "learning_rate": 1.0440485765636946e-06, "loss": 0.0157, "step": 807125 }, { "epoch": 7.94, "grad_norm": 4.638206481933594, "learning_rate": 1.0439244541094462e-06, "loss": 0.0789, "step": 807150 }, { "epoch": 7.94, "grad_norm": 0.641459047794342, "learning_rate": 1.0438003316551979e-06, "loss": 0.0256, "step": 807175 }, { "epoch": 7.94, "grad_norm": 3.097202777862549, "learning_rate": 1.0436762092009493e-06, "loss": 0.0532, "step": 807200 }, { "epoch": 7.94, "grad_norm": 1.7767455577850342, "learning_rate": 1.043552086746701e-06, "loss": 0.018, "step": 807225 }, { "epoch": 7.94, "grad_norm": 7.282085418701172, "learning_rate": 1.0434279642924526e-06, "loss": 0.0433, "step": 807250 }, { "epoch": 7.94, "grad_norm": 1.8412885665893555, "learning_rate": 1.043303841838204e-06, "loss": 0.0105, "step": 807275 }, { "epoch": 7.94, "grad_norm": 4.335249423980713, "learning_rate": 1.0431797193839554e-06, "loss": 0.0606, "step": 807300 }, { "epoch": 7.94, "grad_norm": 4.321570873260498, "learning_rate": 1.043055596929707e-06, "loss": 0.0304, "step": 807325 }, { "epoch": 7.94, "grad_norm": 2.8063061237335205, "learning_rate": 1.0429314744754587e-06, "loss": 0.0556, "step": 807350 }, { "epoch": 7.94, "grad_norm": 0.5982139706611633, "learning_rate": 1.0428073520212101e-06, "loss": 0.0165, "step": 807375 }, { "epoch": 7.94, "grad_norm": 3.2053916454315186, "learning_rate": 1.0426832295669617e-06, "loss": 0.0552, "step": 807400 }, { "epoch": 7.94, "grad_norm": 6.912973403930664, "learning_rate": 1.0425591071127132e-06, "loss": 0.0295, "step": 807425 }, { "epoch": 7.94, "grad_norm": 8.229759216308594, "learning_rate": 1.0424349846584648e-06, "loss": 0.0453, "step": 807450 }, { "epoch": 7.94, "grad_norm": 0.45263364911079407, "learning_rate": 1.0423108622042164e-06, "loss": 0.0143, "step": 807475 }, { "epoch": 7.94, "grad_norm": 3.3256471157073975, "learning_rate": 1.0421867397499679e-06, "loss": 0.0688, "step": 807500 }, { "epoch": 7.94, "grad_norm": 4.900091171264648, "learning_rate": 1.0420626172957193e-06, "loss": 0.0224, "step": 807525 }, { "epoch": 7.94, "grad_norm": 2.54569935798645, "learning_rate": 1.041938494841471e-06, "loss": 0.0563, "step": 807550 }, { "epoch": 7.94, "grad_norm": 3.050658941268921, "learning_rate": 1.0418143723872226e-06, "loss": 0.0161, "step": 807575 }, { "epoch": 7.94, "grad_norm": 3.1879236698150635, "learning_rate": 1.041690249932974e-06, "loss": 0.0757, "step": 807600 }, { "epoch": 7.94, "grad_norm": 1.2787861824035645, "learning_rate": 1.0415661274787254e-06, "loss": 0.0224, "step": 807625 }, { "epoch": 7.94, "grad_norm": 7.72606897354126, "learning_rate": 1.041442005024477e-06, "loss": 0.0454, "step": 807650 }, { "epoch": 7.94, "grad_norm": 0.08399936556816101, "learning_rate": 1.0413178825702287e-06, "loss": 0.0164, "step": 807675 }, { "epoch": 7.94, "grad_norm": 5.079944133758545, "learning_rate": 1.04119376011598e-06, "loss": 0.0581, "step": 807700 }, { "epoch": 7.94, "grad_norm": 9.694110870361328, "learning_rate": 1.0410696376617315e-06, "loss": 0.013, "step": 807725 }, { "epoch": 7.94, "grad_norm": 3.571209192276001, "learning_rate": 1.0409455152074832e-06, "loss": 0.052, "step": 807750 }, { "epoch": 7.94, "grad_norm": 3.694925308227539, "learning_rate": 1.0408213927532348e-06, "loss": 0.0158, "step": 807775 }, { "epoch": 7.94, "grad_norm": 2.9563510417938232, "learning_rate": 1.0406972702989862e-06, "loss": 0.0762, "step": 807800 }, { "epoch": 7.94, "grad_norm": 2.412198543548584, "learning_rate": 1.0405731478447378e-06, "loss": 0.0267, "step": 807825 }, { "epoch": 7.94, "grad_norm": 3.309582471847534, "learning_rate": 1.0404490253904893e-06, "loss": 0.0515, "step": 807850 }, { "epoch": 7.94, "grad_norm": 3.5745065212249756, "learning_rate": 1.040324902936241e-06, "loss": 0.0333, "step": 807875 }, { "epoch": 7.94, "grad_norm": 4.6447062492370605, "learning_rate": 1.0402007804819925e-06, "loss": 0.0536, "step": 807900 }, { "epoch": 7.94, "grad_norm": 1.910967230796814, "learning_rate": 1.040076658027744e-06, "loss": 0.0092, "step": 807925 }, { "epoch": 7.94, "grad_norm": 2.793085813522339, "learning_rate": 1.0399525355734954e-06, "loss": 0.0534, "step": 807950 }, { "epoch": 7.94, "grad_norm": 0.359967440366745, "learning_rate": 1.039828413119247e-06, "loss": 0.0113, "step": 807975 }, { "epoch": 7.94, "grad_norm": 5.379702568054199, "learning_rate": 1.0397042906649987e-06, "loss": 0.0467, "step": 808000 }, { "epoch": 7.94, "grad_norm": 2.033267021179199, "learning_rate": 1.03958016821075e-06, "loss": 0.0157, "step": 808025 }, { "epoch": 7.94, "grad_norm": 3.383537530899048, "learning_rate": 1.0394560457565015e-06, "loss": 0.07, "step": 808050 }, { "epoch": 7.95, "grad_norm": 0.8153514862060547, "learning_rate": 1.0393319233022531e-06, "loss": 0.0114, "step": 808075 }, { "epoch": 7.95, "grad_norm": 2.6357297897338867, "learning_rate": 1.0392078008480048e-06, "loss": 0.0618, "step": 808100 }, { "epoch": 7.95, "grad_norm": 0.049401525408029556, "learning_rate": 1.0390836783937562e-06, "loss": 0.0115, "step": 808125 }, { "epoch": 7.95, "grad_norm": 2.0979723930358887, "learning_rate": 1.0389595559395076e-06, "loss": 0.0615, "step": 808150 }, { "epoch": 7.95, "grad_norm": 9.038389205932617, "learning_rate": 1.0388354334852593e-06, "loss": 0.0133, "step": 808175 }, { "epoch": 7.95, "grad_norm": 3.995544672012329, "learning_rate": 1.0387113110310109e-06, "loss": 0.0532, "step": 808200 }, { "epoch": 7.95, "grad_norm": 0.13955926895141602, "learning_rate": 1.0385871885767623e-06, "loss": 0.0136, "step": 808225 }, { "epoch": 7.95, "grad_norm": 4.315087795257568, "learning_rate": 1.038463066122514e-06, "loss": 0.0413, "step": 808250 }, { "epoch": 7.95, "grad_norm": 0.07126734405755997, "learning_rate": 1.0383389436682654e-06, "loss": 0.0229, "step": 808275 }, { "epoch": 7.95, "grad_norm": 3.9815008640289307, "learning_rate": 1.038214821214017e-06, "loss": 0.0593, "step": 808300 }, { "epoch": 7.95, "grad_norm": 0.00666264770552516, "learning_rate": 1.0380906987597686e-06, "loss": 0.0145, "step": 808325 }, { "epoch": 7.95, "grad_norm": 3.671170234680176, "learning_rate": 1.03796657630552e-06, "loss": 0.0526, "step": 808350 }, { "epoch": 7.95, "grad_norm": 10.937528610229492, "learning_rate": 1.0378424538512715e-06, "loss": 0.0285, "step": 808375 }, { "epoch": 7.95, "grad_norm": 2.3752119541168213, "learning_rate": 1.0377183313970231e-06, "loss": 0.068, "step": 808400 }, { "epoch": 7.95, "grad_norm": 6.501829147338867, "learning_rate": 1.0375942089427748e-06, "loss": 0.0204, "step": 808425 }, { "epoch": 7.95, "grad_norm": 3.560589075088501, "learning_rate": 1.0374700864885262e-06, "loss": 0.043, "step": 808450 }, { "epoch": 7.95, "grad_norm": 9.154658317565918, "learning_rate": 1.0373459640342776e-06, "loss": 0.0302, "step": 808475 }, { "epoch": 7.95, "grad_norm": 1.5854572057724, "learning_rate": 1.0372218415800292e-06, "loss": 0.0569, "step": 808500 }, { "epoch": 7.95, "grad_norm": 0.662497878074646, "learning_rate": 1.0370977191257809e-06, "loss": 0.0155, "step": 808525 }, { "epoch": 7.95, "grad_norm": 4.252945423126221, "learning_rate": 1.0369735966715323e-06, "loss": 0.0422, "step": 808550 }, { "epoch": 7.95, "grad_norm": 0.43095818161964417, "learning_rate": 1.036849474217284e-06, "loss": 0.0124, "step": 808575 }, { "epoch": 7.95, "grad_norm": 2.51261568069458, "learning_rate": 1.0367253517630354e-06, "loss": 0.0536, "step": 808600 }, { "epoch": 7.95, "grad_norm": 1.4076892137527466, "learning_rate": 1.036601229308787e-06, "loss": 0.0324, "step": 808625 }, { "epoch": 7.95, "grad_norm": 4.563902854919434, "learning_rate": 1.0364771068545384e-06, "loss": 0.0482, "step": 808650 }, { "epoch": 7.95, "grad_norm": 0.8707435131072998, "learning_rate": 1.03635298440029e-06, "loss": 0.0159, "step": 808675 }, { "epoch": 7.95, "grad_norm": 4.916388034820557, "learning_rate": 1.0362288619460417e-06, "loss": 0.0466, "step": 808700 }, { "epoch": 7.95, "grad_norm": 3.8639111518859863, "learning_rate": 1.036104739491793e-06, "loss": 0.0271, "step": 808725 }, { "epoch": 7.95, "grad_norm": 3.7873642444610596, "learning_rate": 1.0359806170375447e-06, "loss": 0.05, "step": 808750 }, { "epoch": 7.95, "grad_norm": 11.36811637878418, "learning_rate": 1.0358564945832962e-06, "loss": 0.0321, "step": 808775 }, { "epoch": 7.95, "grad_norm": 4.583232879638672, "learning_rate": 1.0357323721290478e-06, "loss": 0.0695, "step": 808800 }, { "epoch": 7.95, "grad_norm": 1.0922266244888306, "learning_rate": 1.0356082496747992e-06, "loss": 0.0189, "step": 808825 }, { "epoch": 7.95, "grad_norm": 2.439143419265747, "learning_rate": 1.0354841272205509e-06, "loss": 0.0498, "step": 808850 }, { "epoch": 7.95, "grad_norm": 8.825139045715332, "learning_rate": 1.0353600047663023e-06, "loss": 0.0219, "step": 808875 }, { "epoch": 7.95, "grad_norm": 3.3952455520629883, "learning_rate": 1.035235882312054e-06, "loss": 0.0701, "step": 808900 }, { "epoch": 7.95, "grad_norm": 5.240914821624756, "learning_rate": 1.0351117598578055e-06, "loss": 0.0251, "step": 808925 }, { "epoch": 7.95, "grad_norm": 4.243247032165527, "learning_rate": 1.034987637403557e-06, "loss": 0.0594, "step": 808950 }, { "epoch": 7.95, "grad_norm": 11.833246231079102, "learning_rate": 1.0348635149493084e-06, "loss": 0.0407, "step": 808975 }, { "epoch": 7.95, "grad_norm": 2.4322564601898193, "learning_rate": 1.03473939249506e-06, "loss": 0.058, "step": 809000 }, { "epoch": 7.95, "grad_norm": 0.06616292893886566, "learning_rate": 1.0346152700408117e-06, "loss": 0.0188, "step": 809025 }, { "epoch": 7.95, "grad_norm": 3.8245596885681152, "learning_rate": 1.034491147586563e-06, "loss": 0.0398, "step": 809050 }, { "epoch": 7.95, "grad_norm": 0.7997961044311523, "learning_rate": 1.0343670251323145e-06, "loss": 0.0133, "step": 809075 }, { "epoch": 7.96, "grad_norm": 7.823350429534912, "learning_rate": 1.0342429026780661e-06, "loss": 0.058, "step": 809100 }, { "epoch": 7.96, "grad_norm": 6.551615238189697, "learning_rate": 1.0341187802238178e-06, "loss": 0.0228, "step": 809125 }, { "epoch": 7.96, "grad_norm": 3.4635555744171143, "learning_rate": 1.0339946577695692e-06, "loss": 0.045, "step": 809150 }, { "epoch": 7.96, "grad_norm": 3.93971586227417, "learning_rate": 1.0338705353153208e-06, "loss": 0.0212, "step": 809175 }, { "epoch": 7.96, "grad_norm": 3.405648946762085, "learning_rate": 1.0337464128610723e-06, "loss": 0.0475, "step": 809200 }, { "epoch": 7.96, "grad_norm": 9.240511894226074, "learning_rate": 1.033622290406824e-06, "loss": 0.0335, "step": 809225 }, { "epoch": 7.96, "grad_norm": 4.317921161651611, "learning_rate": 1.0334981679525753e-06, "loss": 0.0522, "step": 809250 }, { "epoch": 7.96, "grad_norm": 1.0179346799850464, "learning_rate": 1.033374045498327e-06, "loss": 0.0242, "step": 809275 }, { "epoch": 7.96, "grad_norm": 4.885571479797363, "learning_rate": 1.0332499230440784e-06, "loss": 0.0496, "step": 809300 }, { "epoch": 7.96, "grad_norm": 0.11103282868862152, "learning_rate": 1.03312580058983e-06, "loss": 0.0128, "step": 809325 }, { "epoch": 7.96, "grad_norm": 1.8573944568634033, "learning_rate": 1.0330016781355816e-06, "loss": 0.0624, "step": 809350 }, { "epoch": 7.96, "grad_norm": 2.3552186489105225, "learning_rate": 1.032877555681333e-06, "loss": 0.0235, "step": 809375 }, { "epoch": 7.96, "grad_norm": 3.0764529705047607, "learning_rate": 1.0327534332270845e-06, "loss": 0.0593, "step": 809400 }, { "epoch": 7.96, "grad_norm": 1.2940492630004883, "learning_rate": 1.0326293107728361e-06, "loss": 0.0102, "step": 809425 }, { "epoch": 7.96, "grad_norm": 2.71437406539917, "learning_rate": 1.0325051883185878e-06, "loss": 0.0503, "step": 809450 }, { "epoch": 7.96, "grad_norm": 6.577212333679199, "learning_rate": 1.0323810658643392e-06, "loss": 0.0286, "step": 809475 }, { "epoch": 7.96, "grad_norm": 2.14011812210083, "learning_rate": 1.0322569434100906e-06, "loss": 0.049, "step": 809500 }, { "epoch": 7.96, "grad_norm": 7.942969799041748, "learning_rate": 1.0321328209558422e-06, "loss": 0.0201, "step": 809525 }, { "epoch": 7.96, "grad_norm": 3.7892234325408936, "learning_rate": 1.0320086985015939e-06, "loss": 0.0485, "step": 809550 }, { "epoch": 7.96, "grad_norm": 4.239175319671631, "learning_rate": 1.0318845760473453e-06, "loss": 0.0197, "step": 809575 }, { "epoch": 7.96, "grad_norm": 2.9453954696655273, "learning_rate": 1.031760453593097e-06, "loss": 0.0416, "step": 809600 }, { "epoch": 7.96, "grad_norm": 5.770501613616943, "learning_rate": 1.0316363311388484e-06, "loss": 0.0094, "step": 809625 }, { "epoch": 7.96, "grad_norm": 4.126926422119141, "learning_rate": 1.0315122086846e-06, "loss": 0.0639, "step": 809650 }, { "epoch": 7.96, "grad_norm": 2.050492525100708, "learning_rate": 1.0313880862303514e-06, "loss": 0.0195, "step": 809675 }, { "epoch": 7.96, "grad_norm": 2.0044291019439697, "learning_rate": 1.031263963776103e-06, "loss": 0.0645, "step": 809700 }, { "epoch": 7.96, "grad_norm": 0.1696828454732895, "learning_rate": 1.0311398413218545e-06, "loss": 0.0161, "step": 809725 }, { "epoch": 7.96, "grad_norm": 3.643953800201416, "learning_rate": 1.0310157188676061e-06, "loss": 0.0458, "step": 809750 }, { "epoch": 7.96, "grad_norm": 2.9758098125457764, "learning_rate": 1.0308915964133578e-06, "loss": 0.014, "step": 809775 }, { "epoch": 7.96, "grad_norm": 3.5880556106567383, "learning_rate": 1.0307674739591092e-06, "loss": 0.0691, "step": 809800 }, { "epoch": 7.96, "grad_norm": 10.560542106628418, "learning_rate": 1.0306433515048606e-06, "loss": 0.0237, "step": 809825 }, { "epoch": 7.96, "grad_norm": 4.01595401763916, "learning_rate": 1.0305192290506122e-06, "loss": 0.0498, "step": 809850 }, { "epoch": 7.96, "grad_norm": 0.16384319961071014, "learning_rate": 1.0303951065963639e-06, "loss": 0.0107, "step": 809875 }, { "epoch": 7.96, "grad_norm": 3.5689163208007812, "learning_rate": 1.0302709841421153e-06, "loss": 0.0505, "step": 809900 }, { "epoch": 7.96, "grad_norm": 0.07943356037139893, "learning_rate": 1.0301468616878667e-06, "loss": 0.0215, "step": 809925 }, { "epoch": 7.96, "grad_norm": 2.651299476623535, "learning_rate": 1.0300277041317884e-06, "loss": 0.0665, "step": 809950 }, { "epoch": 7.96, "grad_norm": 5.581493854522705, "learning_rate": 1.0299035816775398e-06, "loss": 0.0184, "step": 809975 }, { "epoch": 7.96, "grad_norm": 12.642123222351074, "learning_rate": 1.0297794592232914e-06, "loss": 0.0518, "step": 810000 }, { "epoch": 7.96, "grad_norm": 0.04455553740262985, "learning_rate": 1.0296553367690429e-06, "loss": 0.0146, "step": 810025 }, { "epoch": 7.96, "grad_norm": 3.3090813159942627, "learning_rate": 1.0295312143147945e-06, "loss": 0.0513, "step": 810050 }, { "epoch": 7.96, "grad_norm": 2.1259171962738037, "learning_rate": 1.029407091860546e-06, "loss": 0.0149, "step": 810075 }, { "epoch": 7.97, "grad_norm": 2.9646592140197754, "learning_rate": 1.0292829694062976e-06, "loss": 0.0758, "step": 810100 }, { "epoch": 7.97, "grad_norm": 9.086758613586426, "learning_rate": 1.0291588469520492e-06, "loss": 0.0186, "step": 810125 }, { "epoch": 7.97, "grad_norm": 7.20650053024292, "learning_rate": 1.0290347244978006e-06, "loss": 0.064, "step": 810150 }, { "epoch": 7.97, "grad_norm": 7.89832878112793, "learning_rate": 1.0289106020435522e-06, "loss": 0.0236, "step": 810175 }, { "epoch": 7.97, "grad_norm": 2.6425020694732666, "learning_rate": 1.0287864795893037e-06, "loss": 0.0805, "step": 810200 }, { "epoch": 7.97, "grad_norm": 12.809541702270508, "learning_rate": 1.0286623571350553e-06, "loss": 0.0232, "step": 810225 }, { "epoch": 7.97, "grad_norm": 3.5825092792510986, "learning_rate": 1.0285382346808067e-06, "loss": 0.061, "step": 810250 }, { "epoch": 7.97, "grad_norm": 15.000520706176758, "learning_rate": 1.0284141122265584e-06, "loss": 0.0098, "step": 810275 }, { "epoch": 7.97, "grad_norm": 3.6250882148742676, "learning_rate": 1.02828998977231e-06, "loss": 0.0531, "step": 810300 }, { "epoch": 7.97, "grad_norm": 2.28662371635437, "learning_rate": 1.0281658673180614e-06, "loss": 0.0081, "step": 810325 }, { "epoch": 7.97, "grad_norm": 5.224179267883301, "learning_rate": 1.0280417448638128e-06, "loss": 0.0618, "step": 810350 }, { "epoch": 7.97, "grad_norm": 4.394721031188965, "learning_rate": 1.0279176224095645e-06, "loss": 0.0272, "step": 810375 }, { "epoch": 7.97, "grad_norm": 3.1572537422180176, "learning_rate": 1.0277934999553161e-06, "loss": 0.0473, "step": 810400 }, { "epoch": 7.97, "grad_norm": 16.05885124206543, "learning_rate": 1.0276693775010675e-06, "loss": 0.0172, "step": 810425 }, { "epoch": 7.97, "grad_norm": 2.986051321029663, "learning_rate": 1.027545255046819e-06, "loss": 0.0459, "step": 810450 }, { "epoch": 7.97, "grad_norm": 2.506230115890503, "learning_rate": 1.0274211325925706e-06, "loss": 0.0246, "step": 810475 }, { "epoch": 7.97, "grad_norm": 3.411508321762085, "learning_rate": 1.0272970101383222e-06, "loss": 0.0394, "step": 810500 }, { "epoch": 7.97, "grad_norm": 3.8556532859802246, "learning_rate": 1.0271728876840737e-06, "loss": 0.0245, "step": 810525 }, { "epoch": 7.97, "grad_norm": 3.5724079608917236, "learning_rate": 1.0270487652298253e-06, "loss": 0.073, "step": 810550 }, { "epoch": 7.97, "grad_norm": 6.712469577789307, "learning_rate": 1.0269246427755767e-06, "loss": 0.0185, "step": 810575 }, { "epoch": 7.97, "grad_norm": 3.107870578765869, "learning_rate": 1.0268005203213283e-06, "loss": 0.0732, "step": 810600 }, { "epoch": 7.97, "grad_norm": 5.161228179931641, "learning_rate": 1.02667639786708e-06, "loss": 0.0175, "step": 810625 }, { "epoch": 7.97, "grad_norm": 2.1329290866851807, "learning_rate": 1.0265522754128314e-06, "loss": 0.0568, "step": 810650 }, { "epoch": 7.97, "grad_norm": 7.007063865661621, "learning_rate": 1.0264281529585828e-06, "loss": 0.0192, "step": 810675 }, { "epoch": 7.97, "grad_norm": 3.6601665019989014, "learning_rate": 1.0263089954025043e-06, "loss": 0.0758, "step": 810700 }, { "epoch": 7.97, "grad_norm": 4.504962921142578, "learning_rate": 1.026184872948256e-06, "loss": 0.0114, "step": 810725 }, { "epoch": 7.97, "grad_norm": 2.69047474861145, "learning_rate": 1.0260607504940075e-06, "loss": 0.0456, "step": 810750 }, { "epoch": 7.97, "grad_norm": 1.9528414011001587, "learning_rate": 1.025936628039759e-06, "loss": 0.0155, "step": 810775 }, { "epoch": 7.97, "grad_norm": 3.2695255279541016, "learning_rate": 1.0258125055855104e-06, "loss": 0.0637, "step": 810800 }, { "epoch": 7.97, "grad_norm": 5.212718486785889, "learning_rate": 1.025688383131262e-06, "loss": 0.0292, "step": 810825 }, { "epoch": 7.97, "grad_norm": 2.671266555786133, "learning_rate": 1.0255642606770137e-06, "loss": 0.0483, "step": 810850 }, { "epoch": 7.97, "grad_norm": 2.0842034816741943, "learning_rate": 1.025440138222765e-06, "loss": 0.0147, "step": 810875 }, { "epoch": 7.97, "grad_norm": 2.8221259117126465, "learning_rate": 1.0253160157685167e-06, "loss": 0.0551, "step": 810900 }, { "epoch": 7.97, "grad_norm": 2.172274589538574, "learning_rate": 1.0251918933142681e-06, "loss": 0.0166, "step": 810925 }, { "epoch": 7.97, "grad_norm": 2.7911484241485596, "learning_rate": 1.0250677708600198e-06, "loss": 0.0771, "step": 810950 }, { "epoch": 7.97, "grad_norm": 0.8175020813941956, "learning_rate": 1.0249436484057714e-06, "loss": 0.0201, "step": 810975 }, { "epoch": 7.97, "grad_norm": 2.2968556880950928, "learning_rate": 1.0248195259515228e-06, "loss": 0.0706, "step": 811000 }, { "epoch": 7.97, "grad_norm": 0.134884774684906, "learning_rate": 1.0246954034972743e-06, "loss": 0.0198, "step": 811025 }, { "epoch": 7.97, "grad_norm": 1.4926635026931763, "learning_rate": 1.024571281043026e-06, "loss": 0.0636, "step": 811050 }, { "epoch": 7.97, "grad_norm": 0.043475277721881866, "learning_rate": 1.0244471585887775e-06, "loss": 0.0143, "step": 811075 }, { "epoch": 7.97, "grad_norm": 2.7080776691436768, "learning_rate": 1.024323036134529e-06, "loss": 0.0636, "step": 811100 }, { "epoch": 7.98, "grad_norm": 9.989439010620117, "learning_rate": 1.0241989136802804e-06, "loss": 0.022, "step": 811125 }, { "epoch": 7.98, "grad_norm": 2.8182921409606934, "learning_rate": 1.024074791226032e-06, "loss": 0.0593, "step": 811150 }, { "epoch": 7.98, "grad_norm": 0.046084366738796234, "learning_rate": 1.0239506687717836e-06, "loss": 0.0148, "step": 811175 }, { "epoch": 7.98, "grad_norm": 17.11472511291504, "learning_rate": 1.023826546317535e-06, "loss": 0.0446, "step": 811200 }, { "epoch": 7.98, "grad_norm": 0.6249064803123474, "learning_rate": 1.0237024238632865e-06, "loss": 0.0116, "step": 811225 }, { "epoch": 7.98, "grad_norm": 2.464660882949829, "learning_rate": 1.0235783014090381e-06, "loss": 0.0743, "step": 811250 }, { "epoch": 7.98, "grad_norm": 11.00045394897461, "learning_rate": 1.0234541789547898e-06, "loss": 0.0178, "step": 811275 }, { "epoch": 7.98, "grad_norm": 4.4324541091918945, "learning_rate": 1.0233300565005412e-06, "loss": 0.0492, "step": 811300 }, { "epoch": 7.98, "grad_norm": 0.08366667479276657, "learning_rate": 1.0232059340462928e-06, "loss": 0.0341, "step": 811325 }, { "epoch": 7.98, "grad_norm": 5.209552764892578, "learning_rate": 1.0230818115920442e-06, "loss": 0.0565, "step": 811350 }, { "epoch": 7.98, "grad_norm": 0.650490939617157, "learning_rate": 1.0229576891377959e-06, "loss": 0.016, "step": 811375 }, { "epoch": 7.98, "grad_norm": 2.796159267425537, "learning_rate": 1.0228335666835475e-06, "loss": 0.0489, "step": 811400 }, { "epoch": 7.98, "grad_norm": 2.259779691696167, "learning_rate": 1.022709444229299e-06, "loss": 0.0219, "step": 811425 }, { "epoch": 7.98, "grad_norm": 3.9285037517547607, "learning_rate": 1.0225853217750504e-06, "loss": 0.0486, "step": 811450 }, { "epoch": 7.98, "grad_norm": 0.009064874611794949, "learning_rate": 1.022461199320802e-06, "loss": 0.017, "step": 811475 }, { "epoch": 7.98, "grad_norm": 2.1341123580932617, "learning_rate": 1.0223370768665536e-06, "loss": 0.0453, "step": 811500 }, { "epoch": 7.98, "grad_norm": 2.0305957794189453, "learning_rate": 1.022212954412305e-06, "loss": 0.0279, "step": 811525 }, { "epoch": 7.98, "grad_norm": 1.5306718349456787, "learning_rate": 1.0220888319580565e-06, "loss": 0.0616, "step": 811550 }, { "epoch": 7.98, "grad_norm": 7.505186557769775, "learning_rate": 1.0219647095038081e-06, "loss": 0.0177, "step": 811575 }, { "epoch": 7.98, "grad_norm": 3.8144774436950684, "learning_rate": 1.0218405870495597e-06, "loss": 0.0648, "step": 811600 }, { "epoch": 7.98, "grad_norm": 15.794302940368652, "learning_rate": 1.0217164645953112e-06, "loss": 0.0162, "step": 811625 }, { "epoch": 7.98, "grad_norm": 4.917563438415527, "learning_rate": 1.0215923421410628e-06, "loss": 0.072, "step": 811650 }, { "epoch": 7.98, "grad_norm": 0.3494337201118469, "learning_rate": 1.0214682196868144e-06, "loss": 0.0155, "step": 811675 }, { "epoch": 7.98, "grad_norm": 3.705191135406494, "learning_rate": 1.0213440972325659e-06, "loss": 0.0493, "step": 811700 }, { "epoch": 7.98, "grad_norm": 0.020213700830936432, "learning_rate": 1.0212199747783173e-06, "loss": 0.0266, "step": 811725 }, { "epoch": 7.98, "grad_norm": 4.405981063842773, "learning_rate": 1.021095852324069e-06, "loss": 0.0581, "step": 811750 }, { "epoch": 7.98, "grad_norm": 0.0851409062743187, "learning_rate": 1.0209717298698206e-06, "loss": 0.017, "step": 811775 }, { "epoch": 7.98, "grad_norm": 2.8811962604522705, "learning_rate": 1.020847607415572e-06, "loss": 0.0453, "step": 811800 }, { "epoch": 7.98, "grad_norm": 11.450862884521484, "learning_rate": 1.0207234849613236e-06, "loss": 0.0197, "step": 811825 }, { "epoch": 7.98, "grad_norm": 2.5530142784118652, "learning_rate": 1.020599362507075e-06, "loss": 0.0505, "step": 811850 }, { "epoch": 7.98, "grad_norm": 7.07099723815918, "learning_rate": 1.0204752400528267e-06, "loss": 0.012, "step": 811875 }, { "epoch": 7.98, "grad_norm": 2.8832998275756836, "learning_rate": 1.020351117598578e-06, "loss": 0.0607, "step": 811900 }, { "epoch": 7.98, "grad_norm": 0.5709952116012573, "learning_rate": 1.0202269951443297e-06, "loss": 0.0082, "step": 811925 }, { "epoch": 7.98, "grad_norm": 4.8807501792907715, "learning_rate": 1.0201028726900812e-06, "loss": 0.0688, "step": 811950 }, { "epoch": 7.98, "grad_norm": 1.6694886684417725, "learning_rate": 1.0199787502358328e-06, "loss": 0.0261, "step": 811975 }, { "epoch": 7.98, "grad_norm": 1.3204761743545532, "learning_rate": 1.0198546277815844e-06, "loss": 0.0565, "step": 812000 }, { "epoch": 7.98, "grad_norm": 1.538908839225769, "learning_rate": 1.0197305053273358e-06, "loss": 0.0283, "step": 812025 }, { "epoch": 7.98, "grad_norm": 1.5765188932418823, "learning_rate": 1.0196063828730873e-06, "loss": 0.0393, "step": 812050 }, { "epoch": 7.98, "grad_norm": 5.57636022567749, "learning_rate": 1.019482260418839e-06, "loss": 0.0273, "step": 812075 }, { "epoch": 7.98, "grad_norm": 1.7165147066116333, "learning_rate": 1.0193581379645905e-06, "loss": 0.0651, "step": 812100 }, { "epoch": 7.98, "grad_norm": 0.04543038830161095, "learning_rate": 1.019234015510342e-06, "loss": 0.0151, "step": 812125 }, { "epoch": 7.99, "grad_norm": 5.0570597648620605, "learning_rate": 1.0191098930560934e-06, "loss": 0.0594, "step": 812150 }, { "epoch": 7.99, "grad_norm": 15.661099433898926, "learning_rate": 1.018985770601845e-06, "loss": 0.0224, "step": 812175 }, { "epoch": 7.99, "grad_norm": 2.3382821083068848, "learning_rate": 1.0188616481475967e-06, "loss": 0.0596, "step": 812200 }, { "epoch": 7.99, "grad_norm": 5.008064270019531, "learning_rate": 1.018737525693348e-06, "loss": 0.0153, "step": 812225 }, { "epoch": 7.99, "grad_norm": 4.427187919616699, "learning_rate": 1.0186134032390997e-06, "loss": 0.057, "step": 812250 }, { "epoch": 7.99, "grad_norm": 6.344561576843262, "learning_rate": 1.0184892807848511e-06, "loss": 0.0226, "step": 812275 }, { "epoch": 7.99, "grad_norm": 4.8177490234375, "learning_rate": 1.0183651583306028e-06, "loss": 0.0522, "step": 812300 }, { "epoch": 7.99, "grad_norm": 3.3193788528442383, "learning_rate": 1.0182410358763542e-06, "loss": 0.0114, "step": 812325 }, { "epoch": 7.99, "grad_norm": 3.642239809036255, "learning_rate": 1.0181169134221058e-06, "loss": 0.0611, "step": 812350 }, { "epoch": 7.99, "grad_norm": 0.08228000998497009, "learning_rate": 1.0179927909678573e-06, "loss": 0.0177, "step": 812375 }, { "epoch": 7.99, "grad_norm": 3.25925612449646, "learning_rate": 1.0178686685136089e-06, "loss": 0.0502, "step": 812400 }, { "epoch": 7.99, "grad_norm": 0.4544191360473633, "learning_rate": 1.0177445460593605e-06, "loss": 0.0225, "step": 812425 }, { "epoch": 7.99, "grad_norm": 3.626640796661377, "learning_rate": 1.017620423605112e-06, "loss": 0.0556, "step": 812450 }, { "epoch": 7.99, "grad_norm": 8.795197486877441, "learning_rate": 1.0174963011508634e-06, "loss": 0.0338, "step": 812475 }, { "epoch": 7.99, "grad_norm": 2.103501081466675, "learning_rate": 1.017372178696615e-06, "loss": 0.0636, "step": 812500 }, { "epoch": 7.99, "grad_norm": 28.037708282470703, "learning_rate": 1.0172480562423666e-06, "loss": 0.0287, "step": 812525 }, { "epoch": 7.99, "grad_norm": 4.584221363067627, "learning_rate": 1.017123933788118e-06, "loss": 0.0635, "step": 812550 }, { "epoch": 7.99, "grad_norm": 8.79632568359375, "learning_rate": 1.0169998113338695e-06, "loss": 0.0185, "step": 812575 }, { "epoch": 7.99, "grad_norm": 1.9554837942123413, "learning_rate": 1.0168756888796211e-06, "loss": 0.0458, "step": 812600 }, { "epoch": 7.99, "grad_norm": 5.24825382232666, "learning_rate": 1.0167515664253728e-06, "loss": 0.0171, "step": 812625 }, { "epoch": 7.99, "grad_norm": 3.483546495437622, "learning_rate": 1.0166274439711242e-06, "loss": 0.0629, "step": 812650 }, { "epoch": 7.99, "grad_norm": 3.061763048171997, "learning_rate": 1.0165033215168758e-06, "loss": 0.0142, "step": 812675 }, { "epoch": 7.99, "grad_norm": 3.1166999340057373, "learning_rate": 1.0163791990626272e-06, "loss": 0.0487, "step": 812700 }, { "epoch": 7.99, "grad_norm": 9.122663497924805, "learning_rate": 1.0162550766083789e-06, "loss": 0.022, "step": 812725 }, { "epoch": 7.99, "grad_norm": 5.122560977935791, "learning_rate": 1.0161309541541303e-06, "loss": 0.0576, "step": 812750 }, { "epoch": 7.99, "grad_norm": 11.664796829223633, "learning_rate": 1.016006831699882e-06, "loss": 0.0183, "step": 812775 }, { "epoch": 7.99, "grad_norm": 3.624539375305176, "learning_rate": 1.0158827092456334e-06, "loss": 0.0611, "step": 812800 }, { "epoch": 7.99, "grad_norm": 0.6974709033966064, "learning_rate": 1.015758586791385e-06, "loss": 0.0134, "step": 812825 }, { "epoch": 7.99, "grad_norm": 4.76875114440918, "learning_rate": 1.0156344643371366e-06, "loss": 0.0536, "step": 812850 }, { "epoch": 7.99, "grad_norm": 0.06765156984329224, "learning_rate": 1.015510341882888e-06, "loss": 0.0188, "step": 812875 }, { "epoch": 7.99, "grad_norm": 3.565572738647461, "learning_rate": 1.0153862194286395e-06, "loss": 0.0687, "step": 812900 }, { "epoch": 7.99, "grad_norm": 2.0865254402160645, "learning_rate": 1.0152620969743911e-06, "loss": 0.0237, "step": 812925 }, { "epoch": 7.99, "grad_norm": 4.406447410583496, "learning_rate": 1.0151379745201427e-06, "loss": 0.0711, "step": 812950 }, { "epoch": 7.99, "grad_norm": 0.22445522248744965, "learning_rate": 1.0150138520658942e-06, "loss": 0.0251, "step": 812975 }, { "epoch": 7.99, "grad_norm": 3.098358392715454, "learning_rate": 1.0148897296116458e-06, "loss": 0.0563, "step": 813000 }, { "epoch": 7.99, "grad_norm": 10.228997230529785, "learning_rate": 1.0147656071573974e-06, "loss": 0.017, "step": 813025 }, { "epoch": 7.99, "grad_norm": 2.5950047969818115, "learning_rate": 1.0146414847031489e-06, "loss": 0.0438, "step": 813050 }, { "epoch": 7.99, "grad_norm": 10.016658782958984, "learning_rate": 1.0145173622489003e-06, "loss": 0.0197, "step": 813075 }, { "epoch": 7.99, "grad_norm": 2.434627056121826, "learning_rate": 1.014393239794652e-06, "loss": 0.0618, "step": 813100 }, { "epoch": 7.99, "grad_norm": 0.6269965171813965, "learning_rate": 1.0142691173404036e-06, "loss": 0.0215, "step": 813125 }, { "epoch": 8.0, "grad_norm": 4.474010467529297, "learning_rate": 1.014149959784325e-06, "loss": 0.0546, "step": 813150 }, { "epoch": 8.0, "grad_norm": 5.285694599151611, "learning_rate": 1.0140258373300764e-06, "loss": 0.0215, "step": 813175 }, { "epoch": 8.0, "grad_norm": 3.5579311847686768, "learning_rate": 1.013901714875828e-06, "loss": 0.0775, "step": 813200 }, { "epoch": 8.0, "grad_norm": 0.4755915403366089, "learning_rate": 1.0137775924215795e-06, "loss": 0.0152, "step": 813225 }, { "epoch": 8.0, "grad_norm": 2.2944180965423584, "learning_rate": 1.0136534699673311e-06, "loss": 0.0326, "step": 813250 }, { "epoch": 8.0, "grad_norm": 2.118788719177246, "learning_rate": 1.0135293475130828e-06, "loss": 0.0225, "step": 813275 }, { "epoch": 8.0, "grad_norm": 5.1567769050598145, "learning_rate": 1.0134052250588342e-06, "loss": 0.0505, "step": 813300 }, { "epoch": 8.0, "grad_norm": 4.269989013671875, "learning_rate": 1.0132811026045856e-06, "loss": 0.0142, "step": 813325 }, { "epoch": 8.0, "grad_norm": 2.830404043197632, "learning_rate": 1.0131569801503372e-06, "loss": 0.0576, "step": 813350 }, { "epoch": 8.0, "grad_norm": 0.1958330124616623, "learning_rate": 1.0130328576960889e-06, "loss": 0.0171, "step": 813375 }, { "epoch": 8.0, "grad_norm": 2.3503987789154053, "learning_rate": 1.0129087352418403e-06, "loss": 0.0464, "step": 813400 }, { "epoch": 8.0, "grad_norm": 6.731716632843018, "learning_rate": 1.0127846127875917e-06, "loss": 0.0259, "step": 813425 }, { "epoch": 8.0, "grad_norm": 2.413280963897705, "learning_rate": 1.0126604903333434e-06, "loss": 0.069, "step": 813450 }, { "epoch": 8.0, "grad_norm": 5.227848052978516, "learning_rate": 1.012536367879095e-06, "loss": 0.0216, "step": 813475 }, { "epoch": 8.0, "grad_norm": 4.89772367477417, "learning_rate": 1.0124122454248464e-06, "loss": 0.0821, "step": 813500 }, { "epoch": 8.0, "grad_norm": 6.494566917419434, "learning_rate": 1.0122881229705978e-06, "loss": 0.0296, "step": 813525 }, { "epoch": 8.0, "grad_norm": 2.967400550842285, "learning_rate": 1.0121640005163495e-06, "loss": 0.0494, "step": 813550 }, { "epoch": 8.0, "grad_norm": 5.751565933227539, "learning_rate": 1.012039878062101e-06, "loss": 0.0278, "step": 813575 }, { "epoch": 8.0, "grad_norm": 3.46783709526062, "learning_rate": 1.0119157556078525e-06, "loss": 0.0861, "step": 813600 }, { "epoch": 8.0, "grad_norm": 0.060945745557546616, "learning_rate": 1.0117916331536042e-06, "loss": 0.0207, "step": 813625 }, { "epoch": 8.0, "grad_norm": 6.200270175933838, "learning_rate": 1.0116675106993556e-06, "loss": 0.0667, "step": 813650 }, { "epoch": 8.0, "grad_norm": 0.3043985068798065, "learning_rate": 1.0115433882451072e-06, "loss": 0.0147, "step": 813675 }, { "epoch": 8.0, "grad_norm": 9.267763137817383, "learning_rate": 1.0114192657908589e-06, "loss": 0.0277, "step": 813700 }, { "epoch": 8.0, "grad_norm": 1.314925193786621, "learning_rate": 1.0112951433366103e-06, "loss": 0.02, "step": 813725 }, { "epoch": 8.0, "grad_norm": 0.24576111137866974, "learning_rate": 1.0111710208823617e-06, "loss": 0.0273, "step": 813750 }, { "epoch": 8.0, "grad_norm": 1.8010863065719604, "learning_rate": 1.0110468984281133e-06, "loss": 0.0231, "step": 813775 }, { "epoch": 8.0, "grad_norm": 0.8990573883056641, "learning_rate": 1.010922775973865e-06, "loss": 0.0301, "step": 813800 }, { "epoch": 8.0, "grad_norm": 0.16841846704483032, "learning_rate": 1.0107986535196164e-06, "loss": 0.0333, "step": 813825 }, { "epoch": 8.0, "grad_norm": 1.2237833738327026, "learning_rate": 1.0106745310653678e-06, "loss": 0.0251, "step": 813850 }, { "epoch": 8.0, "grad_norm": 0.23626457154750824, "learning_rate": 1.0105504086111195e-06, "loss": 0.0213, "step": 813875 }, { "epoch": 8.0, "grad_norm": 2.4520609378814697, "learning_rate": 1.010426286156871e-06, "loss": 0.0317, "step": 813900 }, { "epoch": 8.0, "grad_norm": 0.8302595019340515, "learning_rate": 1.0103021637026225e-06, "loss": 0.0174, "step": 813925 }, { "epoch": 8.0, "grad_norm": 11.46703052520752, "learning_rate": 1.010178041248374e-06, "loss": 0.0349, "step": 813950 }, { "epoch": 8.0, "grad_norm": 0.030931103974580765, "learning_rate": 1.0100539187941256e-06, "loss": 0.0177, "step": 813975 }, { "epoch": 8.0, "grad_norm": 8.50158977508545, "learning_rate": 1.0099297963398772e-06, "loss": 0.0263, "step": 814000 }, { "epoch": 8.0, "grad_norm": 1.2787671089172363, "learning_rate": 1.0098056738856286e-06, "loss": 0.0329, "step": 814025 }, { "epoch": 8.0, "grad_norm": 5.453257083892822, "learning_rate": 1.0096815514313803e-06, "loss": 0.019, "step": 814050 }, { "epoch": 8.0, "grad_norm": 7.973667621612549, "learning_rate": 1.0095574289771317e-06, "loss": 0.0341, "step": 814075 }, { "epoch": 8.0, "grad_norm": 6.091340065002441, "learning_rate": 1.0094333065228833e-06, "loss": 0.0214, "step": 814100 }, { "epoch": 8.0, "grad_norm": 0.17462027072906494, "learning_rate": 1.009309184068635e-06, "loss": 0.0222, "step": 814125 }, { "epoch": 8.0, "grad_norm": 6.10091495513916, "learning_rate": 1.0091850616143864e-06, "loss": 0.0287, "step": 814150 }, { "epoch": 8.01, "grad_norm": 0.2319939285516739, "learning_rate": 1.0090609391601378e-06, "loss": 0.0285, "step": 814175 }, { "epoch": 8.01, "grad_norm": 7.983305931091309, "learning_rate": 1.0089368167058894e-06, "loss": 0.0233, "step": 814200 }, { "epoch": 8.01, "grad_norm": 0.006211211904883385, "learning_rate": 1.008812694251641e-06, "loss": 0.0289, "step": 814225 }, { "epoch": 8.01, "grad_norm": 15.033714294433594, "learning_rate": 1.0086885717973925e-06, "loss": 0.0143, "step": 814250 }, { "epoch": 8.01, "grad_norm": 1.9656352996826172, "learning_rate": 1.008564449343144e-06, "loss": 0.0261, "step": 814275 }, { "epoch": 8.01, "grad_norm": 9.74162483215332, "learning_rate": 1.0084403268888956e-06, "loss": 0.0268, "step": 814300 }, { "epoch": 8.01, "grad_norm": 1.5309900045394897, "learning_rate": 1.0083162044346472e-06, "loss": 0.0529, "step": 814325 }, { "epoch": 8.01, "grad_norm": 3.1194534301757812, "learning_rate": 1.0081920819803986e-06, "loss": 0.0192, "step": 814350 }, { "epoch": 8.01, "grad_norm": 1.8192518949508667, "learning_rate": 1.00806795952615e-06, "loss": 0.0268, "step": 814375 }, { "epoch": 8.01, "grad_norm": 7.997999668121338, "learning_rate": 1.0079438370719017e-06, "loss": 0.022, "step": 814400 }, { "epoch": 8.01, "grad_norm": 0.07476823031902313, "learning_rate": 1.0078197146176533e-06, "loss": 0.0568, "step": 814425 }, { "epoch": 8.01, "grad_norm": 9.314152717590332, "learning_rate": 1.0076955921634047e-06, "loss": 0.0321, "step": 814450 }, { "epoch": 8.01, "grad_norm": 0.06082766875624657, "learning_rate": 1.0075714697091564e-06, "loss": 0.0294, "step": 814475 }, { "epoch": 8.01, "grad_norm": 6.829448223114014, "learning_rate": 1.007447347254908e-06, "loss": 0.0327, "step": 814500 }, { "epoch": 8.01, "grad_norm": 0.16354291141033173, "learning_rate": 1.0073232248006594e-06, "loss": 0.0207, "step": 814525 }, { "epoch": 8.01, "grad_norm": 6.829858779907227, "learning_rate": 1.007199102346411e-06, "loss": 0.0283, "step": 814550 }, { "epoch": 8.01, "grad_norm": 0.7039214968681335, "learning_rate": 1.0070749798921625e-06, "loss": 0.0434, "step": 814575 }, { "epoch": 8.01, "grad_norm": 12.086753845214844, "learning_rate": 1.0069508574379141e-06, "loss": 0.0317, "step": 814600 }, { "epoch": 8.01, "grad_norm": 0.02787706069648266, "learning_rate": 1.0068267349836655e-06, "loss": 0.0259, "step": 814625 }, { "epoch": 8.01, "grad_norm": 3.1251347064971924, "learning_rate": 1.0067026125294172e-06, "loss": 0.0294, "step": 814650 }, { "epoch": 8.01, "grad_norm": 0.14267829060554504, "learning_rate": 1.0065784900751686e-06, "loss": 0.0245, "step": 814675 }, { "epoch": 8.01, "grad_norm": 3.588031530380249, "learning_rate": 1.0064543676209202e-06, "loss": 0.0287, "step": 814700 }, { "epoch": 8.01, "grad_norm": 0.21941518783569336, "learning_rate": 1.0063302451666719e-06, "loss": 0.0286, "step": 814725 }, { "epoch": 8.01, "grad_norm": 0.4246283173561096, "learning_rate": 1.0062061227124233e-06, "loss": 0.0332, "step": 814750 }, { "epoch": 8.01, "grad_norm": 4.411144256591797, "learning_rate": 1.0060820002581747e-06, "loss": 0.0339, "step": 814775 }, { "epoch": 8.01, "grad_norm": 0.5794529914855957, "learning_rate": 1.0059578778039263e-06, "loss": 0.0217, "step": 814800 }, { "epoch": 8.01, "grad_norm": 0.5768197774887085, "learning_rate": 1.005833755349678e-06, "loss": 0.0393, "step": 814825 }, { "epoch": 8.01, "grad_norm": 16.318952560424805, "learning_rate": 1.0057096328954294e-06, "loss": 0.0339, "step": 814850 }, { "epoch": 8.01, "grad_norm": 0.26951178908348083, "learning_rate": 1.0055855104411808e-06, "loss": 0.0408, "step": 814875 }, { "epoch": 8.01, "grad_norm": 7.642350673675537, "learning_rate": 1.0054613879869325e-06, "loss": 0.0455, "step": 814900 }, { "epoch": 8.01, "grad_norm": 0.5220405459403992, "learning_rate": 1.005337265532684e-06, "loss": 0.0214, "step": 814925 }, { "epoch": 8.01, "grad_norm": 2.624802827835083, "learning_rate": 1.0052131430784355e-06, "loss": 0.0244, "step": 814950 }, { "epoch": 8.01, "grad_norm": 3.189854860305786, "learning_rate": 1.0050890206241872e-06, "loss": 0.0371, "step": 814975 }, { "epoch": 8.01, "grad_norm": 2.7714269161224365, "learning_rate": 1.0049648981699386e-06, "loss": 0.0439, "step": 815000 }, { "epoch": 8.01, "grad_norm": 0.9351344108581543, "learning_rate": 1.0048407757156902e-06, "loss": 0.0363, "step": 815025 }, { "epoch": 8.01, "grad_norm": 10.840185165405273, "learning_rate": 1.0047166532614416e-06, "loss": 0.0229, "step": 815050 }, { "epoch": 8.01, "grad_norm": 0.3240847885608673, "learning_rate": 1.0045925308071933e-06, "loss": 0.0248, "step": 815075 }, { "epoch": 8.01, "grad_norm": 2.262458086013794, "learning_rate": 1.0044684083529447e-06, "loss": 0.0202, "step": 815100 }, { "epoch": 8.01, "grad_norm": 2.4730048179626465, "learning_rate": 1.0043442858986963e-06, "loss": 0.0287, "step": 815125 }, { "epoch": 8.01, "grad_norm": 8.892058372497559, "learning_rate": 1.004220163444448e-06, "loss": 0.0289, "step": 815150 }, { "epoch": 8.01, "grad_norm": 0.0669863224029541, "learning_rate": 1.0040960409901994e-06, "loss": 0.0346, "step": 815175 }, { "epoch": 8.02, "grad_norm": 11.017340660095215, "learning_rate": 1.0039719185359508e-06, "loss": 0.0297, "step": 815200 }, { "epoch": 8.02, "grad_norm": 0.014139290899038315, "learning_rate": 1.0038477960817024e-06, "loss": 0.0398, "step": 815225 }, { "epoch": 8.02, "grad_norm": 5.651130676269531, "learning_rate": 1.003723673627454e-06, "loss": 0.0324, "step": 815250 }, { "epoch": 8.02, "grad_norm": 1.6755090951919556, "learning_rate": 1.0035995511732055e-06, "loss": 0.0227, "step": 815275 }, { "epoch": 8.02, "grad_norm": 6.889786720275879, "learning_rate": 1.003475428718957e-06, "loss": 0.0354, "step": 815300 }, { "epoch": 8.02, "grad_norm": 0.042799029499292374, "learning_rate": 1.0033513062647086e-06, "loss": 0.0319, "step": 815325 }, { "epoch": 8.02, "grad_norm": 20.014793395996094, "learning_rate": 1.0032271838104602e-06, "loss": 0.0367, "step": 815350 }, { "epoch": 8.02, "grad_norm": 0.17902883887290955, "learning_rate": 1.0031030613562116e-06, "loss": 0.0384, "step": 815375 }, { "epoch": 8.02, "grad_norm": 7.187443733215332, "learning_rate": 1.0029789389019633e-06, "loss": 0.0167, "step": 815400 }, { "epoch": 8.02, "grad_norm": 0.1570087969303131, "learning_rate": 1.0028548164477147e-06, "loss": 0.0426, "step": 815425 }, { "epoch": 8.02, "grad_norm": 10.40386962890625, "learning_rate": 1.0027306939934663e-06, "loss": 0.0244, "step": 815450 }, { "epoch": 8.02, "grad_norm": 2.7776377201080322, "learning_rate": 1.0026065715392177e-06, "loss": 0.0469, "step": 815475 }, { "epoch": 8.02, "grad_norm": 22.235416412353516, "learning_rate": 1.0024824490849694e-06, "loss": 0.0358, "step": 815500 }, { "epoch": 8.02, "grad_norm": 0.0034197906497865915, "learning_rate": 1.0023632915288908e-06, "loss": 0.027, "step": 815525 }, { "epoch": 8.02, "grad_norm": 11.056733131408691, "learning_rate": 1.0022391690746422e-06, "loss": 0.0358, "step": 815550 }, { "epoch": 8.02, "grad_norm": 2.6284842491149902, "learning_rate": 1.0021150466203939e-06, "loss": 0.0347, "step": 815575 }, { "epoch": 8.02, "grad_norm": 7.594476699829102, "learning_rate": 1.0019909241661455e-06, "loss": 0.012, "step": 815600 }, { "epoch": 8.02, "grad_norm": 0.09888763725757599, "learning_rate": 1.001866801711897e-06, "loss": 0.0304, "step": 815625 }, { "epoch": 8.02, "grad_norm": 7.2335920333862305, "learning_rate": 1.0017426792576484e-06, "loss": 0.0155, "step": 815650 }, { "epoch": 8.02, "grad_norm": 0.0010423884959891438, "learning_rate": 1.0016185568034e-06, "loss": 0.034, "step": 815675 }, { "epoch": 8.02, "grad_norm": 9.870743751525879, "learning_rate": 1.0014944343491516e-06, "loss": 0.0277, "step": 815700 }, { "epoch": 8.02, "grad_norm": 0.015979457646608353, "learning_rate": 1.001370311894903e-06, "loss": 0.0514, "step": 815725 }, { "epoch": 8.02, "grad_norm": 4.629283905029297, "learning_rate": 1.0012461894406547e-06, "loss": 0.0325, "step": 815750 }, { "epoch": 8.02, "grad_norm": 0.6197823882102966, "learning_rate": 1.0011220669864061e-06, "loss": 0.0386, "step": 815775 }, { "epoch": 8.02, "grad_norm": 5.624438285827637, "learning_rate": 1.0009979445321577e-06, "loss": 0.0353, "step": 815800 }, { "epoch": 8.02, "grad_norm": 0.18479494750499725, "learning_rate": 1.0008738220779092e-06, "loss": 0.0372, "step": 815825 }, { "epoch": 8.02, "grad_norm": 17.284130096435547, "learning_rate": 1.0007496996236608e-06, "loss": 0.0388, "step": 815850 }, { "epoch": 8.02, "grad_norm": 0.034997716546058655, "learning_rate": 1.0006255771694122e-06, "loss": 0.0264, "step": 815875 }, { "epoch": 8.02, "grad_norm": 12.488700866699219, "learning_rate": 1.0005014547151639e-06, "loss": 0.0254, "step": 815900 }, { "epoch": 8.02, "grad_norm": 0.010078110732138157, "learning_rate": 1.0003773322609155e-06, "loss": 0.0355, "step": 815925 }, { "epoch": 8.02, "grad_norm": 26.899410247802734, "learning_rate": 1.000253209806667e-06, "loss": 0.034, "step": 815950 }, { "epoch": 8.02, "grad_norm": 2.415039539337158, "learning_rate": 1.0001290873524186e-06, "loss": 0.0242, "step": 815975 }, { "epoch": 8.02, "grad_norm": 21.536651611328125, "learning_rate": 1.0000049648981702e-06, "loss": 0.0248, "step": 816000 }, { "epoch": 8.02, "grad_norm": 0.05292727053165436, "learning_rate": 9.998808424439216e-07, "loss": 0.0302, "step": 816025 }, { "epoch": 8.02, "grad_norm": 0.6188468337059021, "learning_rate": 9.99756719989673e-07, "loss": 0.0141, "step": 816050 }, { "epoch": 8.02, "grad_norm": 0.2236529141664505, "learning_rate": 9.996325975354247e-07, "loss": 0.0281, "step": 816075 }, { "epoch": 8.02, "grad_norm": 0.11744885891675949, "learning_rate": 9.995084750811763e-07, "loss": 0.0142, "step": 816100 }, { "epoch": 8.02, "grad_norm": 2.9296529293060303, "learning_rate": 9.993843526269277e-07, "loss": 0.0345, "step": 816125 }, { "epoch": 8.02, "grad_norm": 12.446706771850586, "learning_rate": 9.992602301726792e-07, "loss": 0.0248, "step": 816150 }, { "epoch": 8.02, "grad_norm": 0.07183696329593658, "learning_rate": 9.991361077184308e-07, "loss": 0.0548, "step": 816175 }, { "epoch": 8.03, "grad_norm": 2.6272053718566895, "learning_rate": 9.990119852641824e-07, "loss": 0.05, "step": 816200 }, { "epoch": 8.03, "grad_norm": 22.611740112304688, "learning_rate": 9.988878628099339e-07, "loss": 0.0363, "step": 816225 }, { "epoch": 8.03, "grad_norm": 8.286059379577637, "learning_rate": 9.987637403556853e-07, "loss": 0.0251, "step": 816250 }, { "epoch": 8.03, "grad_norm": 0.03935113549232483, "learning_rate": 9.98639617901437e-07, "loss": 0.0458, "step": 816275 }, { "epoch": 8.03, "grad_norm": 6.662299633026123, "learning_rate": 9.985154954471885e-07, "loss": 0.0119, "step": 816300 }, { "epoch": 8.03, "grad_norm": 2.7773358821868896, "learning_rate": 9.9839137299294e-07, "loss": 0.0368, "step": 816325 }, { "epoch": 8.03, "grad_norm": 0.8059797286987305, "learning_rate": 9.982672505386916e-07, "loss": 0.0175, "step": 816350 }, { "epoch": 8.03, "grad_norm": 0.08186561614274979, "learning_rate": 9.98143128084443e-07, "loss": 0.0406, "step": 816375 }, { "epoch": 8.03, "grad_norm": 22.81245231628418, "learning_rate": 9.980190056301947e-07, "loss": 0.0253, "step": 816400 }, { "epoch": 8.03, "grad_norm": 0.026154231280088425, "learning_rate": 9.978948831759463e-07, "loss": 0.0326, "step": 816425 }, { "epoch": 8.03, "grad_norm": 1.6738016605377197, "learning_rate": 9.977707607216977e-07, "loss": 0.0364, "step": 816450 }, { "epoch": 8.03, "grad_norm": 1.320725917816162, "learning_rate": 9.976466382674491e-07, "loss": 0.0457, "step": 816475 }, { "epoch": 8.03, "grad_norm": 13.39712905883789, "learning_rate": 9.975225158132008e-07, "loss": 0.0402, "step": 816500 }, { "epoch": 8.03, "grad_norm": 0.5354979038238525, "learning_rate": 9.973983933589524e-07, "loss": 0.0628, "step": 816525 }, { "epoch": 8.03, "grad_norm": 15.384665489196777, "learning_rate": 9.972742709047038e-07, "loss": 0.0306, "step": 816550 }, { "epoch": 8.03, "grad_norm": 0.08616902679204941, "learning_rate": 9.971501484504553e-07, "loss": 0.0205, "step": 816575 }, { "epoch": 8.03, "grad_norm": 0.9327487349510193, "learning_rate": 9.970260259962069e-07, "loss": 0.0182, "step": 816600 }, { "epoch": 8.03, "grad_norm": 0.05942923203110695, "learning_rate": 9.969019035419585e-07, "loss": 0.0462, "step": 816625 }, { "epoch": 8.03, "grad_norm": 8.522928237915039, "learning_rate": 9.9677778108771e-07, "loss": 0.0253, "step": 816650 }, { "epoch": 8.03, "grad_norm": 0.19090034067630768, "learning_rate": 9.966536586334614e-07, "loss": 0.0423, "step": 816675 }, { "epoch": 8.03, "grad_norm": 11.300195693969727, "learning_rate": 9.96529536179213e-07, "loss": 0.0207, "step": 816700 }, { "epoch": 8.03, "grad_norm": 0.0857844203710556, "learning_rate": 9.964054137249646e-07, "loss": 0.0223, "step": 816725 }, { "epoch": 8.03, "grad_norm": 0.42301225662231445, "learning_rate": 9.96281291270716e-07, "loss": 0.0342, "step": 816750 }, { "epoch": 8.03, "grad_norm": 0.032986268401145935, "learning_rate": 9.961571688164677e-07, "loss": 0.0337, "step": 816775 }, { "epoch": 8.03, "grad_norm": 0.4536915421485901, "learning_rate": 9.960330463622191e-07, "loss": 0.0163, "step": 816800 }, { "epoch": 8.03, "grad_norm": 5.0511322021484375, "learning_rate": 9.959089239079708e-07, "loss": 0.0395, "step": 816825 }, { "epoch": 8.03, "grad_norm": 8.458473205566406, "learning_rate": 9.957848014537224e-07, "loss": 0.0207, "step": 816850 }, { "epoch": 8.03, "grad_norm": 1.101150393486023, "learning_rate": 9.956606789994738e-07, "loss": 0.0332, "step": 816875 }, { "epoch": 8.03, "grad_norm": 6.671269416809082, "learning_rate": 9.955365565452252e-07, "loss": 0.0327, "step": 816900 }, { "epoch": 8.03, "grad_norm": 0.05425921827554703, "learning_rate": 9.954124340909769e-07, "loss": 0.0368, "step": 816925 }, { "epoch": 8.03, "grad_norm": 9.736351013183594, "learning_rate": 9.952883116367285e-07, "loss": 0.0192, "step": 816950 }, { "epoch": 8.03, "grad_norm": 4.094801902770996, "learning_rate": 9.9516418918248e-07, "loss": 0.0359, "step": 816975 }, { "epoch": 8.03, "grad_norm": 4.596017360687256, "learning_rate": 9.950400667282314e-07, "loss": 0.0242, "step": 817000 }, { "epoch": 8.03, "grad_norm": 0.40644267201423645, "learning_rate": 9.94915944273983e-07, "loss": 0.0357, "step": 817025 }, { "epoch": 8.03, "grad_norm": 4.306000709533691, "learning_rate": 9.947918218197346e-07, "loss": 0.0284, "step": 817050 }, { "epoch": 8.03, "grad_norm": 0.020791178569197655, "learning_rate": 9.94667699365486e-07, "loss": 0.033, "step": 817075 }, { "epoch": 8.03, "grad_norm": 17.967594146728516, "learning_rate": 9.945435769112375e-07, "loss": 0.0288, "step": 817100 }, { "epoch": 8.03, "grad_norm": 19.717802047729492, "learning_rate": 9.944194544569891e-07, "loss": 0.0258, "step": 817125 }, { "epoch": 8.03, "grad_norm": 7.9527411460876465, "learning_rate": 9.942953320027407e-07, "loss": 0.036, "step": 817150 }, { "epoch": 8.03, "grad_norm": 0.2926117777824402, "learning_rate": 9.941712095484922e-07, "loss": 0.0333, "step": 817175 }, { "epoch": 8.03, "grad_norm": 8.071475982666016, "learning_rate": 9.940470870942438e-07, "loss": 0.031, "step": 817200 }, { "epoch": 8.04, "grad_norm": 7.974461078643799, "learning_rate": 9.939229646399952e-07, "loss": 0.0278, "step": 817225 }, { "epoch": 8.04, "grad_norm": 7.1681342124938965, "learning_rate": 9.937988421857469e-07, "loss": 0.0293, "step": 817250 }, { "epoch": 8.04, "grad_norm": 0.022605998441576958, "learning_rate": 9.936747197314985e-07, "loss": 0.0467, "step": 817275 }, { "epoch": 8.04, "grad_norm": 10.223575592041016, "learning_rate": 9.9355059727725e-07, "loss": 0.0289, "step": 817300 }, { "epoch": 8.04, "grad_norm": 0.15468275547027588, "learning_rate": 9.934264748230013e-07, "loss": 0.0238, "step": 817325 }, { "epoch": 8.04, "grad_norm": 23.2088565826416, "learning_rate": 9.93302352368753e-07, "loss": 0.0319, "step": 817350 }, { "epoch": 8.04, "grad_norm": 2.285689353942871, "learning_rate": 9.931782299145046e-07, "loss": 0.0349, "step": 817375 }, { "epoch": 8.04, "grad_norm": 8.671721458435059, "learning_rate": 9.93054107460256e-07, "loss": 0.0333, "step": 817400 }, { "epoch": 8.04, "grad_norm": 7.660439491271973, "learning_rate": 9.929299850060077e-07, "loss": 0.0313, "step": 817425 }, { "epoch": 8.04, "grad_norm": 2.158378839492798, "learning_rate": 9.928058625517593e-07, "loss": 0.0414, "step": 817450 }, { "epoch": 8.04, "grad_norm": 0.43045738339424133, "learning_rate": 9.926817400975107e-07, "loss": 0.0234, "step": 817475 }, { "epoch": 8.04, "grad_norm": 0.545613169670105, "learning_rate": 9.925576176432622e-07, "loss": 0.0211, "step": 817500 }, { "epoch": 8.04, "grad_norm": 0.24984997510910034, "learning_rate": 9.924334951890138e-07, "loss": 0.0414, "step": 817525 }, { "epoch": 8.04, "grad_norm": 10.81764030456543, "learning_rate": 9.923093727347654e-07, "loss": 0.0198, "step": 817550 }, { "epoch": 8.04, "grad_norm": 0.0042401463724672794, "learning_rate": 9.921852502805168e-07, "loss": 0.0346, "step": 817575 }, { "epoch": 8.04, "grad_norm": 13.632991790771484, "learning_rate": 9.920611278262683e-07, "loss": 0.022, "step": 817600 }, { "epoch": 8.04, "grad_norm": 1.210573434829712, "learning_rate": 9.9193700537202e-07, "loss": 0.0338, "step": 817625 }, { "epoch": 8.04, "grad_norm": 18.077978134155273, "learning_rate": 9.918128829177715e-07, "loss": 0.0242, "step": 817650 }, { "epoch": 8.04, "grad_norm": 0.17895224690437317, "learning_rate": 9.91688760463523e-07, "loss": 0.0339, "step": 817675 }, { "epoch": 8.04, "grad_norm": 3.1586532592773438, "learning_rate": 9.915646380092746e-07, "loss": 0.0284, "step": 817700 }, { "epoch": 8.04, "grad_norm": 0.10689333081245422, "learning_rate": 9.91440515555026e-07, "loss": 0.0242, "step": 817725 }, { "epoch": 8.04, "grad_norm": 13.31625747680664, "learning_rate": 9.913163931007777e-07, "loss": 0.0133, "step": 817750 }, { "epoch": 8.04, "grad_norm": 0.144727423787117, "learning_rate": 9.91192270646529e-07, "loss": 0.0434, "step": 817775 }, { "epoch": 8.04, "grad_norm": 17.95556640625, "learning_rate": 9.910681481922807e-07, "loss": 0.028, "step": 817800 }, { "epoch": 8.04, "grad_norm": 0.04229865223169327, "learning_rate": 9.909440257380321e-07, "loss": 0.031, "step": 817825 }, { "epoch": 8.04, "grad_norm": 5.089182376861572, "learning_rate": 9.908199032837838e-07, "loss": 0.0324, "step": 817850 }, { "epoch": 8.04, "grad_norm": 10.188255310058594, "learning_rate": 9.906957808295354e-07, "loss": 0.0482, "step": 817875 }, { "epoch": 8.04, "grad_norm": 1.1040271520614624, "learning_rate": 9.905716583752868e-07, "loss": 0.0135, "step": 817900 }, { "epoch": 8.04, "grad_norm": 0.014593944884836674, "learning_rate": 9.904475359210383e-07, "loss": 0.0289, "step": 817925 }, { "epoch": 8.04, "grad_norm": 9.065056800842285, "learning_rate": 9.903234134667899e-07, "loss": 0.0317, "step": 817950 }, { "epoch": 8.04, "grad_norm": 2.8844974040985107, "learning_rate": 9.901992910125415e-07, "loss": 0.0393, "step": 817975 }, { "epoch": 8.04, "grad_norm": 0.43474212288856506, "learning_rate": 9.90075168558293e-07, "loss": 0.0188, "step": 818000 }, { "epoch": 8.04, "grad_norm": 0.014965436421334743, "learning_rate": 9.899510461040444e-07, "loss": 0.0304, "step": 818025 }, { "epoch": 8.04, "grad_norm": 1.546370029449463, "learning_rate": 9.89826923649796e-07, "loss": 0.045, "step": 818050 }, { "epoch": 8.04, "grad_norm": 1.5789176225662231, "learning_rate": 9.897028011955476e-07, "loss": 0.0273, "step": 818075 }, { "epoch": 8.04, "grad_norm": 5.5849690437316895, "learning_rate": 9.89578678741299e-07, "loss": 0.0173, "step": 818100 }, { "epoch": 8.04, "grad_norm": 0.05174426734447479, "learning_rate": 9.894545562870507e-07, "loss": 0.0316, "step": 818125 }, { "epoch": 8.04, "grad_norm": 7.692268371582031, "learning_rate": 9.893304338328021e-07, "loss": 0.0257, "step": 818150 }, { "epoch": 8.04, "grad_norm": 0.25139540433883667, "learning_rate": 9.892063113785538e-07, "loss": 0.0221, "step": 818175 }, { "epoch": 8.04, "grad_norm": 3.620177984237671, "learning_rate": 9.890821889243052e-07, "loss": 0.0224, "step": 818200 }, { "epoch": 8.04, "grad_norm": 0.0031243176199495792, "learning_rate": 9.889580664700568e-07, "loss": 0.0279, "step": 818225 }, { "epoch": 8.05, "grad_norm": 11.041669845581055, "learning_rate": 9.888339440158082e-07, "loss": 0.04, "step": 818250 }, { "epoch": 8.05, "grad_norm": 3.4014556407928467, "learning_rate": 9.887098215615599e-07, "loss": 0.0285, "step": 818275 }, { "epoch": 8.05, "grad_norm": 0.10628607124090195, "learning_rate": 9.885856991073115e-07, "loss": 0.0237, "step": 818300 }, { "epoch": 8.05, "grad_norm": 0.22027254104614258, "learning_rate": 9.88461576653063e-07, "loss": 0.0218, "step": 818325 }, { "epoch": 8.05, "grad_norm": 71.92876434326172, "learning_rate": 9.883374541988144e-07, "loss": 0.0531, "step": 818350 }, { "epoch": 8.05, "grad_norm": 0.24985095858573914, "learning_rate": 9.88213331744566e-07, "loss": 0.0377, "step": 818375 }, { "epoch": 8.05, "grad_norm": 8.891006469726562, "learning_rate": 9.880892092903176e-07, "loss": 0.043, "step": 818400 }, { "epoch": 8.05, "grad_norm": 0.016063405200839043, "learning_rate": 9.87965086836069e-07, "loss": 0.0524, "step": 818425 }, { "epoch": 8.05, "grad_norm": 6.967666149139404, "learning_rate": 9.878409643818205e-07, "loss": 0.0279, "step": 818450 }, { "epoch": 8.05, "grad_norm": 0.7014737725257874, "learning_rate": 9.87716841927572e-07, "loss": 0.0192, "step": 818475 }, { "epoch": 8.05, "grad_norm": 4.140949249267578, "learning_rate": 9.875927194733237e-07, "loss": 0.0464, "step": 818500 }, { "epoch": 8.05, "grad_norm": 0.23637272417545319, "learning_rate": 9.874685970190752e-07, "loss": 0.0412, "step": 818525 }, { "epoch": 8.05, "grad_norm": 1.092777967453003, "learning_rate": 9.873444745648268e-07, "loss": 0.0193, "step": 818550 }, { "epoch": 8.05, "grad_norm": 1.572566032409668, "learning_rate": 9.872203521105782e-07, "loss": 0.0562, "step": 818575 }, { "epoch": 8.05, "grad_norm": 4.298625946044922, "learning_rate": 9.870962296563299e-07, "loss": 0.0432, "step": 818600 }, { "epoch": 8.05, "grad_norm": 0.00946731772273779, "learning_rate": 9.869721072020813e-07, "loss": 0.0219, "step": 818625 }, { "epoch": 8.05, "grad_norm": 8.400940895080566, "learning_rate": 9.86847984747833e-07, "loss": 0.0232, "step": 818650 }, { "epoch": 8.05, "grad_norm": 0.02602989785373211, "learning_rate": 9.867238622935843e-07, "loss": 0.0289, "step": 818675 }, { "epoch": 8.05, "grad_norm": 6.795307636260986, "learning_rate": 9.86599739839336e-07, "loss": 0.0251, "step": 818700 }, { "epoch": 8.05, "grad_norm": 0.11720651388168335, "learning_rate": 9.864756173850876e-07, "loss": 0.0376, "step": 818725 }, { "epoch": 8.05, "grad_norm": 0.5514710545539856, "learning_rate": 9.86351494930839e-07, "loss": 0.0311, "step": 818750 }, { "epoch": 8.05, "grad_norm": 3.761080741882324, "learning_rate": 9.862273724765907e-07, "loss": 0.0348, "step": 818775 }, { "epoch": 8.05, "grad_norm": 7.109446048736572, "learning_rate": 9.86103250022342e-07, "loss": 0.0311, "step": 818800 }, { "epoch": 8.05, "grad_norm": 5.137479782104492, "learning_rate": 9.859791275680937e-07, "loss": 0.0264, "step": 818825 }, { "epoch": 8.05, "grad_norm": 16.864469528198242, "learning_rate": 9.858550051138451e-07, "loss": 0.0523, "step": 818850 }, { "epoch": 8.05, "grad_norm": 0.05750872194766998, "learning_rate": 9.857308826595968e-07, "loss": 0.0321, "step": 818875 }, { "epoch": 8.05, "grad_norm": 11.63963508605957, "learning_rate": 9.856067602053484e-07, "loss": 0.0343, "step": 818900 }, { "epoch": 8.05, "grad_norm": 0.014701022766530514, "learning_rate": 9.854826377510998e-07, "loss": 0.0317, "step": 818925 }, { "epoch": 8.05, "grad_norm": 15.559247970581055, "learning_rate": 9.853585152968513e-07, "loss": 0.029, "step": 818950 }, { "epoch": 8.05, "grad_norm": 4.620745658874512, "learning_rate": 9.85234392842603e-07, "loss": 0.0364, "step": 818975 }, { "epoch": 8.05, "grad_norm": 2.284496307373047, "learning_rate": 9.851102703883545e-07, "loss": 0.0201, "step": 819000 }, { "epoch": 8.05, "grad_norm": 0.22022061049938202, "learning_rate": 9.84986147934106e-07, "loss": 0.0322, "step": 819025 }, { "epoch": 8.05, "grad_norm": 0.11445463448762894, "learning_rate": 9.848620254798574e-07, "loss": 0.027, "step": 819050 }, { "epoch": 8.05, "grad_norm": 1.3258934020996094, "learning_rate": 9.84737903025609e-07, "loss": 0.032, "step": 819075 }, { "epoch": 8.05, "grad_norm": 1.9167934656143188, "learning_rate": 9.846137805713606e-07, "loss": 0.0345, "step": 819100 }, { "epoch": 8.05, "grad_norm": 9.183721542358398, "learning_rate": 9.84489658117112e-07, "loss": 0.028, "step": 819125 }, { "epoch": 8.05, "grad_norm": 8.7457275390625, "learning_rate": 9.843655356628637e-07, "loss": 0.0407, "step": 819150 }, { "epoch": 8.05, "grad_norm": 0.03906386345624924, "learning_rate": 9.842414132086151e-07, "loss": 0.0301, "step": 819175 }, { "epoch": 8.05, "grad_norm": 3.96952486038208, "learning_rate": 9.841172907543668e-07, "loss": 0.0272, "step": 819200 }, { "epoch": 8.05, "grad_norm": 0.06212139129638672, "learning_rate": 9.839931683001182e-07, "loss": 0.0308, "step": 819225 }, { "epoch": 8.06, "grad_norm": 14.860325813293457, "learning_rate": 9.838690458458698e-07, "loss": 0.0153, "step": 819250 }, { "epoch": 8.06, "grad_norm": 0.0963071882724762, "learning_rate": 9.837449233916212e-07, "loss": 0.0546, "step": 819275 }, { "epoch": 8.06, "grad_norm": 16.312292098999023, "learning_rate": 9.836208009373729e-07, "loss": 0.0392, "step": 819300 }, { "epoch": 8.06, "grad_norm": 4.337834358215332, "learning_rate": 9.834966784831245e-07, "loss": 0.0307, "step": 819325 }, { "epoch": 8.06, "grad_norm": 5.464357852935791, "learning_rate": 9.83372556028876e-07, "loss": 0.0548, "step": 819350 }, { "epoch": 8.06, "grad_norm": 3.1334967613220215, "learning_rate": 9.832484335746274e-07, "loss": 0.0381, "step": 819375 }, { "epoch": 8.06, "grad_norm": 21.419763565063477, "learning_rate": 9.83124311120379e-07, "loss": 0.0234, "step": 819400 }, { "epoch": 8.06, "grad_norm": 0.06676993519067764, "learning_rate": 9.830001886661306e-07, "loss": 0.0306, "step": 819425 }, { "epoch": 8.06, "grad_norm": 8.58048152923584, "learning_rate": 9.82876066211882e-07, "loss": 0.0277, "step": 819450 }, { "epoch": 8.06, "grad_norm": 1.5965323448181152, "learning_rate": 9.827519437576335e-07, "loss": 0.0257, "step": 819475 }, { "epoch": 8.06, "grad_norm": 14.4673433303833, "learning_rate": 9.826278213033851e-07, "loss": 0.0314, "step": 819500 }, { "epoch": 8.06, "grad_norm": 0.0915851891040802, "learning_rate": 9.825086637473066e-07, "loss": 0.0482, "step": 819525 }, { "epoch": 8.06, "grad_norm": 5.440606594085693, "learning_rate": 9.823845412930582e-07, "loss": 0.0222, "step": 819550 }, { "epoch": 8.06, "grad_norm": 0.5401484370231628, "learning_rate": 9.822604188388098e-07, "loss": 0.0411, "step": 819575 }, { "epoch": 8.06, "grad_norm": 16.384353637695312, "learning_rate": 9.821362963845613e-07, "loss": 0.0243, "step": 819600 }, { "epoch": 8.06, "grad_norm": 0.24149298667907715, "learning_rate": 9.820121739303127e-07, "loss": 0.0334, "step": 819625 }, { "epoch": 8.06, "grad_norm": 21.39866065979004, "learning_rate": 9.818880514760643e-07, "loss": 0.0329, "step": 819650 }, { "epoch": 8.06, "grad_norm": 1.1081891059875488, "learning_rate": 9.81763929021816e-07, "loss": 0.0214, "step": 819675 }, { "epoch": 8.06, "grad_norm": 1.753395438194275, "learning_rate": 9.816398065675674e-07, "loss": 0.0213, "step": 819700 }, { "epoch": 8.06, "grad_norm": 0.22415511310100555, "learning_rate": 9.815156841133188e-07, "loss": 0.0363, "step": 819725 }, { "epoch": 8.06, "grad_norm": 6.875972747802734, "learning_rate": 9.813915616590704e-07, "loss": 0.0334, "step": 819750 }, { "epoch": 8.06, "grad_norm": 4.260354518890381, "learning_rate": 9.81267439204822e-07, "loss": 0.0219, "step": 819775 }, { "epoch": 8.06, "grad_norm": 1.6895372867584229, "learning_rate": 9.811433167505735e-07, "loss": 0.0333, "step": 819800 }, { "epoch": 8.06, "grad_norm": 6.215146541595459, "learning_rate": 9.81019194296325e-07, "loss": 0.026, "step": 819825 }, { "epoch": 8.06, "grad_norm": 16.446935653686523, "learning_rate": 9.808950718420765e-07, "loss": 0.0363, "step": 819850 }, { "epoch": 8.06, "grad_norm": 0.04033859074115753, "learning_rate": 9.807709493878282e-07, "loss": 0.0395, "step": 819875 }, { "epoch": 8.06, "grad_norm": 7.231367588043213, "learning_rate": 9.806468269335796e-07, "loss": 0.0169, "step": 819900 }, { "epoch": 8.06, "grad_norm": 0.12817656993865967, "learning_rate": 9.805227044793312e-07, "loss": 0.0461, "step": 819925 }, { "epoch": 8.06, "grad_norm": 5.546332836151123, "learning_rate": 9.803985820250827e-07, "loss": 0.0263, "step": 819950 }, { "epoch": 8.06, "grad_norm": 0.22777564823627472, "learning_rate": 9.802744595708343e-07, "loss": 0.0375, "step": 819975 }, { "epoch": 8.06, "grad_norm": 21.440095901489258, "learning_rate": 9.80150337116586e-07, "loss": 0.0218, "step": 820000 }, { "epoch": 8.06, "eval_loss": 0.8844733238220215, "eval_runtime": 6097.3001, "eval_samples_per_second": 1.553, "eval_steps_per_second": 0.194, "eval_wer": 0.11111826434043649, "step": 820000 }, { "epoch": 8.06, "grad_norm": 2.7713305950164795, "learning_rate": 9.800262146623374e-07, "loss": 0.0214, "step": 820025 }, { "epoch": 8.06, "grad_norm": 10.219597816467285, "learning_rate": 9.799020922080888e-07, "loss": 0.0197, "step": 820050 }, { "epoch": 8.06, "grad_norm": 0.05584937706589699, "learning_rate": 9.797779697538404e-07, "loss": 0.0547, "step": 820075 }, { "epoch": 8.06, "grad_norm": 7.167623043060303, "learning_rate": 9.79653847299592e-07, "loss": 0.0206, "step": 820100 }, { "epoch": 8.06, "grad_norm": 0.04165802523493767, "learning_rate": 9.795297248453435e-07, "loss": 0.03, "step": 820125 }, { "epoch": 8.06, "grad_norm": 10.886520385742188, "learning_rate": 9.79405602391095e-07, "loss": 0.0249, "step": 820150 }, { "epoch": 8.06, "grad_norm": 0.5854961276054382, "learning_rate": 9.792814799368465e-07, "loss": 0.0426, "step": 820175 }, { "epoch": 8.06, "grad_norm": 6.695341110229492, "learning_rate": 9.791573574825982e-07, "loss": 0.0292, "step": 820200 }, { "epoch": 8.06, "grad_norm": 0.017590979114174843, "learning_rate": 9.790332350283496e-07, "loss": 0.0465, "step": 820225 }, { "epoch": 8.06, "grad_norm": 5.943234443664551, "learning_rate": 9.789091125741012e-07, "loss": 0.0251, "step": 820250 }, { "epoch": 8.07, "grad_norm": 4.059099197387695, "learning_rate": 9.787849901198529e-07, "loss": 0.0467, "step": 820275 }, { "epoch": 8.07, "grad_norm": 6.658139705657959, "learning_rate": 9.786608676656043e-07, "loss": 0.0307, "step": 820300 }, { "epoch": 8.07, "grad_norm": 0.2540886402130127, "learning_rate": 9.785367452113557e-07, "loss": 0.0311, "step": 820325 }, { "epoch": 8.07, "grad_norm": 8.133537292480469, "learning_rate": 9.784126227571073e-07, "loss": 0.0125, "step": 820350 }, { "epoch": 8.07, "grad_norm": 0.10193254053592682, "learning_rate": 9.78288500302859e-07, "loss": 0.033, "step": 820375 }, { "epoch": 8.07, "grad_norm": 12.984596252441406, "learning_rate": 9.781643778486104e-07, "loss": 0.0253, "step": 820400 }, { "epoch": 8.07, "grad_norm": 0.4207887053489685, "learning_rate": 9.78040255394362e-07, "loss": 0.0428, "step": 820425 }, { "epoch": 8.07, "grad_norm": 8.399236679077148, "learning_rate": 9.779161329401135e-07, "loss": 0.0244, "step": 820450 }, { "epoch": 8.07, "grad_norm": 0.20975084602832794, "learning_rate": 9.77792010485865e-07, "loss": 0.0238, "step": 820475 }, { "epoch": 8.07, "grad_norm": 12.768651962280273, "learning_rate": 9.776678880316165e-07, "loss": 0.0318, "step": 820500 }, { "epoch": 8.07, "grad_norm": 3.7936174869537354, "learning_rate": 9.775437655773682e-07, "loss": 0.0269, "step": 820525 }, { "epoch": 8.07, "grad_norm": 8.855074882507324, "learning_rate": 9.774196431231196e-07, "loss": 0.019, "step": 820550 }, { "epoch": 8.07, "grad_norm": 0.08752608299255371, "learning_rate": 9.772955206688712e-07, "loss": 0.0397, "step": 820575 }, { "epoch": 8.07, "grad_norm": 3.7565433979034424, "learning_rate": 9.771713982146228e-07, "loss": 0.0202, "step": 820600 }, { "epoch": 8.07, "grad_norm": 0.1877480149269104, "learning_rate": 9.770472757603743e-07, "loss": 0.0221, "step": 820625 }, { "epoch": 8.07, "grad_norm": 0.05084480717778206, "learning_rate": 9.769231533061257e-07, "loss": 0.0283, "step": 820650 }, { "epoch": 8.07, "grad_norm": 0.1735844612121582, "learning_rate": 9.767990308518773e-07, "loss": 0.0356, "step": 820675 }, { "epoch": 8.07, "grad_norm": 3.066847562789917, "learning_rate": 9.76674908397629e-07, "loss": 0.0213, "step": 820700 }, { "epoch": 8.07, "grad_norm": 0.3389148712158203, "learning_rate": 9.765507859433804e-07, "loss": 0.0456, "step": 820725 }, { "epoch": 8.07, "grad_norm": 5.846251487731934, "learning_rate": 9.764266634891318e-07, "loss": 0.0285, "step": 820750 }, { "epoch": 8.07, "grad_norm": 0.5681164264678955, "learning_rate": 9.763025410348834e-07, "loss": 0.0237, "step": 820775 }, { "epoch": 8.07, "grad_norm": 9.377344131469727, "learning_rate": 9.76178418580635e-07, "loss": 0.0336, "step": 820800 }, { "epoch": 8.07, "grad_norm": 1.8796368837356567, "learning_rate": 9.760542961263865e-07, "loss": 0.0204, "step": 820825 }, { "epoch": 8.07, "grad_norm": 13.090789794921875, "learning_rate": 9.759301736721381e-07, "loss": 0.0351, "step": 820850 }, { "epoch": 8.07, "grad_norm": 0.4742295742034912, "learning_rate": 9.758060512178896e-07, "loss": 0.0318, "step": 820875 }, { "epoch": 8.07, "grad_norm": 19.48753547668457, "learning_rate": 9.756819287636412e-07, "loss": 0.0319, "step": 820900 }, { "epoch": 8.07, "grad_norm": 1.0890696048736572, "learning_rate": 9.755578063093926e-07, "loss": 0.0335, "step": 820925 }, { "epoch": 8.07, "grad_norm": 1.4815759658813477, "learning_rate": 9.754336838551443e-07, "loss": 0.0239, "step": 820950 }, { "epoch": 8.07, "grad_norm": 0.16380353271961212, "learning_rate": 9.753095614008957e-07, "loss": 0.0383, "step": 820975 }, { "epoch": 8.07, "grad_norm": 15.665739059448242, "learning_rate": 9.751854389466473e-07, "loss": 0.0282, "step": 821000 }, { "epoch": 8.07, "grad_norm": 0.4596416652202606, "learning_rate": 9.75061316492399e-07, "loss": 0.0234, "step": 821025 }, { "epoch": 8.07, "grad_norm": 16.19013214111328, "learning_rate": 9.749371940381504e-07, "loss": 0.0371, "step": 821050 }, { "epoch": 8.07, "grad_norm": 5.944682598114014, "learning_rate": 9.748130715839018e-07, "loss": 0.0288, "step": 821075 }, { "epoch": 8.07, "grad_norm": 10.083328247070312, "learning_rate": 9.746889491296534e-07, "loss": 0.0247, "step": 821100 }, { "epoch": 8.07, "grad_norm": 0.027064930647611618, "learning_rate": 9.74564826675405e-07, "loss": 0.0246, "step": 821125 }, { "epoch": 8.07, "grad_norm": 17.671306610107422, "learning_rate": 9.744407042211565e-07, "loss": 0.0188, "step": 821150 }, { "epoch": 8.07, "grad_norm": 0.017787974327802658, "learning_rate": 9.74316581766908e-07, "loss": 0.0461, "step": 821175 }, { "epoch": 8.07, "grad_norm": 53.969207763671875, "learning_rate": 9.741924593126595e-07, "loss": 0.0391, "step": 821200 }, { "epoch": 8.07, "grad_norm": 0.3061073124408722, "learning_rate": 9.740683368584112e-07, "loss": 0.0559, "step": 821225 }, { "epoch": 8.07, "grad_norm": 5.064033508300781, "learning_rate": 9.739442144041626e-07, "loss": 0.0298, "step": 821250 }, { "epoch": 8.07, "grad_norm": 0.0390680767595768, "learning_rate": 9.738200919499142e-07, "loss": 0.0225, "step": 821275 }, { "epoch": 8.08, "grad_norm": 0.060498807579278946, "learning_rate": 9.736959694956657e-07, "loss": 0.0275, "step": 821300 }, { "epoch": 8.08, "grad_norm": 0.013057911768555641, "learning_rate": 9.735718470414173e-07, "loss": 0.0201, "step": 821325 }, { "epoch": 8.08, "grad_norm": 13.27996826171875, "learning_rate": 9.734477245871687e-07, "loss": 0.0286, "step": 821350 }, { "epoch": 8.08, "grad_norm": 0.005963712930679321, "learning_rate": 9.733236021329204e-07, "loss": 0.031, "step": 821375 }, { "epoch": 8.08, "grad_norm": 6.815277576446533, "learning_rate": 9.731994796786718e-07, "loss": 0.0231, "step": 821400 }, { "epoch": 8.08, "grad_norm": 2.3739168643951416, "learning_rate": 9.730753572244234e-07, "loss": 0.0469, "step": 821425 }, { "epoch": 8.08, "grad_norm": 18.444042205810547, "learning_rate": 9.72951234770175e-07, "loss": 0.0212, "step": 821450 }, { "epoch": 8.08, "grad_norm": 1.6115107536315918, "learning_rate": 9.728271123159265e-07, "loss": 0.0319, "step": 821475 }, { "epoch": 8.08, "grad_norm": 2.5643184185028076, "learning_rate": 9.727029898616779e-07, "loss": 0.0225, "step": 821500 }, { "epoch": 8.08, "grad_norm": 0.43236982822418213, "learning_rate": 9.725788674074295e-07, "loss": 0.0167, "step": 821525 }, { "epoch": 8.08, "grad_norm": 18.197824478149414, "learning_rate": 9.72459709851351e-07, "loss": 0.0426, "step": 821550 }, { "epoch": 8.08, "grad_norm": 1.1276347637176514, "learning_rate": 9.723355873971026e-07, "loss": 0.0306, "step": 821575 }, { "epoch": 8.08, "grad_norm": 9.110342979431152, "learning_rate": 9.72211464942854e-07, "loss": 0.0264, "step": 821600 }, { "epoch": 8.08, "grad_norm": 1.9489883184432983, "learning_rate": 9.720873424886057e-07, "loss": 0.0262, "step": 821625 }, { "epoch": 8.08, "grad_norm": 9.93342399597168, "learning_rate": 9.71963220034357e-07, "loss": 0.0344, "step": 821650 }, { "epoch": 8.08, "grad_norm": 1.018157720565796, "learning_rate": 9.718390975801087e-07, "loss": 0.041, "step": 821675 }, { "epoch": 8.08, "grad_norm": 5.879621982574463, "learning_rate": 9.717149751258602e-07, "loss": 0.0285, "step": 821700 }, { "epoch": 8.08, "grad_norm": 6.6263203620910645, "learning_rate": 9.715908526716118e-07, "loss": 0.0333, "step": 821725 }, { "epoch": 8.08, "grad_norm": 5.08103084564209, "learning_rate": 9.714667302173634e-07, "loss": 0.0461, "step": 821750 }, { "epoch": 8.08, "grad_norm": 0.04644698649644852, "learning_rate": 9.713426077631148e-07, "loss": 0.0371, "step": 821775 }, { "epoch": 8.08, "grad_norm": 13.314580917358398, "learning_rate": 9.712184853088665e-07, "loss": 0.0246, "step": 821800 }, { "epoch": 8.08, "grad_norm": 0.014132298529148102, "learning_rate": 9.71094362854618e-07, "loss": 0.0354, "step": 821825 }, { "epoch": 8.08, "grad_norm": 13.730232238769531, "learning_rate": 9.709702404003695e-07, "loss": 0.0294, "step": 821850 }, { "epoch": 8.08, "grad_norm": 0.46428412199020386, "learning_rate": 9.70846117946121e-07, "loss": 0.041, "step": 821875 }, { "epoch": 8.08, "grad_norm": 7.1992082595825195, "learning_rate": 9.707219954918726e-07, "loss": 0.0342, "step": 821900 }, { "epoch": 8.08, "grad_norm": 0.571448028087616, "learning_rate": 9.70597873037624e-07, "loss": 0.0402, "step": 821925 }, { "epoch": 8.08, "grad_norm": 4.6168646812438965, "learning_rate": 9.704737505833757e-07, "loss": 0.0166, "step": 821950 }, { "epoch": 8.08, "grad_norm": 0.018920589238405228, "learning_rate": 9.703496281291273e-07, "loss": 0.0273, "step": 821975 }, { "epoch": 8.08, "grad_norm": 9.856574058532715, "learning_rate": 9.702255056748787e-07, "loss": 0.0245, "step": 822000 }, { "epoch": 8.08, "grad_norm": 1.6720139980316162, "learning_rate": 9.701013832206301e-07, "loss": 0.0359, "step": 822025 }, { "epoch": 8.08, "grad_norm": 5.602268695831299, "learning_rate": 9.699772607663818e-07, "loss": 0.0286, "step": 822050 }, { "epoch": 8.08, "grad_norm": 0.022114107385277748, "learning_rate": 9.698531383121334e-07, "loss": 0.0176, "step": 822075 }, { "epoch": 8.08, "grad_norm": 5.704996109008789, "learning_rate": 9.697290158578848e-07, "loss": 0.0145, "step": 822100 }, { "epoch": 8.08, "grad_norm": 0.026005856692790985, "learning_rate": 9.696048934036363e-07, "loss": 0.0316, "step": 822125 }, { "epoch": 8.08, "grad_norm": 14.682440757751465, "learning_rate": 9.694807709493879e-07, "loss": 0.0333, "step": 822150 }, { "epoch": 8.08, "grad_norm": 0.015696045011281967, "learning_rate": 9.693566484951395e-07, "loss": 0.0226, "step": 822175 }, { "epoch": 8.08, "grad_norm": 10.55428695678711, "learning_rate": 9.69232526040891e-07, "loss": 0.0298, "step": 822200 }, { "epoch": 8.08, "grad_norm": 1.940589189529419, "learning_rate": 9.691084035866426e-07, "loss": 0.0354, "step": 822225 }, { "epoch": 8.08, "grad_norm": 8.791291236877441, "learning_rate": 9.68984281132394e-07, "loss": 0.0407, "step": 822250 }, { "epoch": 8.08, "grad_norm": 0.4142593741416931, "learning_rate": 9.688601586781456e-07, "loss": 0.0356, "step": 822275 }, { "epoch": 8.08, "grad_norm": 4.811910152435303, "learning_rate": 9.68736036223897e-07, "loss": 0.0162, "step": 822300 }, { "epoch": 8.09, "grad_norm": 2.5889675617218018, "learning_rate": 9.686119137696487e-07, "loss": 0.0275, "step": 822325 }, { "epoch": 8.09, "grad_norm": 4.605923652648926, "learning_rate": 9.684877913154001e-07, "loss": 0.0306, "step": 822350 }, { "epoch": 8.09, "grad_norm": 0.06899354606866837, "learning_rate": 9.683636688611518e-07, "loss": 0.0254, "step": 822375 }, { "epoch": 8.09, "grad_norm": 13.298063278198242, "learning_rate": 9.682395464069034e-07, "loss": 0.036, "step": 822400 }, { "epoch": 8.09, "grad_norm": 6.932508945465088, "learning_rate": 9.681154239526548e-07, "loss": 0.0514, "step": 822425 }, { "epoch": 8.09, "grad_norm": 22.485280990600586, "learning_rate": 9.679913014984062e-07, "loss": 0.0426, "step": 822450 }, { "epoch": 8.09, "grad_norm": 2.0265612602233887, "learning_rate": 9.678671790441579e-07, "loss": 0.0428, "step": 822475 }, { "epoch": 8.09, "grad_norm": 6.354029655456543, "learning_rate": 9.677430565899095e-07, "loss": 0.0154, "step": 822500 }, { "epoch": 8.09, "grad_norm": 7.758678436279297, "learning_rate": 9.67618934135661e-07, "loss": 0.031, "step": 822525 }, { "epoch": 8.09, "grad_norm": 2.837463855743408, "learning_rate": 9.674948116814124e-07, "loss": 0.0163, "step": 822550 }, { "epoch": 8.09, "grad_norm": 0.09698918461799622, "learning_rate": 9.67370689227164e-07, "loss": 0.0294, "step": 822575 }, { "epoch": 8.09, "grad_norm": 13.792325019836426, "learning_rate": 9.672465667729156e-07, "loss": 0.0334, "step": 822600 }, { "epoch": 8.09, "grad_norm": 0.008756966330111027, "learning_rate": 9.67122444318667e-07, "loss": 0.0257, "step": 822625 }, { "epoch": 8.09, "grad_norm": 9.382197380065918, "learning_rate": 9.669983218644187e-07, "loss": 0.0211, "step": 822650 }, { "epoch": 8.09, "grad_norm": 0.012702545151114464, "learning_rate": 9.668741994101701e-07, "loss": 0.027, "step": 822675 }, { "epoch": 8.09, "grad_norm": 9.795530319213867, "learning_rate": 9.667500769559217e-07, "loss": 0.0305, "step": 822700 }, { "epoch": 8.09, "grad_norm": 2.138458013534546, "learning_rate": 9.666259545016732e-07, "loss": 0.0246, "step": 822725 }, { "epoch": 8.09, "grad_norm": 6.294410228729248, "learning_rate": 9.665018320474248e-07, "loss": 0.0247, "step": 822750 }, { "epoch": 8.09, "grad_norm": 10.555413246154785, "learning_rate": 9.663777095931762e-07, "loss": 0.0364, "step": 822775 }, { "epoch": 8.09, "grad_norm": 0.2182975709438324, "learning_rate": 9.662535871389279e-07, "loss": 0.0302, "step": 822800 }, { "epoch": 8.09, "grad_norm": 0.028425307944417, "learning_rate": 9.661294646846795e-07, "loss": 0.0505, "step": 822825 }, { "epoch": 8.09, "grad_norm": 15.088836669921875, "learning_rate": 9.66005342230431e-07, "loss": 0.034, "step": 822850 }, { "epoch": 8.09, "grad_norm": 1.0333987474441528, "learning_rate": 9.658812197761823e-07, "loss": 0.0376, "step": 822875 }, { "epoch": 8.09, "grad_norm": 7.397940158843994, "learning_rate": 9.65757097321934e-07, "loss": 0.0246, "step": 822900 }, { "epoch": 8.09, "grad_norm": 0.4931032359600067, "learning_rate": 9.656329748676856e-07, "loss": 0.0514, "step": 822925 }, { "epoch": 8.09, "grad_norm": 8.341789245605469, "learning_rate": 9.65508852413437e-07, "loss": 0.0322, "step": 822950 }, { "epoch": 8.09, "grad_norm": 1.1896450519561768, "learning_rate": 9.653847299591885e-07, "loss": 0.0195, "step": 822975 }, { "epoch": 8.09, "grad_norm": 15.909077644348145, "learning_rate": 9.6526060750494e-07, "loss": 0.0299, "step": 823000 }, { "epoch": 8.09, "grad_norm": 0.10475321859121323, "learning_rate": 9.651364850506917e-07, "loss": 0.0386, "step": 823025 }, { "epoch": 8.09, "grad_norm": 8.081611633300781, "learning_rate": 9.650123625964431e-07, "loss": 0.021, "step": 823050 }, { "epoch": 8.09, "grad_norm": 3.2466769218444824, "learning_rate": 9.648882401421948e-07, "loss": 0.0473, "step": 823075 }, { "epoch": 8.09, "grad_norm": 1.5795111656188965, "learning_rate": 9.647641176879462e-07, "loss": 0.0202, "step": 823100 }, { "epoch": 8.09, "grad_norm": 1.8620878458023071, "learning_rate": 9.646399952336978e-07, "loss": 0.0278, "step": 823125 }, { "epoch": 8.09, "grad_norm": 1.0058872699737549, "learning_rate": 9.645158727794495e-07, "loss": 0.0214, "step": 823150 }, { "epoch": 8.09, "grad_norm": 0.008157927542924881, "learning_rate": 9.64391750325201e-07, "loss": 0.0328, "step": 823175 }, { "epoch": 8.09, "grad_norm": 10.440773963928223, "learning_rate": 9.642676278709525e-07, "loss": 0.0229, "step": 823200 }, { "epoch": 8.09, "grad_norm": 0.12090843915939331, "learning_rate": 9.64143505416704e-07, "loss": 0.0415, "step": 823225 }, { "epoch": 8.09, "grad_norm": 10.883148193359375, "learning_rate": 9.640193829624556e-07, "loss": 0.0182, "step": 823250 }, { "epoch": 8.09, "grad_norm": 0.029503757134079933, "learning_rate": 9.63895260508207e-07, "loss": 0.0245, "step": 823275 }, { "epoch": 8.09, "grad_norm": 53.71290969848633, "learning_rate": 9.637711380539587e-07, "loss": 0.0303, "step": 823300 }, { "epoch": 8.1, "grad_norm": 1.1748813390731812, "learning_rate": 9.636470155997103e-07, "loss": 0.0267, "step": 823325 }, { "epoch": 8.1, "grad_norm": 18.756359100341797, "learning_rate": 9.635228931454617e-07, "loss": 0.0205, "step": 823350 }, { "epoch": 8.1, "grad_norm": 2.8329312801361084, "learning_rate": 9.633987706912131e-07, "loss": 0.0225, "step": 823375 }, { "epoch": 8.1, "grad_norm": 10.886706352233887, "learning_rate": 9.632746482369648e-07, "loss": 0.0435, "step": 823400 }, { "epoch": 8.1, "grad_norm": 1.931923747062683, "learning_rate": 9.631505257827164e-07, "loss": 0.0372, "step": 823425 }, { "epoch": 8.1, "grad_norm": 11.637955665588379, "learning_rate": 9.630264033284678e-07, "loss": 0.0186, "step": 823450 }, { "epoch": 8.1, "grad_norm": 0.23456637561321259, "learning_rate": 9.629022808742192e-07, "loss": 0.0476, "step": 823475 }, { "epoch": 8.1, "grad_norm": 11.36982250213623, "learning_rate": 9.627781584199709e-07, "loss": 0.0227, "step": 823500 }, { "epoch": 8.1, "grad_norm": 1.6344151496887207, "learning_rate": 9.626540359657225e-07, "loss": 0.0421, "step": 823525 }, { "epoch": 8.1, "grad_norm": 13.72182559967041, "learning_rate": 9.62529913511474e-07, "loss": 0.0251, "step": 823550 }, { "epoch": 8.1, "grad_norm": 1.8495701551437378, "learning_rate": 9.624107559553954e-07, "loss": 0.0438, "step": 823575 }, { "epoch": 8.1, "grad_norm": 9.97087287902832, "learning_rate": 9.62286633501147e-07, "loss": 0.0393, "step": 823600 }, { "epoch": 8.1, "grad_norm": 0.2727336585521698, "learning_rate": 9.621625110468985e-07, "loss": 0.025, "step": 823625 }, { "epoch": 8.1, "grad_norm": 5.553001403808594, "learning_rate": 9.6203838859265e-07, "loss": 0.0217, "step": 823650 }, { "epoch": 8.1, "grad_norm": 0.10035260021686554, "learning_rate": 9.619142661384017e-07, "loss": 0.0295, "step": 823675 }, { "epoch": 8.1, "grad_norm": 0.1038818284869194, "learning_rate": 9.617901436841531e-07, "loss": 0.0272, "step": 823700 }, { "epoch": 8.1, "grad_norm": 0.6053160429000854, "learning_rate": 9.616660212299046e-07, "loss": 0.017, "step": 823725 }, { "epoch": 8.1, "grad_norm": 11.29149055480957, "learning_rate": 9.615418987756562e-07, "loss": 0.0237, "step": 823750 }, { "epoch": 8.1, "grad_norm": 2.5851480960845947, "learning_rate": 9.614177763214078e-07, "loss": 0.0561, "step": 823775 }, { "epoch": 8.1, "grad_norm": 6.802325248718262, "learning_rate": 9.612936538671593e-07, "loss": 0.0181, "step": 823800 }, { "epoch": 8.1, "grad_norm": 0.05633306875824928, "learning_rate": 9.611695314129107e-07, "loss": 0.0254, "step": 823825 }, { "epoch": 8.1, "grad_norm": 14.506759643554688, "learning_rate": 9.610454089586623e-07, "loss": 0.0375, "step": 823850 }, { "epoch": 8.1, "grad_norm": 0.029942026361823082, "learning_rate": 9.60921286504414e-07, "loss": 0.0398, "step": 823875 }, { "epoch": 8.1, "grad_norm": 29.53411865234375, "learning_rate": 9.607971640501654e-07, "loss": 0.044, "step": 823900 }, { "epoch": 8.1, "grad_norm": 0.28120157122612, "learning_rate": 9.60673041595917e-07, "loss": 0.0391, "step": 823925 }, { "epoch": 8.1, "grad_norm": 4.068558692932129, "learning_rate": 9.605489191416684e-07, "loss": 0.0177, "step": 823950 }, { "epoch": 8.1, "grad_norm": 0.006137450225651264, "learning_rate": 9.6042479668742e-07, "loss": 0.0316, "step": 823975 }, { "epoch": 8.1, "grad_norm": 0.26442697644233704, "learning_rate": 9.603006742331715e-07, "loss": 0.0261, "step": 824000 }, { "epoch": 8.1, "grad_norm": 0.7183181643486023, "learning_rate": 9.601765517789231e-07, "loss": 0.0298, "step": 824025 }, { "epoch": 8.1, "grad_norm": 1.4128406047821045, "learning_rate": 9.600524293246746e-07, "loss": 0.0277, "step": 824050 }, { "epoch": 8.1, "grad_norm": 0.3715685307979584, "learning_rate": 9.599283068704262e-07, "loss": 0.0302, "step": 824075 }, { "epoch": 8.1, "grad_norm": 6.418599605560303, "learning_rate": 9.598041844161778e-07, "loss": 0.0368, "step": 824100 }, { "epoch": 8.1, "grad_norm": 1.40328848361969, "learning_rate": 9.596800619619292e-07, "loss": 0.0335, "step": 824125 }, { "epoch": 8.1, "grad_norm": 9.166359901428223, "learning_rate": 9.595559395076807e-07, "loss": 0.0319, "step": 824150 }, { "epoch": 8.1, "grad_norm": 0.007921232841908932, "learning_rate": 9.594318170534323e-07, "loss": 0.0264, "step": 824175 }, { "epoch": 8.1, "grad_norm": 13.81424331665039, "learning_rate": 9.59307694599184e-07, "loss": 0.0195, "step": 824200 }, { "epoch": 8.1, "grad_norm": 0.013684466481208801, "learning_rate": 9.591835721449354e-07, "loss": 0.0326, "step": 824225 }, { "epoch": 8.1, "grad_norm": 3.104076385498047, "learning_rate": 9.590594496906868e-07, "loss": 0.0243, "step": 824250 }, { "epoch": 8.1, "grad_norm": 2.609341621398926, "learning_rate": 9.589353272364384e-07, "loss": 0.0215, "step": 824275 }, { "epoch": 8.1, "grad_norm": 0.9442167282104492, "learning_rate": 9.5881120478219e-07, "loss": 0.0185, "step": 824300 }, { "epoch": 8.1, "grad_norm": 3.311767816543579, "learning_rate": 9.586870823279415e-07, "loss": 0.0237, "step": 824325 }, { "epoch": 8.11, "grad_norm": 2.903055429458618, "learning_rate": 9.585629598736931e-07, "loss": 0.0298, "step": 824350 }, { "epoch": 8.11, "grad_norm": 0.013738060370087624, "learning_rate": 9.584388374194445e-07, "loss": 0.0297, "step": 824375 }, { "epoch": 8.11, "grad_norm": 5.002981662750244, "learning_rate": 9.583147149651962e-07, "loss": 0.0325, "step": 824400 }, { "epoch": 8.11, "grad_norm": 1.0229787826538086, "learning_rate": 9.581905925109476e-07, "loss": 0.0275, "step": 824425 }, { "epoch": 8.11, "grad_norm": 2.2905893325805664, "learning_rate": 9.580664700566992e-07, "loss": 0.0324, "step": 824450 }, { "epoch": 8.11, "grad_norm": 1.2461656332015991, "learning_rate": 9.579423476024507e-07, "loss": 0.0214, "step": 824475 }, { "epoch": 8.11, "grad_norm": 22.597564697265625, "learning_rate": 9.578182251482023e-07, "loss": 0.0557, "step": 824500 }, { "epoch": 8.11, "grad_norm": 0.03344917669892311, "learning_rate": 9.57694102693954e-07, "loss": 0.0252, "step": 824525 }, { "epoch": 8.11, "grad_norm": 14.067805290222168, "learning_rate": 9.575699802397053e-07, "loss": 0.03, "step": 824550 }, { "epoch": 8.11, "grad_norm": 7.1683430671691895, "learning_rate": 9.574458577854568e-07, "loss": 0.0378, "step": 824575 }, { "epoch": 8.11, "grad_norm": 20.271631240844727, "learning_rate": 9.573217353312084e-07, "loss": 0.0521, "step": 824600 }, { "epoch": 8.11, "grad_norm": 0.034161150455474854, "learning_rate": 9.5719761287696e-07, "loss": 0.0419, "step": 824625 }, { "epoch": 8.11, "grad_norm": 12.510076522827148, "learning_rate": 9.570734904227115e-07, "loss": 0.0264, "step": 824650 }, { "epoch": 8.11, "grad_norm": 0.02208552695810795, "learning_rate": 9.56949367968463e-07, "loss": 0.0293, "step": 824675 }, { "epoch": 8.11, "grad_norm": 28.5590877532959, "learning_rate": 9.568252455142147e-07, "loss": 0.0337, "step": 824700 }, { "epoch": 8.11, "grad_norm": 0.016372546553611755, "learning_rate": 9.567011230599662e-07, "loss": 0.0535, "step": 824725 }, { "epoch": 8.11, "grad_norm": 5.867908000946045, "learning_rate": 9.565770006057176e-07, "loss": 0.0175, "step": 824750 }, { "epoch": 8.11, "grad_norm": 1.3839620351791382, "learning_rate": 9.564528781514692e-07, "loss": 0.0163, "step": 824775 }, { "epoch": 8.11, "grad_norm": 0.6751582622528076, "learning_rate": 9.563287556972208e-07, "loss": 0.0191, "step": 824800 }, { "epoch": 8.11, "grad_norm": 3.3566317558288574, "learning_rate": 9.562046332429723e-07, "loss": 0.0529, "step": 824825 }, { "epoch": 8.11, "grad_norm": 1.3828696012496948, "learning_rate": 9.560805107887237e-07, "loss": 0.0205, "step": 824850 }, { "epoch": 8.11, "grad_norm": 0.1449315994977951, "learning_rate": 9.559563883344753e-07, "loss": 0.0337, "step": 824875 }, { "epoch": 8.11, "grad_norm": 11.772989273071289, "learning_rate": 9.55832265880227e-07, "loss": 0.0322, "step": 824900 }, { "epoch": 8.11, "grad_norm": 0.10530322045087814, "learning_rate": 9.557081434259784e-07, "loss": 0.042, "step": 824925 }, { "epoch": 8.11, "grad_norm": 5.20383882522583, "learning_rate": 9.5558402097173e-07, "loss": 0.0198, "step": 824950 }, { "epoch": 8.11, "grad_norm": 1.8601194620132446, "learning_rate": 9.554598985174814e-07, "loss": 0.0198, "step": 824975 }, { "epoch": 8.11, "grad_norm": 9.21115493774414, "learning_rate": 9.55335776063233e-07, "loss": 0.0402, "step": 825000 }, { "epoch": 8.11, "grad_norm": 2.4520480632781982, "learning_rate": 9.552116536089845e-07, "loss": 0.021, "step": 825025 }, { "epoch": 8.11, "grad_norm": 14.199732780456543, "learning_rate": 9.550875311547361e-07, "loss": 0.0428, "step": 825050 }, { "epoch": 8.11, "grad_norm": 0.24627983570098877, "learning_rate": 9.549634087004876e-07, "loss": 0.0245, "step": 825075 }, { "epoch": 8.11, "grad_norm": 0.6999624967575073, "learning_rate": 9.548392862462392e-07, "loss": 0.0191, "step": 825100 }, { "epoch": 8.11, "grad_norm": 3.6044883728027344, "learning_rate": 9.547151637919908e-07, "loss": 0.022, "step": 825125 }, { "epoch": 8.11, "grad_norm": 1.7969521284103394, "learning_rate": 9.545910413377423e-07, "loss": 0.0199, "step": 825150 }, { "epoch": 8.11, "grad_norm": 0.01088922843337059, "learning_rate": 9.544669188834937e-07, "loss": 0.0455, "step": 825175 }, { "epoch": 8.11, "grad_norm": 25.271055221557617, "learning_rate": 9.543427964292453e-07, "loss": 0.0425, "step": 825200 }, { "epoch": 8.11, "grad_norm": 0.032226137816905975, "learning_rate": 9.54218673974997e-07, "loss": 0.0335, "step": 825225 }, { "epoch": 8.11, "grad_norm": 1.0263062715530396, "learning_rate": 9.540945515207484e-07, "loss": 0.0222, "step": 825250 }, { "epoch": 8.11, "grad_norm": 0.013108636252582073, "learning_rate": 9.539704290664998e-07, "loss": 0.0482, "step": 825275 }, { "epoch": 8.11, "grad_norm": 7.307120323181152, "learning_rate": 9.538463066122514e-07, "loss": 0.0293, "step": 825300 }, { "epoch": 8.11, "grad_norm": 0.02476334758102894, "learning_rate": 9.53722184158003e-07, "loss": 0.0497, "step": 825325 }, { "epoch": 8.11, "grad_norm": 7.3527631759643555, "learning_rate": 9.535980617037545e-07, "loss": 0.036, "step": 825350 }, { "epoch": 8.12, "grad_norm": 1.7833781242370605, "learning_rate": 9.534739392495061e-07, "loss": 0.0384, "step": 825375 }, { "epoch": 8.12, "grad_norm": 1.3881067037582397, "learning_rate": 9.533498167952577e-07, "loss": 0.0418, "step": 825400 }, { "epoch": 8.12, "grad_norm": 0.061749864369630814, "learning_rate": 9.532256943410091e-07, "loss": 0.0239, "step": 825425 }, { "epoch": 8.12, "grad_norm": 0.5424678325653076, "learning_rate": 9.531015718867606e-07, "loss": 0.0313, "step": 825450 }, { "epoch": 8.12, "grad_norm": 1.7950475215911865, "learning_rate": 9.529774494325122e-07, "loss": 0.0305, "step": 825475 }, { "epoch": 8.12, "grad_norm": 27.77246856689453, "learning_rate": 9.528533269782638e-07, "loss": 0.0238, "step": 825500 }, { "epoch": 8.12, "grad_norm": 0.05851074680685997, "learning_rate": 9.527292045240152e-07, "loss": 0.0367, "step": 825525 }, { "epoch": 8.12, "grad_norm": 2.3537514209747314, "learning_rate": 9.526050820697668e-07, "loss": 0.0411, "step": 825550 }, { "epoch": 8.12, "grad_norm": 0.05686432123184204, "learning_rate": 9.524859245136883e-07, "loss": 0.0241, "step": 825575 }, { "epoch": 8.12, "grad_norm": 14.943445205688477, "learning_rate": 9.523618020594398e-07, "loss": 0.0301, "step": 825600 }, { "epoch": 8.12, "grad_norm": 4.27220344543457, "learning_rate": 9.522376796051913e-07, "loss": 0.0242, "step": 825625 }, { "epoch": 8.12, "grad_norm": 8.761829376220703, "learning_rate": 9.52113557150943e-07, "loss": 0.0225, "step": 825650 }, { "epoch": 8.12, "grad_norm": 0.12615524232387543, "learning_rate": 9.519894346966944e-07, "loss": 0.0331, "step": 825675 }, { "epoch": 8.12, "grad_norm": 6.229404926300049, "learning_rate": 9.518653122424459e-07, "loss": 0.0329, "step": 825700 }, { "epoch": 8.12, "grad_norm": 4.140352249145508, "learning_rate": 9.517411897881976e-07, "loss": 0.0395, "step": 825725 }, { "epoch": 8.12, "grad_norm": 10.410106658935547, "learning_rate": 9.516170673339491e-07, "loss": 0.0209, "step": 825750 }, { "epoch": 8.12, "grad_norm": 0.04670659452676773, "learning_rate": 9.514929448797006e-07, "loss": 0.0267, "step": 825775 }, { "epoch": 8.12, "grad_norm": 15.202935218811035, "learning_rate": 9.513688224254521e-07, "loss": 0.0521, "step": 825800 }, { "epoch": 8.12, "grad_norm": 0.08967945724725723, "learning_rate": 9.512446999712037e-07, "loss": 0.0252, "step": 825825 }, { "epoch": 8.12, "grad_norm": 1.868355631828308, "learning_rate": 9.511205775169552e-07, "loss": 0.0297, "step": 825850 }, { "epoch": 8.12, "grad_norm": 0.24074988067150116, "learning_rate": 9.509964550627067e-07, "loss": 0.0168, "step": 825875 }, { "epoch": 8.12, "grad_norm": 4.719931125640869, "learning_rate": 9.508723326084584e-07, "loss": 0.0281, "step": 825900 }, { "epoch": 8.12, "grad_norm": 0.003932908643037081, "learning_rate": 9.507482101542098e-07, "loss": 0.0176, "step": 825925 }, { "epoch": 8.12, "grad_norm": 10.198018074035645, "learning_rate": 9.506240876999613e-07, "loss": 0.0242, "step": 825950 }, { "epoch": 8.12, "grad_norm": 0.11016843467950821, "learning_rate": 9.50499965245713e-07, "loss": 0.0219, "step": 825975 }, { "epoch": 8.12, "grad_norm": 0.8448407053947449, "learning_rate": 9.503758427914645e-07, "loss": 0.0325, "step": 826000 }, { "epoch": 8.12, "grad_norm": 0.11395201832056046, "learning_rate": 9.502517203372159e-07, "loss": 0.0253, "step": 826025 }, { "epoch": 8.12, "grad_norm": 7.592854976654053, "learning_rate": 9.501275978829674e-07, "loss": 0.0303, "step": 826050 }, { "epoch": 8.12, "grad_norm": 0.046377915889024734, "learning_rate": 9.500034754287191e-07, "loss": 0.0357, "step": 826075 }, { "epoch": 8.12, "grad_norm": 0.26571622490882874, "learning_rate": 9.498793529744706e-07, "loss": 0.0344, "step": 826100 }, { "epoch": 8.12, "grad_norm": 0.2823801636695862, "learning_rate": 9.49755230520222e-07, "loss": 0.0271, "step": 826125 }, { "epoch": 8.12, "grad_norm": 12.265907287597656, "learning_rate": 9.496311080659737e-07, "loss": 0.0331, "step": 826150 }, { "epoch": 8.12, "grad_norm": 2.6602141857147217, "learning_rate": 9.495069856117252e-07, "loss": 0.0477, "step": 826175 }, { "epoch": 8.12, "grad_norm": 13.498960494995117, "learning_rate": 9.493828631574767e-07, "loss": 0.0248, "step": 826200 }, { "epoch": 8.12, "grad_norm": 2.7595698833465576, "learning_rate": 9.492587407032284e-07, "loss": 0.0343, "step": 826225 }, { "epoch": 8.12, "grad_norm": 2.382737159729004, "learning_rate": 9.491346182489798e-07, "loss": 0.0226, "step": 826250 }, { "epoch": 8.12, "grad_norm": 0.276554137468338, "learning_rate": 9.490104957947313e-07, "loss": 0.0353, "step": 826275 }, { "epoch": 8.12, "grad_norm": 8.166034698486328, "learning_rate": 9.488863733404828e-07, "loss": 0.0225, "step": 826300 }, { "epoch": 8.12, "grad_norm": 0.6326122879981995, "learning_rate": 9.487622508862345e-07, "loss": 0.0186, "step": 826325 }, { "epoch": 8.12, "grad_norm": 7.275167465209961, "learning_rate": 9.486381284319859e-07, "loss": 0.0196, "step": 826350 }, { "epoch": 8.13, "grad_norm": 0.05740979686379433, "learning_rate": 9.485140059777374e-07, "loss": 0.0335, "step": 826375 }, { "epoch": 8.13, "grad_norm": 13.385388374328613, "learning_rate": 9.483898835234891e-07, "loss": 0.0362, "step": 826400 }, { "epoch": 8.13, "grad_norm": 0.07224298268556595, "learning_rate": 9.482657610692406e-07, "loss": 0.0269, "step": 826425 }, { "epoch": 8.13, "grad_norm": 5.71389102935791, "learning_rate": 9.48141638614992e-07, "loss": 0.0186, "step": 826450 }, { "epoch": 8.13, "grad_norm": 7.158029079437256, "learning_rate": 9.480175161607435e-07, "loss": 0.0225, "step": 826475 }, { "epoch": 8.13, "grad_norm": 0.28248438239097595, "learning_rate": 9.478933937064952e-07, "loss": 0.0244, "step": 826500 }, { "epoch": 8.13, "grad_norm": 0.0038037800695747137, "learning_rate": 9.477692712522467e-07, "loss": 0.0264, "step": 826525 }, { "epoch": 8.13, "grad_norm": 3.004499673843384, "learning_rate": 9.476451487979982e-07, "loss": 0.0302, "step": 826550 }, { "epoch": 8.13, "grad_norm": 0.4535628855228424, "learning_rate": 9.475210263437499e-07, "loss": 0.0345, "step": 826575 }, { "epoch": 8.13, "grad_norm": 10.364971160888672, "learning_rate": 9.473969038895013e-07, "loss": 0.0329, "step": 826600 }, { "epoch": 8.13, "grad_norm": 4.710288047790527, "learning_rate": 9.472727814352528e-07, "loss": 0.0269, "step": 826625 }, { "epoch": 8.13, "grad_norm": 1.1242666244506836, "learning_rate": 9.471486589810045e-07, "loss": 0.034, "step": 826650 }, { "epoch": 8.13, "grad_norm": 0.010271694511175156, "learning_rate": 9.47024536526756e-07, "loss": 0.0375, "step": 826675 }, { "epoch": 8.13, "grad_norm": 6.605135917663574, "learning_rate": 9.469004140725074e-07, "loss": 0.011, "step": 826700 }, { "epoch": 8.13, "grad_norm": 3.5744411945343018, "learning_rate": 9.467762916182589e-07, "loss": 0.034, "step": 826725 }, { "epoch": 8.13, "grad_norm": 1.7046693563461304, "learning_rate": 9.466521691640106e-07, "loss": 0.0221, "step": 826750 }, { "epoch": 8.13, "grad_norm": 0.03289688751101494, "learning_rate": 9.465280467097621e-07, "loss": 0.0563, "step": 826775 }, { "epoch": 8.13, "grad_norm": 4.831196308135986, "learning_rate": 9.464039242555135e-07, "loss": 0.0374, "step": 826800 }, { "epoch": 8.13, "grad_norm": 0.38519224524497986, "learning_rate": 9.462798018012652e-07, "loss": 0.0379, "step": 826825 }, { "epoch": 8.13, "grad_norm": 5.679385185241699, "learning_rate": 9.461556793470167e-07, "loss": 0.0374, "step": 826850 }, { "epoch": 8.13, "grad_norm": 17.624496459960938, "learning_rate": 9.460315568927682e-07, "loss": 0.0284, "step": 826875 }, { "epoch": 8.13, "grad_norm": 15.424903869628906, "learning_rate": 9.459074344385196e-07, "loss": 0.0312, "step": 826900 }, { "epoch": 8.13, "grad_norm": 5.582331657409668, "learning_rate": 9.457833119842713e-07, "loss": 0.0373, "step": 826925 }, { "epoch": 8.13, "grad_norm": 0.10831762850284576, "learning_rate": 9.456591895300228e-07, "loss": 0.0221, "step": 826950 }, { "epoch": 8.13, "grad_norm": 0.08104466646909714, "learning_rate": 9.455350670757743e-07, "loss": 0.0341, "step": 826975 }, { "epoch": 8.13, "grad_norm": 8.44190502166748, "learning_rate": 9.45410944621526e-07, "loss": 0.0379, "step": 827000 }, { "epoch": 8.13, "grad_norm": 0.1393885314464569, "learning_rate": 9.452917870654474e-07, "loss": 0.0289, "step": 827025 }, { "epoch": 8.13, "grad_norm": 3.556791305541992, "learning_rate": 9.451676646111988e-07, "loss": 0.0333, "step": 827050 }, { "epoch": 8.13, "grad_norm": 0.128860741853714, "learning_rate": 9.450435421569504e-07, "loss": 0.0439, "step": 827075 }, { "epoch": 8.13, "grad_norm": 9.56611156463623, "learning_rate": 9.44919419702702e-07, "loss": 0.0372, "step": 827100 }, { "epoch": 8.13, "grad_norm": 2.4505982398986816, "learning_rate": 9.447952972484535e-07, "loss": 0.0542, "step": 827125 }, { "epoch": 8.13, "grad_norm": 0.42831382155418396, "learning_rate": 9.44671174794205e-07, "loss": 0.0239, "step": 827150 }, { "epoch": 8.13, "grad_norm": 1.8562722206115723, "learning_rate": 9.445470523399566e-07, "loss": 0.0469, "step": 827175 }, { "epoch": 8.13, "grad_norm": 11.894280433654785, "learning_rate": 9.444229298857081e-07, "loss": 0.0248, "step": 827200 }, { "epoch": 8.13, "grad_norm": 1.8922401666641235, "learning_rate": 9.442988074314596e-07, "loss": 0.0482, "step": 827225 }, { "epoch": 8.13, "grad_norm": 15.4390869140625, "learning_rate": 9.441746849772111e-07, "loss": 0.0292, "step": 827250 }, { "epoch": 8.13, "grad_norm": 0.5760632157325745, "learning_rate": 9.440505625229627e-07, "loss": 0.0323, "step": 827275 }, { "epoch": 8.13, "grad_norm": 6.461219310760498, "learning_rate": 9.439264400687142e-07, "loss": 0.0238, "step": 827300 }, { "epoch": 8.13, "grad_norm": 3.2237250804901123, "learning_rate": 9.438023176144658e-07, "loss": 0.0279, "step": 827325 }, { "epoch": 8.13, "grad_norm": 0.6756350994110107, "learning_rate": 9.436781951602174e-07, "loss": 0.021, "step": 827350 }, { "epoch": 8.13, "grad_norm": 0.6028474569320679, "learning_rate": 9.435540727059689e-07, "loss": 0.0198, "step": 827375 }, { "epoch": 8.14, "grad_norm": 4.264975547790527, "learning_rate": 9.434299502517204e-07, "loss": 0.0159, "step": 827400 }, { "epoch": 8.14, "grad_norm": 0.22328811883926392, "learning_rate": 9.43305827797472e-07, "loss": 0.0351, "step": 827425 }, { "epoch": 8.14, "grad_norm": 0.09379910677671432, "learning_rate": 9.431817053432235e-07, "loss": 0.0355, "step": 827450 }, { "epoch": 8.14, "grad_norm": 0.04365641251206398, "learning_rate": 9.43057582888975e-07, "loss": 0.0436, "step": 827475 }, { "epoch": 8.14, "grad_norm": 6.3689093589782715, "learning_rate": 9.429334604347265e-07, "loss": 0.0436, "step": 827500 }, { "epoch": 8.14, "grad_norm": 1.2501096725463867, "learning_rate": 9.428093379804781e-07, "loss": 0.0381, "step": 827525 }, { "epoch": 8.14, "grad_norm": 12.78592300415039, "learning_rate": 9.426852155262296e-07, "loss": 0.0249, "step": 827550 }, { "epoch": 8.14, "grad_norm": 0.8220332860946655, "learning_rate": 9.425610930719812e-07, "loss": 0.0358, "step": 827575 }, { "epoch": 8.14, "grad_norm": 42.217960357666016, "learning_rate": 9.424369706177328e-07, "loss": 0.0325, "step": 827600 }, { "epoch": 8.14, "grad_norm": 0.027873437851667404, "learning_rate": 9.423128481634842e-07, "loss": 0.0257, "step": 827625 }, { "epoch": 8.14, "grad_norm": 4.762990951538086, "learning_rate": 9.421887257092358e-07, "loss": 0.041, "step": 827650 }, { "epoch": 8.14, "grad_norm": 0.005991556216031313, "learning_rate": 9.420646032549874e-07, "loss": 0.0287, "step": 827675 }, { "epoch": 8.14, "grad_norm": 3.260272264480591, "learning_rate": 9.419404808007389e-07, "loss": 0.0325, "step": 827700 }, { "epoch": 8.14, "grad_norm": 0.16689099371433258, "learning_rate": 9.418163583464903e-07, "loss": 0.0359, "step": 827725 }, { "epoch": 8.14, "grad_norm": 8.173480033874512, "learning_rate": 9.416922358922419e-07, "loss": 0.0273, "step": 827750 }, { "epoch": 8.14, "grad_norm": 0.42329472303390503, "learning_rate": 9.415681134379935e-07, "loss": 0.0452, "step": 827775 }, { "epoch": 8.14, "grad_norm": 15.134934425354004, "learning_rate": 9.41443990983745e-07, "loss": 0.0441, "step": 827800 }, { "epoch": 8.14, "grad_norm": 0.20551449060440063, "learning_rate": 9.413198685294965e-07, "loss": 0.0172, "step": 827825 }, { "epoch": 8.14, "grad_norm": 25.802160263061523, "learning_rate": 9.411957460752481e-07, "loss": 0.0514, "step": 827850 }, { "epoch": 8.14, "grad_norm": 1.2123231887817383, "learning_rate": 9.410716236209996e-07, "loss": 0.0361, "step": 827875 }, { "epoch": 8.14, "grad_norm": 13.215950012207031, "learning_rate": 9.409475011667511e-07, "loss": 0.0364, "step": 827900 }, { "epoch": 8.14, "grad_norm": 0.2728451192378998, "learning_rate": 9.408233787125026e-07, "loss": 0.033, "step": 827925 }, { "epoch": 8.14, "grad_norm": 19.480661392211914, "learning_rate": 9.406992562582542e-07, "loss": 0.0192, "step": 827950 }, { "epoch": 8.14, "grad_norm": 2.710888624191284, "learning_rate": 9.405751338040057e-07, "loss": 0.0382, "step": 827975 }, { "epoch": 8.14, "grad_norm": 20.36127281188965, "learning_rate": 9.404510113497573e-07, "loss": 0.0458, "step": 828000 }, { "epoch": 8.14, "grad_norm": 0.9647283554077148, "learning_rate": 9.403268888955089e-07, "loss": 0.0265, "step": 828025 }, { "epoch": 8.14, "grad_norm": 5.644024848937988, "learning_rate": 9.402027664412604e-07, "loss": 0.0251, "step": 828050 }, { "epoch": 8.14, "grad_norm": 0.046624407172203064, "learning_rate": 9.400786439870119e-07, "loss": 0.0475, "step": 828075 }, { "epoch": 8.14, "grad_norm": 12.178582191467285, "learning_rate": 9.399545215327635e-07, "loss": 0.028, "step": 828100 }, { "epoch": 8.14, "grad_norm": 0.7347145080566406, "learning_rate": 9.39830399078515e-07, "loss": 0.0409, "step": 828125 }, { "epoch": 8.14, "grad_norm": 32.99456787109375, "learning_rate": 9.397062766242665e-07, "loss": 0.0249, "step": 828150 }, { "epoch": 8.14, "grad_norm": 0.10931341350078583, "learning_rate": 9.39582154170018e-07, "loss": 0.0383, "step": 828175 }, { "epoch": 8.14, "grad_norm": 6.252181053161621, "learning_rate": 9.394580317157696e-07, "loss": 0.023, "step": 828200 }, { "epoch": 8.14, "grad_norm": 1.9084526300430298, "learning_rate": 9.393339092615211e-07, "loss": 0.0285, "step": 828225 }, { "epoch": 8.14, "grad_norm": 1.856067419052124, "learning_rate": 9.392097868072727e-07, "loss": 0.0207, "step": 828250 }, { "epoch": 8.14, "grad_norm": 2.975477695465088, "learning_rate": 9.390856643530243e-07, "loss": 0.0311, "step": 828275 }, { "epoch": 8.14, "grad_norm": 8.733988761901855, "learning_rate": 9.389615418987757e-07, "loss": 0.0269, "step": 828300 }, { "epoch": 8.14, "grad_norm": 2.0837459564208984, "learning_rate": 9.388374194445272e-07, "loss": 0.0367, "step": 828325 }, { "epoch": 8.14, "grad_norm": 12.925952911376953, "learning_rate": 9.387132969902788e-07, "loss": 0.0213, "step": 828350 }, { "epoch": 8.14, "grad_norm": 0.0534571148455143, "learning_rate": 9.385891745360304e-07, "loss": 0.0333, "step": 828375 }, { "epoch": 8.14, "grad_norm": 5.197295665740967, "learning_rate": 9.384650520817818e-07, "loss": 0.0255, "step": 828400 }, { "epoch": 8.15, "grad_norm": 5.5058465003967285, "learning_rate": 9.383409296275334e-07, "loss": 0.0374, "step": 828425 }, { "epoch": 8.15, "grad_norm": 2.9083924293518066, "learning_rate": 9.38216807173285e-07, "loss": 0.0233, "step": 828450 }, { "epoch": 8.15, "grad_norm": 8.908977508544922, "learning_rate": 9.380926847190365e-07, "loss": 0.0352, "step": 828475 }, { "epoch": 8.15, "grad_norm": 9.637452125549316, "learning_rate": 9.37968562264788e-07, "loss": 0.0258, "step": 828500 }, { "epoch": 8.15, "grad_norm": 0.07284388691186905, "learning_rate": 9.378444398105396e-07, "loss": 0.0462, "step": 828525 }, { "epoch": 8.15, "grad_norm": 4.873064041137695, "learning_rate": 9.377203173562911e-07, "loss": 0.0139, "step": 828550 }, { "epoch": 8.15, "grad_norm": 0.03727966547012329, "learning_rate": 9.375961949020426e-07, "loss": 0.0366, "step": 828575 }, { "epoch": 8.15, "grad_norm": 2.4943342208862305, "learning_rate": 9.374720724477941e-07, "loss": 0.0126, "step": 828600 }, { "epoch": 8.15, "grad_norm": 1.2946462631225586, "learning_rate": 9.373479499935457e-07, "loss": 0.0283, "step": 828625 }, { "epoch": 8.15, "grad_norm": 6.708711624145508, "learning_rate": 9.372238275392972e-07, "loss": 0.0134, "step": 828650 }, { "epoch": 8.15, "grad_norm": 0.13664208352565765, "learning_rate": 9.370997050850488e-07, "loss": 0.0311, "step": 828675 }, { "epoch": 8.15, "grad_norm": 0.8987912535667419, "learning_rate": 9.369755826308004e-07, "loss": 0.0189, "step": 828700 }, { "epoch": 8.15, "grad_norm": 3.689253330230713, "learning_rate": 9.368514601765519e-07, "loss": 0.0381, "step": 828725 }, { "epoch": 8.15, "grad_norm": 11.61617660522461, "learning_rate": 9.367273377223033e-07, "loss": 0.0378, "step": 828750 }, { "epoch": 8.15, "grad_norm": 0.01626587100327015, "learning_rate": 9.366032152680549e-07, "loss": 0.029, "step": 828775 }, { "epoch": 8.15, "grad_norm": 14.747897148132324, "learning_rate": 9.364790928138065e-07, "loss": 0.031, "step": 828800 }, { "epoch": 8.15, "grad_norm": 11.20103645324707, "learning_rate": 9.36354970359558e-07, "loss": 0.0395, "step": 828825 }, { "epoch": 8.15, "grad_norm": 9.992342948913574, "learning_rate": 9.362308479053095e-07, "loss": 0.0379, "step": 828850 }, { "epoch": 8.15, "grad_norm": 3.947777032852173, "learning_rate": 9.361067254510611e-07, "loss": 0.0419, "step": 828875 }, { "epoch": 8.15, "grad_norm": 0.6384852528572083, "learning_rate": 9.359826029968126e-07, "loss": 0.021, "step": 828900 }, { "epoch": 8.15, "grad_norm": 0.07356058806180954, "learning_rate": 9.358584805425642e-07, "loss": 0.0403, "step": 828925 }, { "epoch": 8.15, "grad_norm": 10.527118682861328, "learning_rate": 9.357343580883158e-07, "loss": 0.0327, "step": 828950 }, { "epoch": 8.15, "grad_norm": 2.8082990646362305, "learning_rate": 9.356102356340672e-07, "loss": 0.037, "step": 828975 }, { "epoch": 8.15, "grad_norm": 21.386703491210938, "learning_rate": 9.354861131798187e-07, "loss": 0.0362, "step": 829000 }, { "epoch": 8.15, "grad_norm": 1.4560139179229736, "learning_rate": 9.353619907255703e-07, "loss": 0.0398, "step": 829025 }, { "epoch": 8.15, "grad_norm": 10.708584785461426, "learning_rate": 9.352378682713219e-07, "loss": 0.0368, "step": 829050 }, { "epoch": 8.15, "grad_norm": 0.07454902678728104, "learning_rate": 9.351137458170733e-07, "loss": 0.0257, "step": 829075 }, { "epoch": 8.15, "grad_norm": 17.92414665222168, "learning_rate": 9.349896233628249e-07, "loss": 0.0301, "step": 829100 }, { "epoch": 8.15, "grad_norm": 1.4563353061676025, "learning_rate": 9.348655009085765e-07, "loss": 0.0369, "step": 829125 }, { "epoch": 8.15, "grad_norm": 25.409189224243164, "learning_rate": 9.34741378454328e-07, "loss": 0.0266, "step": 829150 }, { "epoch": 8.15, "grad_norm": 5.394742965698242, "learning_rate": 9.346172560000794e-07, "loss": 0.0475, "step": 829175 }, { "epoch": 8.15, "grad_norm": 2.1040236949920654, "learning_rate": 9.34493133545831e-07, "loss": 0.0216, "step": 829200 }, { "epoch": 8.15, "grad_norm": 0.9316216111183167, "learning_rate": 9.343690110915826e-07, "loss": 0.0271, "step": 829225 }, { "epoch": 8.15, "grad_norm": 6.971614837646484, "learning_rate": 9.342448886373341e-07, "loss": 0.0287, "step": 829250 }, { "epoch": 8.15, "grad_norm": 0.027526002377271652, "learning_rate": 9.341207661830856e-07, "loss": 0.0324, "step": 829275 }, { "epoch": 8.15, "grad_norm": 2.337388515472412, "learning_rate": 9.339966437288372e-07, "loss": 0.0131, "step": 829300 }, { "epoch": 8.15, "grad_norm": 0.07836292684078217, "learning_rate": 9.338725212745887e-07, "loss": 0.0329, "step": 829325 }, { "epoch": 8.15, "grad_norm": 16.393474578857422, "learning_rate": 9.337483988203403e-07, "loss": 0.0205, "step": 829350 }, { "epoch": 8.15, "grad_norm": 2.061044692993164, "learning_rate": 9.336242763660919e-07, "loss": 0.0344, "step": 829375 }, { "epoch": 8.15, "grad_norm": 6.213630199432373, "learning_rate": 9.335001539118434e-07, "loss": 0.0248, "step": 829400 }, { "epoch": 8.16, "grad_norm": 5.925371170043945, "learning_rate": 9.333760314575948e-07, "loss": 0.0417, "step": 829425 }, { "epoch": 8.16, "grad_norm": 7.095954895019531, "learning_rate": 9.332519090033464e-07, "loss": 0.0243, "step": 829450 }, { "epoch": 8.16, "grad_norm": 0.2057589292526245, "learning_rate": 9.33127786549098e-07, "loss": 0.0271, "step": 829475 }, { "epoch": 8.16, "grad_norm": 2.4622621536254883, "learning_rate": 9.330036640948495e-07, "loss": 0.0257, "step": 829500 }, { "epoch": 8.16, "grad_norm": 0.10327276587486267, "learning_rate": 9.32879541640601e-07, "loss": 0.0254, "step": 829525 }, { "epoch": 8.16, "grad_norm": 11.80506706237793, "learning_rate": 9.327554191863526e-07, "loss": 0.0106, "step": 829550 }, { "epoch": 8.16, "grad_norm": 0.06044843792915344, "learning_rate": 9.326312967321041e-07, "loss": 0.0277, "step": 829575 }, { "epoch": 8.16, "grad_norm": 5.323428153991699, "learning_rate": 9.325071742778557e-07, "loss": 0.0264, "step": 829600 }, { "epoch": 8.16, "grad_norm": 3.523232936859131, "learning_rate": 9.323830518236071e-07, "loss": 0.0272, "step": 829625 }, { "epoch": 8.16, "grad_norm": 7.570516109466553, "learning_rate": 9.322589293693587e-07, "loss": 0.0448, "step": 829650 }, { "epoch": 8.16, "grad_norm": 0.0054151141084730625, "learning_rate": 9.321348069151102e-07, "loss": 0.0403, "step": 829675 }, { "epoch": 8.16, "grad_norm": 0.16857299208641052, "learning_rate": 9.320106844608618e-07, "loss": 0.0278, "step": 829700 }, { "epoch": 8.16, "grad_norm": 0.14953482151031494, "learning_rate": 9.318865620066134e-07, "loss": 0.0503, "step": 829725 }, { "epoch": 8.16, "grad_norm": 9.529755592346191, "learning_rate": 9.317624395523648e-07, "loss": 0.0342, "step": 829750 }, { "epoch": 8.16, "grad_norm": 3.731947183609009, "learning_rate": 9.316383170981164e-07, "loss": 0.0415, "step": 829775 }, { "epoch": 8.16, "grad_norm": 7.210460662841797, "learning_rate": 9.31514194643868e-07, "loss": 0.026, "step": 829800 }, { "epoch": 8.16, "grad_norm": 0.00908857025206089, "learning_rate": 9.313950370877894e-07, "loss": 0.0382, "step": 829825 }, { "epoch": 8.16, "grad_norm": 18.31689453125, "learning_rate": 9.31270914633541e-07, "loss": 0.0281, "step": 829850 }, { "epoch": 8.16, "grad_norm": 0.0652233138680458, "learning_rate": 9.311467921792924e-07, "loss": 0.0427, "step": 829875 }, { "epoch": 8.16, "grad_norm": 3.7969870567321777, "learning_rate": 9.31022669725044e-07, "loss": 0.028, "step": 829900 }, { "epoch": 8.16, "grad_norm": 1.453580379486084, "learning_rate": 9.308985472707956e-07, "loss": 0.0299, "step": 829925 }, { "epoch": 8.16, "grad_norm": 13.298720359802246, "learning_rate": 9.307744248165471e-07, "loss": 0.022, "step": 829950 }, { "epoch": 8.16, "grad_norm": 0.1306336373090744, "learning_rate": 9.306503023622985e-07, "loss": 0.0565, "step": 829975 }, { "epoch": 8.16, "grad_norm": 0.45473039150238037, "learning_rate": 9.305261799080501e-07, "loss": 0.0165, "step": 830000 }, { "epoch": 8.16, "grad_norm": 1.3039965629577637, "learning_rate": 9.304020574538017e-07, "loss": 0.0217, "step": 830025 }, { "epoch": 8.16, "grad_norm": 10.555635452270508, "learning_rate": 9.302779349995532e-07, "loss": 0.0225, "step": 830050 }, { "epoch": 8.16, "grad_norm": 0.2295135259628296, "learning_rate": 9.301538125453048e-07, "loss": 0.034, "step": 830075 }, { "epoch": 8.16, "grad_norm": 13.879040718078613, "learning_rate": 9.300296900910563e-07, "loss": 0.0344, "step": 830100 }, { "epoch": 8.16, "grad_norm": 0.476182758808136, "learning_rate": 9.299055676368078e-07, "loss": 0.033, "step": 830125 }, { "epoch": 8.16, "grad_norm": 9.481483459472656, "learning_rate": 9.297814451825594e-07, "loss": 0.0284, "step": 830150 }, { "epoch": 8.16, "grad_norm": 1.8692247867584229, "learning_rate": 9.29657322728311e-07, "loss": 0.0323, "step": 830175 }, { "epoch": 8.16, "grad_norm": 3.6433346271514893, "learning_rate": 9.295332002740625e-07, "loss": 0.0125, "step": 830200 }, { "epoch": 8.16, "grad_norm": 2.7802817821502686, "learning_rate": 9.294090778198139e-07, "loss": 0.0268, "step": 830225 }, { "epoch": 8.16, "grad_norm": 12.14965534210205, "learning_rate": 9.292849553655655e-07, "loss": 0.0243, "step": 830250 }, { "epoch": 8.16, "grad_norm": 4.938564300537109, "learning_rate": 9.291608329113171e-07, "loss": 0.0333, "step": 830275 }, { "epoch": 8.16, "grad_norm": 19.25816535949707, "learning_rate": 9.290367104570686e-07, "loss": 0.0317, "step": 830300 }, { "epoch": 8.16, "grad_norm": 0.004348456859588623, "learning_rate": 9.289125880028202e-07, "loss": 0.0333, "step": 830325 }, { "epoch": 8.16, "grad_norm": 3.5915699005126953, "learning_rate": 9.287884655485717e-07, "loss": 0.0178, "step": 830350 }, { "epoch": 8.16, "grad_norm": 0.8966259360313416, "learning_rate": 9.286643430943232e-07, "loss": 0.0254, "step": 830375 }, { "epoch": 8.16, "grad_norm": 2.882622003555298, "learning_rate": 9.285402206400747e-07, "loss": 0.0303, "step": 830400 }, { "epoch": 8.16, "grad_norm": 7.277853965759277, "learning_rate": 9.284160981858264e-07, "loss": 0.0339, "step": 830425 }, { "epoch": 8.17, "grad_norm": 9.127510070800781, "learning_rate": 9.282919757315778e-07, "loss": 0.0283, "step": 830450 }, { "epoch": 8.17, "grad_norm": 4.39132022857666, "learning_rate": 9.281678532773293e-07, "loss": 0.0461, "step": 830475 }, { "epoch": 8.17, "grad_norm": 9.135665893554688, "learning_rate": 9.280437308230809e-07, "loss": 0.0195, "step": 830500 }, { "epoch": 8.17, "grad_norm": 0.486965149641037, "learning_rate": 9.279196083688325e-07, "loss": 0.0379, "step": 830525 }, { "epoch": 8.17, "grad_norm": 5.933928489685059, "learning_rate": 9.277954859145839e-07, "loss": 0.0255, "step": 830550 }, { "epoch": 8.17, "grad_norm": 0.030736427754163742, "learning_rate": 9.276713634603355e-07, "loss": 0.0205, "step": 830575 }, { "epoch": 8.17, "grad_norm": 11.479887962341309, "learning_rate": 9.275472410060871e-07, "loss": 0.0236, "step": 830600 }, { "epoch": 8.17, "grad_norm": 0.7992504239082336, "learning_rate": 9.274231185518386e-07, "loss": 0.041, "step": 830625 }, { "epoch": 8.17, "grad_norm": 4.641151428222656, "learning_rate": 9.2729899609759e-07, "loss": 0.0261, "step": 830650 }, { "epoch": 8.17, "grad_norm": 0.004122748505324125, "learning_rate": 9.271748736433416e-07, "loss": 0.0475, "step": 830675 }, { "epoch": 8.17, "grad_norm": 8.246642112731934, "learning_rate": 9.270507511890932e-07, "loss": 0.031, "step": 830700 }, { "epoch": 8.17, "grad_norm": 0.3275207579135895, "learning_rate": 9.269266287348447e-07, "loss": 0.0482, "step": 830725 }, { "epoch": 8.17, "grad_norm": 3.5507359504699707, "learning_rate": 9.268025062805963e-07, "loss": 0.027, "step": 830750 }, { "epoch": 8.17, "grad_norm": 1.4094940423965454, "learning_rate": 9.266783838263478e-07, "loss": 0.0416, "step": 830775 }, { "epoch": 8.17, "grad_norm": 10.51292610168457, "learning_rate": 9.265542613720993e-07, "loss": 0.0378, "step": 830800 }, { "epoch": 8.17, "grad_norm": 0.10761044919490814, "learning_rate": 9.264301389178508e-07, "loss": 0.0396, "step": 830825 }, { "epoch": 8.17, "grad_norm": 2.9350953102111816, "learning_rate": 9.263060164636025e-07, "loss": 0.0335, "step": 830850 }, { "epoch": 8.17, "grad_norm": 0.5492679476737976, "learning_rate": 9.26181894009354e-07, "loss": 0.0278, "step": 830875 }, { "epoch": 8.17, "grad_norm": 6.947704792022705, "learning_rate": 9.260577715551054e-07, "loss": 0.0335, "step": 830900 }, { "epoch": 8.17, "grad_norm": 1.106281042098999, "learning_rate": 9.25933649100857e-07, "loss": 0.0182, "step": 830925 }, { "epoch": 8.17, "grad_norm": 17.329748153686523, "learning_rate": 9.258095266466086e-07, "loss": 0.0361, "step": 830950 }, { "epoch": 8.17, "grad_norm": 3.101851463317871, "learning_rate": 9.256854041923601e-07, "loss": 0.02, "step": 830975 }, { "epoch": 8.17, "grad_norm": 7.032924175262451, "learning_rate": 9.255612817381117e-07, "loss": 0.0175, "step": 831000 }, { "epoch": 8.17, "grad_norm": 0.01365510281175375, "learning_rate": 9.254371592838632e-07, "loss": 0.031, "step": 831025 }, { "epoch": 8.17, "grad_norm": 0.158903568983078, "learning_rate": 9.253130368296147e-07, "loss": 0.011, "step": 831050 }, { "epoch": 8.17, "grad_norm": 0.11284984648227692, "learning_rate": 9.251889143753662e-07, "loss": 0.037, "step": 831075 }, { "epoch": 8.17, "grad_norm": 19.993825912475586, "learning_rate": 9.250647919211179e-07, "loss": 0.0302, "step": 831100 }, { "epoch": 8.17, "grad_norm": 0.008456124924123287, "learning_rate": 9.249406694668693e-07, "loss": 0.035, "step": 831125 }, { "epoch": 8.17, "grad_norm": 7.34830904006958, "learning_rate": 9.248165470126208e-07, "loss": 0.0225, "step": 831150 }, { "epoch": 8.17, "grad_norm": 1.8020411729812622, "learning_rate": 9.246924245583724e-07, "loss": 0.023, "step": 831175 }, { "epoch": 8.17, "grad_norm": 6.404664039611816, "learning_rate": 9.24568302104124e-07, "loss": 0.0212, "step": 831200 }, { "epoch": 8.17, "grad_norm": 0.18665292859077454, "learning_rate": 9.244441796498754e-07, "loss": 0.0495, "step": 831225 }, { "epoch": 8.17, "grad_norm": 1.4158728122711182, "learning_rate": 9.243200571956269e-07, "loss": 0.0245, "step": 831250 }, { "epoch": 8.17, "grad_norm": 4.089585304260254, "learning_rate": 9.241959347413786e-07, "loss": 0.0317, "step": 831275 }, { "epoch": 8.17, "grad_norm": 1.1735470294952393, "learning_rate": 9.240718122871301e-07, "loss": 0.0261, "step": 831300 }, { "epoch": 8.17, "grad_norm": 0.0027035404928028584, "learning_rate": 9.239476898328815e-07, "loss": 0.0271, "step": 831325 }, { "epoch": 8.17, "grad_norm": 11.830046653747559, "learning_rate": 9.238235673786331e-07, "loss": 0.0416, "step": 831350 }, { "epoch": 8.17, "grad_norm": 7.0126776695251465, "learning_rate": 9.236994449243847e-07, "loss": 0.0368, "step": 831375 }, { "epoch": 8.17, "grad_norm": 11.920247077941895, "learning_rate": 9.235753224701362e-07, "loss": 0.0121, "step": 831400 }, { "epoch": 8.17, "grad_norm": 0.11151786893606186, "learning_rate": 9.234512000158878e-07, "loss": 0.0422, "step": 831425 }, { "epoch": 8.17, "grad_norm": 10.771782875061035, "learning_rate": 9.233270775616393e-07, "loss": 0.0377, "step": 831450 }, { "epoch": 8.18, "grad_norm": 0.5193753242492676, "learning_rate": 9.232029551073908e-07, "loss": 0.027, "step": 831475 }, { "epoch": 8.18, "grad_norm": 17.15475845336914, "learning_rate": 9.230788326531423e-07, "loss": 0.0241, "step": 831500 }, { "epoch": 8.18, "grad_norm": 0.02716251090168953, "learning_rate": 9.22954710198894e-07, "loss": 0.0262, "step": 831525 }, { "epoch": 8.18, "grad_norm": 4.647923469543457, "learning_rate": 9.228305877446455e-07, "loss": 0.0271, "step": 831550 }, { "epoch": 8.18, "grad_norm": 0.014560763724148273, "learning_rate": 9.227064652903969e-07, "loss": 0.0384, "step": 831575 }, { "epoch": 8.18, "grad_norm": 11.704099655151367, "learning_rate": 9.225823428361485e-07, "loss": 0.0313, "step": 831600 }, { "epoch": 8.18, "grad_norm": 2.1542868614196777, "learning_rate": 9.224582203819001e-07, "loss": 0.0463, "step": 831625 }, { "epoch": 8.18, "grad_norm": 12.497358322143555, "learning_rate": 9.223340979276516e-07, "loss": 0.0388, "step": 831650 }, { "epoch": 8.18, "grad_norm": 8.974458694458008, "learning_rate": 9.222099754734032e-07, "loss": 0.0317, "step": 831675 }, { "epoch": 8.18, "grad_norm": 2.959327459335327, "learning_rate": 9.220858530191547e-07, "loss": 0.0373, "step": 831700 }, { "epoch": 8.18, "grad_norm": 0.0066700829192996025, "learning_rate": 9.219617305649062e-07, "loss": 0.0288, "step": 831725 }, { "epoch": 8.18, "grad_norm": 7.945285797119141, "learning_rate": 9.218376081106577e-07, "loss": 0.025, "step": 831750 }, { "epoch": 8.18, "grad_norm": 3.5798652172088623, "learning_rate": 9.217134856564093e-07, "loss": 0.022, "step": 831775 }, { "epoch": 8.18, "grad_norm": 1.699328899383545, "learning_rate": 9.215893632021608e-07, "loss": 0.022, "step": 831800 }, { "epoch": 8.18, "grad_norm": 0.30197256803512573, "learning_rate": 9.214652407479123e-07, "loss": 0.0315, "step": 831825 }, { "epoch": 8.18, "grad_norm": 5.204494953155518, "learning_rate": 9.213411182936639e-07, "loss": 0.0222, "step": 831850 }, { "epoch": 8.18, "grad_norm": 0.008639633655548096, "learning_rate": 9.212169958394155e-07, "loss": 0.0302, "step": 831875 }, { "epoch": 8.18, "grad_norm": 13.672428131103516, "learning_rate": 9.210928733851669e-07, "loss": 0.0282, "step": 831900 }, { "epoch": 8.18, "grad_norm": 0.40769660472869873, "learning_rate": 9.209687509309184e-07, "loss": 0.0375, "step": 831925 }, { "epoch": 8.18, "grad_norm": 11.50531005859375, "learning_rate": 9.2084462847667e-07, "loss": 0.0218, "step": 831950 }, { "epoch": 8.18, "grad_norm": 0.011244035325944424, "learning_rate": 9.207205060224216e-07, "loss": 0.0463, "step": 831975 }, { "epoch": 8.18, "grad_norm": 2.2957189083099365, "learning_rate": 9.20596383568173e-07, "loss": 0.0282, "step": 832000 }, { "epoch": 8.18, "grad_norm": 0.045128464698791504, "learning_rate": 9.204722611139246e-07, "loss": 0.034, "step": 832025 }, { "epoch": 8.18, "grad_norm": 13.82065486907959, "learning_rate": 9.203481386596762e-07, "loss": 0.0231, "step": 832050 }, { "epoch": 8.18, "grad_norm": 2.3385794162750244, "learning_rate": 9.202240162054277e-07, "loss": 0.0393, "step": 832075 }, { "epoch": 8.18, "grad_norm": 14.660750389099121, "learning_rate": 9.200998937511793e-07, "loss": 0.0265, "step": 832100 }, { "epoch": 8.18, "grad_norm": 0.24649430811405182, "learning_rate": 9.199757712969308e-07, "loss": 0.0423, "step": 832125 }, { "epoch": 8.18, "grad_norm": 8.152820587158203, "learning_rate": 9.198516488426823e-07, "loss": 0.0259, "step": 832150 }, { "epoch": 8.18, "grad_norm": 5.412721157073975, "learning_rate": 9.197275263884338e-07, "loss": 0.052, "step": 832175 }, { "epoch": 8.18, "grad_norm": 95.87022399902344, "learning_rate": 9.196034039341854e-07, "loss": 0.029, "step": 832200 }, { "epoch": 8.18, "grad_norm": 3.1945199966430664, "learning_rate": 9.194792814799369e-07, "loss": 0.0301, "step": 832225 }, { "epoch": 8.18, "grad_norm": 3.8784430027008057, "learning_rate": 9.193551590256884e-07, "loss": 0.0145, "step": 832250 }, { "epoch": 8.18, "grad_norm": 0.024384377524256706, "learning_rate": 9.1923103657144e-07, "loss": 0.0356, "step": 832275 }, { "epoch": 8.18, "grad_norm": 9.793050765991211, "learning_rate": 9.191069141171916e-07, "loss": 0.0422, "step": 832300 }, { "epoch": 8.18, "grad_norm": 1.9689481258392334, "learning_rate": 9.189827916629431e-07, "loss": 0.0299, "step": 832325 }, { "epoch": 8.18, "grad_norm": 14.41250991821289, "learning_rate": 9.188586692086945e-07, "loss": 0.028, "step": 832350 }, { "epoch": 8.18, "grad_norm": 0.015142331831157207, "learning_rate": 9.187345467544462e-07, "loss": 0.0313, "step": 832375 }, { "epoch": 8.18, "grad_norm": 11.268928527832031, "learning_rate": 9.186104243001977e-07, "loss": 0.0332, "step": 832400 }, { "epoch": 8.18, "grad_norm": 9.101062774658203, "learning_rate": 9.184863018459492e-07, "loss": 0.0314, "step": 832425 }, { "epoch": 8.18, "grad_norm": 13.28750228881836, "learning_rate": 9.183621793917008e-07, "loss": 0.0227, "step": 832450 }, { "epoch": 8.19, "grad_norm": 0.13885687291622162, "learning_rate": 9.182380569374523e-07, "loss": 0.0276, "step": 832475 }, { "epoch": 8.19, "grad_norm": 6.024961471557617, "learning_rate": 9.181139344832038e-07, "loss": 0.0226, "step": 832500 }, { "epoch": 8.19, "grad_norm": 0.04574500769376755, "learning_rate": 9.179898120289554e-07, "loss": 0.0429, "step": 832525 }, { "epoch": 8.19, "grad_norm": 5.341665744781494, "learning_rate": 9.17865689574707e-07, "loss": 0.0298, "step": 832550 }, { "epoch": 8.19, "grad_norm": 0.053214795887470245, "learning_rate": 9.177415671204584e-07, "loss": 0.0306, "step": 832575 }, { "epoch": 8.19, "grad_norm": 2.6011102199554443, "learning_rate": 9.176174446662099e-07, "loss": 0.0315, "step": 832600 }, { "epoch": 8.19, "grad_norm": 0.030573386698961258, "learning_rate": 9.174982871101315e-07, "loss": 0.0325, "step": 832625 }, { "epoch": 8.19, "grad_norm": 11.912002563476562, "learning_rate": 9.17374164655883e-07, "loss": 0.0246, "step": 832650 }, { "epoch": 8.19, "grad_norm": 0.2778021991252899, "learning_rate": 9.172500422016345e-07, "loss": 0.0325, "step": 832675 }, { "epoch": 8.19, "grad_norm": 4.781435012817383, "learning_rate": 9.17125919747386e-07, "loss": 0.0282, "step": 832700 }, { "epoch": 8.19, "grad_norm": 0.4855816066265106, "learning_rate": 9.170017972931376e-07, "loss": 0.0297, "step": 832725 }, { "epoch": 8.19, "grad_norm": 4.905594348907471, "learning_rate": 9.168776748388891e-07, "loss": 0.0192, "step": 832750 }, { "epoch": 8.19, "grad_norm": 0.034974224865436554, "learning_rate": 9.167535523846406e-07, "loss": 0.0324, "step": 832775 }, { "epoch": 8.19, "grad_norm": 5.793062686920166, "learning_rate": 9.166294299303923e-07, "loss": 0.0208, "step": 832800 }, { "epoch": 8.19, "grad_norm": 7.580665588378906, "learning_rate": 9.165053074761437e-07, "loss": 0.0387, "step": 832825 }, { "epoch": 8.19, "grad_norm": 7.480050563812256, "learning_rate": 9.163811850218952e-07, "loss": 0.0239, "step": 832850 }, { "epoch": 8.19, "grad_norm": 0.09812994301319122, "learning_rate": 9.162570625676469e-07, "loss": 0.0416, "step": 832875 }, { "epoch": 8.19, "grad_norm": 6.3248748779296875, "learning_rate": 9.161329401133984e-07, "loss": 0.0282, "step": 832900 }, { "epoch": 8.19, "grad_norm": 0.007126639597117901, "learning_rate": 9.160088176591498e-07, "loss": 0.0638, "step": 832925 }, { "epoch": 8.19, "grad_norm": 2.666628122329712, "learning_rate": 9.158846952049014e-07, "loss": 0.0328, "step": 832950 }, { "epoch": 8.19, "grad_norm": 0.007327716797590256, "learning_rate": 9.15760572750653e-07, "loss": 0.0384, "step": 832975 }, { "epoch": 8.19, "grad_norm": 1.8533329963684082, "learning_rate": 9.156364502964045e-07, "loss": 0.0311, "step": 833000 }, { "epoch": 8.19, "grad_norm": 4.694777011871338, "learning_rate": 9.155123278421559e-07, "loss": 0.0306, "step": 833025 }, { "epoch": 8.19, "grad_norm": 8.097334861755371, "learning_rate": 9.153882053879076e-07, "loss": 0.0258, "step": 833050 }, { "epoch": 8.19, "grad_norm": 2.3915462493896484, "learning_rate": 9.152640829336591e-07, "loss": 0.0241, "step": 833075 }, { "epoch": 8.19, "grad_norm": 11.526467323303223, "learning_rate": 9.151399604794106e-07, "loss": 0.026, "step": 833100 }, { "epoch": 8.19, "grad_norm": 1.7716478109359741, "learning_rate": 9.150158380251622e-07, "loss": 0.0528, "step": 833125 }, { "epoch": 8.19, "grad_norm": 4.3951897621154785, "learning_rate": 9.148917155709138e-07, "loss": 0.0155, "step": 833150 }, { "epoch": 8.19, "grad_norm": 0.07413798570632935, "learning_rate": 9.147675931166652e-07, "loss": 0.0381, "step": 833175 }, { "epoch": 8.19, "grad_norm": 5.244873046875, "learning_rate": 9.146434706624167e-07, "loss": 0.0262, "step": 833200 }, { "epoch": 8.19, "grad_norm": 0.1228591650724411, "learning_rate": 9.145193482081684e-07, "loss": 0.0441, "step": 833225 }, { "epoch": 8.19, "grad_norm": 9.12543773651123, "learning_rate": 9.143952257539199e-07, "loss": 0.0338, "step": 833250 }, { "epoch": 8.19, "grad_norm": 0.09050719439983368, "learning_rate": 9.142711032996713e-07, "loss": 0.0302, "step": 833275 }, { "epoch": 8.19, "grad_norm": 6.543441295623779, "learning_rate": 9.14146980845423e-07, "loss": 0.0378, "step": 833300 }, { "epoch": 8.19, "grad_norm": 0.1315585821866989, "learning_rate": 9.140228583911745e-07, "loss": 0.0325, "step": 833325 }, { "epoch": 8.19, "grad_norm": 14.079505920410156, "learning_rate": 9.13898735936926e-07, "loss": 0.028, "step": 833350 }, { "epoch": 8.19, "grad_norm": 0.1560351699590683, "learning_rate": 9.137746134826775e-07, "loss": 0.0294, "step": 833375 }, { "epoch": 8.19, "grad_norm": 5.7607879638671875, "learning_rate": 9.136504910284291e-07, "loss": 0.0235, "step": 833400 }, { "epoch": 8.19, "grad_norm": 0.00481455959379673, "learning_rate": 9.135263685741806e-07, "loss": 0.0326, "step": 833425 }, { "epoch": 8.19, "grad_norm": 8.775182723999023, "learning_rate": 9.134022461199321e-07, "loss": 0.0227, "step": 833450 }, { "epoch": 8.19, "grad_norm": 0.007816210389137268, "learning_rate": 9.132781236656838e-07, "loss": 0.0364, "step": 833475 }, { "epoch": 8.2, "grad_norm": 0.4256526827812195, "learning_rate": 9.131540012114352e-07, "loss": 0.0146, "step": 833500 }, { "epoch": 8.2, "grad_norm": 1.344526767730713, "learning_rate": 9.130298787571867e-07, "loss": 0.0289, "step": 833525 }, { "epoch": 8.2, "grad_norm": 14.064291954040527, "learning_rate": 9.129057563029383e-07, "loss": 0.0255, "step": 833550 }, { "epoch": 8.2, "grad_norm": 0.06229949742555618, "learning_rate": 9.127816338486899e-07, "loss": 0.0518, "step": 833575 }, { "epoch": 8.2, "grad_norm": 1.6714019775390625, "learning_rate": 9.126575113944413e-07, "loss": 0.0333, "step": 833600 }, { "epoch": 8.2, "grad_norm": 4.069565773010254, "learning_rate": 9.125333889401928e-07, "loss": 0.0255, "step": 833625 }, { "epoch": 8.2, "grad_norm": 1.0192378759384155, "learning_rate": 9.124092664859445e-07, "loss": 0.0372, "step": 833650 }, { "epoch": 8.2, "grad_norm": 0.30269816517829895, "learning_rate": 9.12285144031696e-07, "loss": 0.0491, "step": 833675 }, { "epoch": 8.2, "grad_norm": 16.521059036254883, "learning_rate": 9.121610215774474e-07, "loss": 0.031, "step": 833700 }, { "epoch": 8.2, "grad_norm": 0.030443737283349037, "learning_rate": 9.120368991231991e-07, "loss": 0.0278, "step": 833725 }, { "epoch": 8.2, "grad_norm": 4.544994831085205, "learning_rate": 9.119127766689506e-07, "loss": 0.0258, "step": 833750 }, { "epoch": 8.2, "grad_norm": 0.7124671936035156, "learning_rate": 9.117886542147021e-07, "loss": 0.0412, "step": 833775 }, { "epoch": 8.2, "grad_norm": 6.82421875, "learning_rate": 9.116645317604537e-07, "loss": 0.0246, "step": 833800 }, { "epoch": 8.2, "grad_norm": 0.1743442714214325, "learning_rate": 9.115404093062053e-07, "loss": 0.0316, "step": 833825 }, { "epoch": 8.2, "grad_norm": 1.8061304092407227, "learning_rate": 9.114162868519567e-07, "loss": 0.0295, "step": 833850 }, { "epoch": 8.2, "grad_norm": 2.0788562297821045, "learning_rate": 9.112921643977082e-07, "loss": 0.0261, "step": 833875 }, { "epoch": 8.2, "grad_norm": 14.904105186462402, "learning_rate": 9.111680419434599e-07, "loss": 0.0409, "step": 833900 }, { "epoch": 8.2, "grad_norm": 0.2367009073495865, "learning_rate": 9.110439194892114e-07, "loss": 0.0304, "step": 833925 }, { "epoch": 8.2, "grad_norm": 7.086714267730713, "learning_rate": 9.109197970349628e-07, "loss": 0.0221, "step": 833950 }, { "epoch": 8.2, "grad_norm": 33.49519729614258, "learning_rate": 9.107956745807144e-07, "loss": 0.0352, "step": 833975 }, { "epoch": 8.2, "grad_norm": 13.94075870513916, "learning_rate": 9.10671552126466e-07, "loss": 0.0125, "step": 834000 }, { "epoch": 8.2, "grad_norm": 0.05608084797859192, "learning_rate": 9.105474296722175e-07, "loss": 0.0285, "step": 834025 }, { "epoch": 8.2, "grad_norm": 4.1915178298950195, "learning_rate": 9.10423307217969e-07, "loss": 0.0124, "step": 834050 }, { "epoch": 8.2, "grad_norm": 2.0875000953674316, "learning_rate": 9.102991847637206e-07, "loss": 0.0455, "step": 834075 }, { "epoch": 8.2, "grad_norm": 1.6757515668869019, "learning_rate": 9.101750623094721e-07, "loss": 0.025, "step": 834100 }, { "epoch": 8.2, "grad_norm": 1.919660210609436, "learning_rate": 9.100509398552236e-07, "loss": 0.0297, "step": 834125 }, { "epoch": 8.2, "grad_norm": 4.0534257888793945, "learning_rate": 9.099268174009753e-07, "loss": 0.0435, "step": 834150 }, { "epoch": 8.2, "grad_norm": 0.3059200048446655, "learning_rate": 9.098026949467267e-07, "loss": 0.0185, "step": 834175 }, { "epoch": 8.2, "grad_norm": 8.298298835754395, "learning_rate": 9.096785724924782e-07, "loss": 0.019, "step": 834200 }, { "epoch": 8.2, "grad_norm": 0.19448088109493256, "learning_rate": 9.095544500382298e-07, "loss": 0.024, "step": 834225 }, { "epoch": 8.2, "grad_norm": 4.53460693359375, "learning_rate": 9.094303275839814e-07, "loss": 0.0143, "step": 834250 }, { "epoch": 8.2, "grad_norm": 0.07570058107376099, "learning_rate": 9.093062051297328e-07, "loss": 0.0344, "step": 834275 }, { "epoch": 8.2, "grad_norm": 9.366235733032227, "learning_rate": 9.091820826754843e-07, "loss": 0.021, "step": 834300 }, { "epoch": 8.2, "grad_norm": 0.3536016643047333, "learning_rate": 9.09057960221236e-07, "loss": 0.0347, "step": 834325 }, { "epoch": 8.2, "grad_norm": 9.174973487854004, "learning_rate": 9.089338377669875e-07, "loss": 0.0276, "step": 834350 }, { "epoch": 8.2, "grad_norm": 1.2434768676757812, "learning_rate": 9.088097153127389e-07, "loss": 0.0295, "step": 834375 }, { "epoch": 8.2, "grad_norm": 16.960168838500977, "learning_rate": 9.086855928584905e-07, "loss": 0.0245, "step": 834400 }, { "epoch": 8.2, "grad_norm": 0.5148540139198303, "learning_rate": 9.085614704042421e-07, "loss": 0.0417, "step": 834425 }, { "epoch": 8.2, "grad_norm": 0.9917593002319336, "learning_rate": 9.084373479499936e-07, "loss": 0.029, "step": 834450 }, { "epoch": 8.2, "grad_norm": 0.22034133970737457, "learning_rate": 9.083132254957452e-07, "loss": 0.024, "step": 834475 }, { "epoch": 8.2, "grad_norm": 9.21884536743164, "learning_rate": 9.081891030414968e-07, "loss": 0.0289, "step": 834500 }, { "epoch": 8.21, "grad_norm": 2.8696892261505127, "learning_rate": 9.080649805872482e-07, "loss": 0.0441, "step": 834525 }, { "epoch": 8.21, "grad_norm": 1.453142762184143, "learning_rate": 9.079408581329997e-07, "loss": 0.0163, "step": 834550 }, { "epoch": 8.21, "grad_norm": 11.045930862426758, "learning_rate": 9.078167356787514e-07, "loss": 0.0285, "step": 834575 }, { "epoch": 8.21, "grad_norm": 13.070267677307129, "learning_rate": 9.076926132245029e-07, "loss": 0.043, "step": 834600 }, { "epoch": 8.21, "grad_norm": 0.0854826495051384, "learning_rate": 9.075684907702543e-07, "loss": 0.0391, "step": 834625 }, { "epoch": 8.21, "grad_norm": 13.921280860900879, "learning_rate": 9.074443683160059e-07, "loss": 0.0478, "step": 834650 }, { "epoch": 8.21, "grad_norm": 1.5871398448944092, "learning_rate": 9.073202458617575e-07, "loss": 0.0374, "step": 834675 }, { "epoch": 8.21, "grad_norm": 11.574345588684082, "learning_rate": 9.07196123407509e-07, "loss": 0.0174, "step": 834700 }, { "epoch": 8.21, "grad_norm": 0.9778813123703003, "learning_rate": 9.070720009532604e-07, "loss": 0.0209, "step": 834725 }, { "epoch": 8.21, "grad_norm": 0.2574823796749115, "learning_rate": 9.069478784990121e-07, "loss": 0.0175, "step": 834750 }, { "epoch": 8.21, "grad_norm": 1.3669211864471436, "learning_rate": 9.068237560447636e-07, "loss": 0.0316, "step": 834775 }, { "epoch": 8.21, "grad_norm": 1.2827643156051636, "learning_rate": 9.066996335905151e-07, "loss": 0.019, "step": 834800 }, { "epoch": 8.21, "grad_norm": 0.3736295998096466, "learning_rate": 9.065755111362666e-07, "loss": 0.0282, "step": 834825 }, { "epoch": 8.21, "grad_norm": 12.538418769836426, "learning_rate": 9.064513886820182e-07, "loss": 0.0362, "step": 834850 }, { "epoch": 8.21, "grad_norm": 0.12672971189022064, "learning_rate": 9.063272662277697e-07, "loss": 0.0417, "step": 834875 }, { "epoch": 8.21, "grad_norm": 8.221549987792969, "learning_rate": 9.062031437735213e-07, "loss": 0.0145, "step": 834900 }, { "epoch": 8.21, "grad_norm": 0.02385631948709488, "learning_rate": 9.060790213192729e-07, "loss": 0.0312, "step": 834925 }, { "epoch": 8.21, "grad_norm": 10.921947479248047, "learning_rate": 9.059548988650243e-07, "loss": 0.0307, "step": 834950 }, { "epoch": 8.21, "grad_norm": 0.11102993041276932, "learning_rate": 9.058307764107758e-07, "loss": 0.0166, "step": 834975 }, { "epoch": 8.21, "grad_norm": 12.077014923095703, "learning_rate": 9.057066539565275e-07, "loss": 0.0336, "step": 835000 }, { "epoch": 8.21, "grad_norm": 3.6130330562591553, "learning_rate": 9.05582531502279e-07, "loss": 0.0277, "step": 835025 }, { "epoch": 8.21, "grad_norm": 6.041616916656494, "learning_rate": 9.054584090480304e-07, "loss": 0.0239, "step": 835050 }, { "epoch": 8.21, "grad_norm": 0.14692117273807526, "learning_rate": 9.05334286593782e-07, "loss": 0.0396, "step": 835075 }, { "epoch": 8.21, "grad_norm": 4.309009075164795, "learning_rate": 9.052101641395336e-07, "loss": 0.0293, "step": 835100 }, { "epoch": 8.21, "grad_norm": 0.030504383146762848, "learning_rate": 9.050860416852851e-07, "loss": 0.0323, "step": 835125 }, { "epoch": 8.21, "grad_norm": 4.621969699859619, "learning_rate": 9.049619192310367e-07, "loss": 0.0487, "step": 835150 }, { "epoch": 8.21, "grad_norm": 0.26763659715652466, "learning_rate": 9.048377967767883e-07, "loss": 0.0424, "step": 835175 }, { "epoch": 8.21, "grad_norm": 4.477473735809326, "learning_rate": 9.047136743225397e-07, "loss": 0.0409, "step": 835200 }, { "epoch": 8.21, "grad_norm": 4.5289812088012695, "learning_rate": 9.045895518682912e-07, "loss": 0.0424, "step": 835225 }, { "epoch": 8.21, "grad_norm": 6.702846050262451, "learning_rate": 9.044654294140428e-07, "loss": 0.0274, "step": 835250 }, { "epoch": 8.21, "grad_norm": 6.190470218658447, "learning_rate": 9.043413069597944e-07, "loss": 0.0377, "step": 835275 }, { "epoch": 8.21, "grad_norm": 4.50600528717041, "learning_rate": 9.042171845055458e-07, "loss": 0.0245, "step": 835300 }, { "epoch": 8.21, "grad_norm": 0.08555589616298676, "learning_rate": 9.040930620512974e-07, "loss": 0.0469, "step": 835325 }, { "epoch": 8.21, "grad_norm": 6.522838592529297, "learning_rate": 9.03968939597049e-07, "loss": 0.0293, "step": 835350 }, { "epoch": 8.21, "grad_norm": 0.5961874127388, "learning_rate": 9.038497820409704e-07, "loss": 0.032, "step": 835375 }, { "epoch": 8.21, "grad_norm": 0.7885967493057251, "learning_rate": 9.03725659586722e-07, "loss": 0.0335, "step": 835400 }, { "epoch": 8.21, "grad_norm": 15.6397705078125, "learning_rate": 9.036015371324734e-07, "loss": 0.0344, "step": 835425 }, { "epoch": 8.21, "grad_norm": 4.600589752197266, "learning_rate": 9.03477414678225e-07, "loss": 0.0169, "step": 835450 }, { "epoch": 8.21, "grad_norm": 0.7386346459388733, "learning_rate": 9.033532922239766e-07, "loss": 0.0507, "step": 835475 }, { "epoch": 8.21, "grad_norm": 3.281604051589966, "learning_rate": 9.032291697697281e-07, "loss": 0.0336, "step": 835500 }, { "epoch": 8.22, "grad_norm": 0.0623861625790596, "learning_rate": 9.031050473154797e-07, "loss": 0.0359, "step": 835525 }, { "epoch": 8.22, "grad_norm": 7.494283199310303, "learning_rate": 9.029809248612311e-07, "loss": 0.0275, "step": 835550 }, { "epoch": 8.22, "grad_norm": 0.013399045914411545, "learning_rate": 9.028568024069827e-07, "loss": 0.0404, "step": 835575 }, { "epoch": 8.22, "grad_norm": 9.461043357849121, "learning_rate": 9.027326799527343e-07, "loss": 0.0265, "step": 835600 }, { "epoch": 8.22, "grad_norm": 0.005787807051092386, "learning_rate": 9.026085574984858e-07, "loss": 0.0339, "step": 835625 }, { "epoch": 8.22, "grad_norm": 7.180604457855225, "learning_rate": 9.024844350442373e-07, "loss": 0.0273, "step": 835650 }, { "epoch": 8.22, "grad_norm": 2.4529826641082764, "learning_rate": 9.023603125899888e-07, "loss": 0.028, "step": 835675 }, { "epoch": 8.22, "grad_norm": 8.792553901672363, "learning_rate": 9.022361901357404e-07, "loss": 0.0118, "step": 835700 }, { "epoch": 8.22, "grad_norm": 0.13901321589946747, "learning_rate": 9.02112067681492e-07, "loss": 0.0505, "step": 835725 }, { "epoch": 8.22, "grad_norm": 6.876675128936768, "learning_rate": 9.019879452272434e-07, "loss": 0.0218, "step": 835750 }, { "epoch": 8.22, "grad_norm": 0.1236894503235817, "learning_rate": 9.01863822772995e-07, "loss": 0.0257, "step": 835775 }, { "epoch": 8.22, "grad_norm": 5.5858001708984375, "learning_rate": 9.017397003187465e-07, "loss": 0.0246, "step": 835800 }, { "epoch": 8.22, "grad_norm": 0.06953486800193787, "learning_rate": 9.016155778644981e-07, "loss": 0.0246, "step": 835825 }, { "epoch": 8.22, "grad_norm": 3.5749988555908203, "learning_rate": 9.014914554102495e-07, "loss": 0.0249, "step": 835850 }, { "epoch": 8.22, "grad_norm": 0.15483394265174866, "learning_rate": 9.013673329560011e-07, "loss": 0.0403, "step": 835875 }, { "epoch": 8.22, "grad_norm": 5.601081371307373, "learning_rate": 9.012432105017527e-07, "loss": 0.0241, "step": 835900 }, { "epoch": 8.22, "grad_norm": 3.6840639114379883, "learning_rate": 9.011190880475042e-07, "loss": 0.0308, "step": 835925 }, { "epoch": 8.22, "grad_norm": 2.426494598388672, "learning_rate": 9.009949655932558e-07, "loss": 0.0285, "step": 835950 }, { "epoch": 8.22, "grad_norm": 5.665165424346924, "learning_rate": 9.008708431390074e-07, "loss": 0.0505, "step": 835975 }, { "epoch": 8.22, "grad_norm": 23.404354095458984, "learning_rate": 9.007467206847588e-07, "loss": 0.0569, "step": 836000 }, { "epoch": 8.22, "grad_norm": 9.67186450958252, "learning_rate": 9.006225982305104e-07, "loss": 0.0496, "step": 836025 }, { "epoch": 8.22, "grad_norm": 5.091545104980469, "learning_rate": 9.004984757762619e-07, "loss": 0.032, "step": 836050 }, { "epoch": 8.22, "grad_norm": 0.20106001198291779, "learning_rate": 9.003743533220135e-07, "loss": 0.0384, "step": 836075 }, { "epoch": 8.22, "grad_norm": 6.216540813446045, "learning_rate": 9.002502308677649e-07, "loss": 0.0277, "step": 836100 }, { "epoch": 8.22, "grad_norm": 0.027703477069735527, "learning_rate": 9.001261084135165e-07, "loss": 0.048, "step": 836125 }, { "epoch": 8.22, "grad_norm": 13.564788818359375, "learning_rate": 9.000019859592681e-07, "loss": 0.0221, "step": 836150 }, { "epoch": 8.22, "grad_norm": 4.440853118896484, "learning_rate": 8.998778635050196e-07, "loss": 0.032, "step": 836175 }, { "epoch": 8.22, "grad_norm": 3.1890506744384766, "learning_rate": 8.997537410507712e-07, "loss": 0.0215, "step": 836200 }, { "epoch": 8.22, "grad_norm": 0.1442861258983612, "learning_rate": 8.996296185965226e-07, "loss": 0.0369, "step": 836225 }, { "epoch": 8.22, "grad_norm": 9.11838150024414, "learning_rate": 8.995054961422742e-07, "loss": 0.018, "step": 836250 }, { "epoch": 8.22, "grad_norm": 0.005436664447188377, "learning_rate": 8.993813736880257e-07, "loss": 0.0379, "step": 836275 }, { "epoch": 8.22, "grad_norm": 12.905226707458496, "learning_rate": 8.992572512337773e-07, "loss": 0.0304, "step": 836300 }, { "epoch": 8.22, "grad_norm": 0.19797486066818237, "learning_rate": 8.991331287795288e-07, "loss": 0.0292, "step": 836325 }, { "epoch": 8.22, "grad_norm": 13.854531288146973, "learning_rate": 8.990090063252803e-07, "loss": 0.0276, "step": 836350 }, { "epoch": 8.22, "grad_norm": 0.06534098088741302, "learning_rate": 8.988848838710319e-07, "loss": 0.0231, "step": 836375 }, { "epoch": 8.22, "grad_norm": 1.1369010210037231, "learning_rate": 8.987607614167835e-07, "loss": 0.0302, "step": 836400 }, { "epoch": 8.22, "grad_norm": 2.1208817958831787, "learning_rate": 8.986366389625349e-07, "loss": 0.0288, "step": 836425 }, { "epoch": 8.22, "grad_norm": 8.275529861450195, "learning_rate": 8.985125165082865e-07, "loss": 0.0298, "step": 836450 }, { "epoch": 8.22, "grad_norm": 0.30685368180274963, "learning_rate": 8.98388394054038e-07, "loss": 0.0478, "step": 836475 }, { "epoch": 8.22, "grad_norm": 16.080097198486328, "learning_rate": 8.982642715997896e-07, "loss": 0.0233, "step": 836500 }, { "epoch": 8.22, "grad_norm": 0.01301368698477745, "learning_rate": 8.98140149145541e-07, "loss": 0.0322, "step": 836525 }, { "epoch": 8.23, "grad_norm": 10.958765029907227, "learning_rate": 8.980160266912926e-07, "loss": 0.0293, "step": 836550 }, { "epoch": 8.23, "grad_norm": 0.8897262215614319, "learning_rate": 8.978919042370442e-07, "loss": 0.0235, "step": 836575 }, { "epoch": 8.23, "grad_norm": 14.671427726745605, "learning_rate": 8.977677817827957e-07, "loss": 0.0308, "step": 836600 }, { "epoch": 8.23, "grad_norm": 3.9362497329711914, "learning_rate": 8.976436593285473e-07, "loss": 0.0444, "step": 836625 }, { "epoch": 8.23, "grad_norm": 6.927855014801025, "learning_rate": 8.975195368742988e-07, "loss": 0.0311, "step": 836650 }, { "epoch": 8.23, "grad_norm": 2.7508676052093506, "learning_rate": 8.973954144200503e-07, "loss": 0.0412, "step": 836675 }, { "epoch": 8.23, "grad_norm": 6.8418684005737305, "learning_rate": 8.972712919658018e-07, "loss": 0.0223, "step": 836700 }, { "epoch": 8.23, "grad_norm": 0.48631688952445984, "learning_rate": 8.971471695115534e-07, "loss": 0.0399, "step": 836725 }, { "epoch": 8.23, "grad_norm": 10.27647876739502, "learning_rate": 8.97023047057305e-07, "loss": 0.032, "step": 836750 }, { "epoch": 8.23, "grad_norm": 0.4790596067905426, "learning_rate": 8.968989246030564e-07, "loss": 0.0485, "step": 836775 }, { "epoch": 8.23, "grad_norm": 0.15788894891738892, "learning_rate": 8.96774802148808e-07, "loss": 0.0243, "step": 836800 }, { "epoch": 8.23, "grad_norm": 8.670729637145996, "learning_rate": 8.966506796945596e-07, "loss": 0.031, "step": 836825 }, { "epoch": 8.23, "grad_norm": 6.280781269073486, "learning_rate": 8.965265572403111e-07, "loss": 0.0293, "step": 836850 }, { "epoch": 8.23, "grad_norm": 0.0044999513775110245, "learning_rate": 8.964024347860627e-07, "loss": 0.0427, "step": 836875 }, { "epoch": 8.23, "grad_norm": 3.536144256591797, "learning_rate": 8.962783123318141e-07, "loss": 0.0179, "step": 836900 }, { "epoch": 8.23, "grad_norm": 6.8714776039123535, "learning_rate": 8.961541898775657e-07, "loss": 0.04, "step": 836925 }, { "epoch": 8.23, "grad_norm": 13.18626880645752, "learning_rate": 8.960300674233172e-07, "loss": 0.0323, "step": 836950 }, { "epoch": 8.23, "grad_norm": 0.05015888810157776, "learning_rate": 8.959059449690688e-07, "loss": 0.0501, "step": 836975 }, { "epoch": 8.23, "grad_norm": 18.297700881958008, "learning_rate": 8.957818225148203e-07, "loss": 0.0296, "step": 837000 }, { "epoch": 8.23, "grad_norm": 1.1891674995422363, "learning_rate": 8.956577000605718e-07, "loss": 0.0343, "step": 837025 }, { "epoch": 8.23, "grad_norm": 2.078747034072876, "learning_rate": 8.955335776063234e-07, "loss": 0.0318, "step": 837050 }, { "epoch": 8.23, "grad_norm": 0.26296091079711914, "learning_rate": 8.95409455152075e-07, "loss": 0.0308, "step": 837075 }, { "epoch": 8.23, "grad_norm": 4.404293060302734, "learning_rate": 8.952853326978264e-07, "loss": 0.021, "step": 837100 }, { "epoch": 8.23, "grad_norm": 0.05342235788702965, "learning_rate": 8.951612102435779e-07, "loss": 0.0424, "step": 837125 }, { "epoch": 8.23, "grad_norm": 5.941458702087402, "learning_rate": 8.950370877893295e-07, "loss": 0.0249, "step": 837150 }, { "epoch": 8.23, "grad_norm": 0.9130532145500183, "learning_rate": 8.949129653350811e-07, "loss": 0.0376, "step": 837175 }, { "epoch": 8.23, "grad_norm": 1.8220994472503662, "learning_rate": 8.947888428808325e-07, "loss": 0.0407, "step": 837200 }, { "epoch": 8.23, "grad_norm": 0.04645228385925293, "learning_rate": 8.946647204265841e-07, "loss": 0.0439, "step": 837225 }, { "epoch": 8.23, "grad_norm": 6.770437240600586, "learning_rate": 8.945405979723357e-07, "loss": 0.0156, "step": 837250 }, { "epoch": 8.23, "grad_norm": 0.5710392594337463, "learning_rate": 8.944164755180872e-07, "loss": 0.025, "step": 837275 }, { "epoch": 8.23, "grad_norm": 7.49791145324707, "learning_rate": 8.942923530638388e-07, "loss": 0.0219, "step": 837300 }, { "epoch": 8.23, "grad_norm": 1.9566689729690552, "learning_rate": 8.941682306095902e-07, "loss": 0.0332, "step": 837325 }, { "epoch": 8.23, "grad_norm": 5.12656307220459, "learning_rate": 8.940441081553418e-07, "loss": 0.0168, "step": 837350 }, { "epoch": 8.23, "grad_norm": 6.209670066833496, "learning_rate": 8.939199857010933e-07, "loss": 0.0217, "step": 837375 }, { "epoch": 8.23, "grad_norm": 9.596043586730957, "learning_rate": 8.937958632468449e-07, "loss": 0.0342, "step": 837400 }, { "epoch": 8.23, "grad_norm": 2.388824462890625, "learning_rate": 8.936717407925965e-07, "loss": 0.034, "step": 837425 }, { "epoch": 8.23, "grad_norm": 12.497504234313965, "learning_rate": 8.935476183383479e-07, "loss": 0.0242, "step": 837450 }, { "epoch": 8.23, "grad_norm": 0.053756292909383774, "learning_rate": 8.934234958840995e-07, "loss": 0.0428, "step": 837475 }, { "epoch": 8.23, "grad_norm": 14.33594036102295, "learning_rate": 8.93299373429851e-07, "loss": 0.0279, "step": 837500 }, { "epoch": 8.23, "grad_norm": 0.017162542790174484, "learning_rate": 8.931752509756026e-07, "loss": 0.034, "step": 837525 }, { "epoch": 8.23, "grad_norm": 16.463212966918945, "learning_rate": 8.93051128521354e-07, "loss": 0.0325, "step": 837550 }, { "epoch": 8.24, "grad_norm": 26.059358596801758, "learning_rate": 8.929270060671056e-07, "loss": 0.045, "step": 837575 }, { "epoch": 8.24, "grad_norm": 0.08637768030166626, "learning_rate": 8.928028836128572e-07, "loss": 0.0264, "step": 837600 }, { "epoch": 8.24, "grad_norm": 5.623885631561279, "learning_rate": 8.926787611586087e-07, "loss": 0.0348, "step": 837625 }, { "epoch": 8.24, "grad_norm": 14.329251289367676, "learning_rate": 8.925546387043603e-07, "loss": 0.0241, "step": 837650 }, { "epoch": 8.24, "grad_norm": 0.00575620960444212, "learning_rate": 8.924305162501118e-07, "loss": 0.0395, "step": 837675 }, { "epoch": 8.24, "grad_norm": 3.506195545196533, "learning_rate": 8.923063937958633e-07, "loss": 0.0232, "step": 837700 }, { "epoch": 8.24, "grad_norm": 1.5519914627075195, "learning_rate": 8.921822713416149e-07, "loss": 0.0298, "step": 837725 }, { "epoch": 8.24, "grad_norm": 5.820202350616455, "learning_rate": 8.920581488873664e-07, "loss": 0.0335, "step": 837750 }, { "epoch": 8.24, "grad_norm": 0.5644323825836182, "learning_rate": 8.919340264331179e-07, "loss": 0.0411, "step": 837775 }, { "epoch": 8.24, "grad_norm": 7.118903160095215, "learning_rate": 8.918099039788694e-07, "loss": 0.0437, "step": 837800 }, { "epoch": 8.24, "grad_norm": 0.060434263199567795, "learning_rate": 8.91685781524621e-07, "loss": 0.0286, "step": 837825 }, { "epoch": 8.24, "grad_norm": 11.388517379760742, "learning_rate": 8.915616590703726e-07, "loss": 0.0245, "step": 837850 }, { "epoch": 8.24, "grad_norm": 1.1371712684631348, "learning_rate": 8.91437536616124e-07, "loss": 0.0284, "step": 837875 }, { "epoch": 8.24, "grad_norm": 3.617746353149414, "learning_rate": 8.913134141618756e-07, "loss": 0.023, "step": 837900 }, { "epoch": 8.24, "grad_norm": 3.069862127304077, "learning_rate": 8.911892917076272e-07, "loss": 0.0227, "step": 837925 }, { "epoch": 8.24, "grad_norm": 25.327713012695312, "learning_rate": 8.910651692533787e-07, "loss": 0.0362, "step": 837950 }, { "epoch": 8.24, "grad_norm": 0.031501252204179764, "learning_rate": 8.909410467991301e-07, "loss": 0.0254, "step": 837975 }, { "epoch": 8.24, "grad_norm": 10.067425727844238, "learning_rate": 8.908169243448817e-07, "loss": 0.0205, "step": 838000 }, { "epoch": 8.24, "grad_norm": 3.21066951751709, "learning_rate": 8.906928018906333e-07, "loss": 0.0282, "step": 838025 }, { "epoch": 8.24, "grad_norm": 11.445228576660156, "learning_rate": 8.905686794363848e-07, "loss": 0.0184, "step": 838050 }, { "epoch": 8.24, "grad_norm": 0.005226207431405783, "learning_rate": 8.904445569821364e-07, "loss": 0.0542, "step": 838075 }, { "epoch": 8.24, "grad_norm": 14.708910942077637, "learning_rate": 8.90320434527888e-07, "loss": 0.0296, "step": 838100 }, { "epoch": 8.24, "grad_norm": 0.01846800372004509, "learning_rate": 8.901963120736394e-07, "loss": 0.0302, "step": 838125 }, { "epoch": 8.24, "grad_norm": 8.832355499267578, "learning_rate": 8.90072189619391e-07, "loss": 0.028, "step": 838150 }, { "epoch": 8.24, "grad_norm": 0.032457154244184494, "learning_rate": 8.899480671651425e-07, "loss": 0.0252, "step": 838175 }, { "epoch": 8.24, "grad_norm": 7.608874320983887, "learning_rate": 8.898239447108941e-07, "loss": 0.0233, "step": 838200 }, { "epoch": 8.24, "grad_norm": 13.460131645202637, "learning_rate": 8.896998222566455e-07, "loss": 0.0215, "step": 838225 }, { "epoch": 8.24, "grad_norm": 16.178831100463867, "learning_rate": 8.895756998023971e-07, "loss": 0.027, "step": 838250 }, { "epoch": 8.24, "grad_norm": 0.3058798313140869, "learning_rate": 8.894515773481487e-07, "loss": 0.0339, "step": 838275 }, { "epoch": 8.24, "grad_norm": 7.306203842163086, "learning_rate": 8.893274548939002e-07, "loss": 0.0222, "step": 838300 }, { "epoch": 8.24, "grad_norm": 0.007481888402253389, "learning_rate": 8.892033324396518e-07, "loss": 0.034, "step": 838325 }, { "epoch": 8.24, "grad_norm": 0.7925171852111816, "learning_rate": 8.890792099854033e-07, "loss": 0.0246, "step": 838350 }, { "epoch": 8.24, "grad_norm": 0.051334407180547714, "learning_rate": 8.889550875311548e-07, "loss": 0.0187, "step": 838375 }, { "epoch": 8.24, "grad_norm": 5.109270095825195, "learning_rate": 8.888309650769063e-07, "loss": 0.0235, "step": 838400 }, { "epoch": 8.24, "grad_norm": 0.048269305378198624, "learning_rate": 8.887068426226579e-07, "loss": 0.0242, "step": 838425 }, { "epoch": 8.24, "grad_norm": 1.7845622301101685, "learning_rate": 8.885827201684094e-07, "loss": 0.0207, "step": 838450 }, { "epoch": 8.24, "grad_norm": 2.601273536682129, "learning_rate": 8.884585977141609e-07, "loss": 0.0451, "step": 838475 }, { "epoch": 8.24, "grad_norm": 16.740354537963867, "learning_rate": 8.883344752599125e-07, "loss": 0.0395, "step": 838500 }, { "epoch": 8.24, "grad_norm": 0.20810064673423767, "learning_rate": 8.882103528056641e-07, "loss": 0.0354, "step": 838525 }, { "epoch": 8.24, "grad_norm": 1.7096971273422241, "learning_rate": 8.880862303514155e-07, "loss": 0.0223, "step": 838550 }, { "epoch": 8.25, "grad_norm": 0.013636604882776737, "learning_rate": 8.879621078971671e-07, "loss": 0.0283, "step": 838575 }, { "epoch": 8.25, "grad_norm": 9.319705963134766, "learning_rate": 8.878379854429186e-07, "loss": 0.0221, "step": 838600 }, { "epoch": 8.25, "grad_norm": 0.06617417186498642, "learning_rate": 8.877138629886702e-07, "loss": 0.0383, "step": 838625 }, { "epoch": 8.25, "grad_norm": 11.335586547851562, "learning_rate": 8.875897405344216e-07, "loss": 0.0261, "step": 838650 }, { "epoch": 8.25, "grad_norm": 0.005653258413076401, "learning_rate": 8.874656180801732e-07, "loss": 0.0225, "step": 838675 }, { "epoch": 8.25, "grad_norm": 7.3263983726501465, "learning_rate": 8.873414956259248e-07, "loss": 0.027, "step": 838700 }, { "epoch": 8.25, "grad_norm": 0.0158549752086401, "learning_rate": 8.872173731716763e-07, "loss": 0.0186, "step": 838725 }, { "epoch": 8.25, "grad_norm": 10.693306922912598, "learning_rate": 8.870932507174279e-07, "loss": 0.0277, "step": 838750 }, { "epoch": 8.25, "grad_norm": 1.11078941822052, "learning_rate": 8.869691282631795e-07, "loss": 0.0323, "step": 838775 }, { "epoch": 8.25, "grad_norm": 0.07416195422410965, "learning_rate": 8.868450058089309e-07, "loss": 0.0236, "step": 838800 }, { "epoch": 8.25, "grad_norm": 8.793261528015137, "learning_rate": 8.867208833546824e-07, "loss": 0.0469, "step": 838825 }, { "epoch": 8.25, "grad_norm": 14.290658950805664, "learning_rate": 8.86596760900434e-07, "loss": 0.031, "step": 838850 }, { "epoch": 8.25, "grad_norm": 0.4260832965373993, "learning_rate": 8.864726384461856e-07, "loss": 0.0147, "step": 838875 }, { "epoch": 8.25, "grad_norm": 10.282642364501953, "learning_rate": 8.86348515991937e-07, "loss": 0.021, "step": 838900 }, { "epoch": 8.25, "grad_norm": 0.021406101062893867, "learning_rate": 8.862243935376886e-07, "loss": 0.0291, "step": 838925 }, { "epoch": 8.25, "grad_norm": 2.9422693252563477, "learning_rate": 8.861002710834402e-07, "loss": 0.0251, "step": 838950 }, { "epoch": 8.25, "grad_norm": 0.055908966809511185, "learning_rate": 8.859761486291917e-07, "loss": 0.0323, "step": 838975 }, { "epoch": 8.25, "grad_norm": 14.181488037109375, "learning_rate": 8.858520261749433e-07, "loss": 0.015, "step": 839000 }, { "epoch": 8.25, "grad_norm": 0.6979736685752869, "learning_rate": 8.857279037206947e-07, "loss": 0.0416, "step": 839025 }, { "epoch": 8.25, "grad_norm": 6.762232780456543, "learning_rate": 8.856037812664463e-07, "loss": 0.022, "step": 839050 }, { "epoch": 8.25, "grad_norm": 0.08959279209375381, "learning_rate": 8.854796588121978e-07, "loss": 0.0249, "step": 839075 }, { "epoch": 8.25, "grad_norm": 5.4189653396606445, "learning_rate": 8.853555363579494e-07, "loss": 0.025, "step": 839100 }, { "epoch": 8.25, "grad_norm": 3.049522876739502, "learning_rate": 8.852314139037009e-07, "loss": 0.037, "step": 839125 }, { "epoch": 8.25, "grad_norm": 0.3956468403339386, "learning_rate": 8.851072914494524e-07, "loss": 0.0299, "step": 839150 }, { "epoch": 8.25, "grad_norm": 1.8717902898788452, "learning_rate": 8.84983168995204e-07, "loss": 0.0405, "step": 839175 }, { "epoch": 8.25, "grad_norm": 2.314302444458008, "learning_rate": 8.848590465409556e-07, "loss": 0.0196, "step": 839200 }, { "epoch": 8.25, "grad_norm": 14.08961009979248, "learning_rate": 8.84739888984877e-07, "loss": 0.0419, "step": 839225 }, { "epoch": 8.25, "grad_norm": 18.790842056274414, "learning_rate": 8.846157665306284e-07, "loss": 0.0374, "step": 839250 }, { "epoch": 8.25, "grad_norm": 1.5515025854110718, "learning_rate": 8.844916440763801e-07, "loss": 0.0338, "step": 839275 }, { "epoch": 8.25, "grad_norm": 2.0458271503448486, "learning_rate": 8.843675216221316e-07, "loss": 0.0292, "step": 839300 }, { "epoch": 8.25, "grad_norm": 0.8451153039932251, "learning_rate": 8.842433991678831e-07, "loss": 0.0354, "step": 839325 }, { "epoch": 8.25, "grad_norm": 17.92559051513672, "learning_rate": 8.841192767136348e-07, "loss": 0.0343, "step": 839350 }, { "epoch": 8.25, "grad_norm": 1.4524489641189575, "learning_rate": 8.839951542593862e-07, "loss": 0.0331, "step": 839375 }, { "epoch": 8.25, "grad_norm": 11.021172523498535, "learning_rate": 8.838710318051377e-07, "loss": 0.0235, "step": 839400 }, { "epoch": 8.25, "grad_norm": 0.037184134125709534, "learning_rate": 8.837469093508892e-07, "loss": 0.0238, "step": 839425 }, { "epoch": 8.25, "grad_norm": 10.882859230041504, "learning_rate": 8.836227868966409e-07, "loss": 0.0233, "step": 839450 }, { "epoch": 8.25, "grad_norm": 0.006578544620424509, "learning_rate": 8.834986644423923e-07, "loss": 0.0291, "step": 839475 }, { "epoch": 8.25, "grad_norm": 11.163710594177246, "learning_rate": 8.833745419881438e-07, "loss": 0.0294, "step": 839500 }, { "epoch": 8.25, "grad_norm": 0.04883063584566116, "learning_rate": 8.832504195338955e-07, "loss": 0.0434, "step": 839525 }, { "epoch": 8.25, "grad_norm": 6.774659633636475, "learning_rate": 8.83126297079647e-07, "loss": 0.0349, "step": 839550 }, { "epoch": 8.25, "grad_norm": 0.11649155616760254, "learning_rate": 8.830021746253985e-07, "loss": 0.0368, "step": 839575 }, { "epoch": 8.26, "grad_norm": 2.507979154586792, "learning_rate": 8.828780521711502e-07, "loss": 0.017, "step": 839600 }, { "epoch": 8.26, "grad_norm": 0.03245871141552925, "learning_rate": 8.827539297169016e-07, "loss": 0.029, "step": 839625 }, { "epoch": 8.26, "grad_norm": 3.8564958572387695, "learning_rate": 8.826298072626531e-07, "loss": 0.0429, "step": 839650 }, { "epoch": 8.26, "grad_norm": 0.14041826128959656, "learning_rate": 8.825056848084046e-07, "loss": 0.0244, "step": 839675 }, { "epoch": 8.26, "grad_norm": 10.495895385742188, "learning_rate": 8.823815623541563e-07, "loss": 0.0297, "step": 839700 }, { "epoch": 8.26, "grad_norm": 0.03197634220123291, "learning_rate": 8.822574398999077e-07, "loss": 0.0232, "step": 839725 }, { "epoch": 8.26, "grad_norm": 1.2024433612823486, "learning_rate": 8.821333174456592e-07, "loss": 0.0288, "step": 839750 }, { "epoch": 8.26, "grad_norm": 0.0483337938785553, "learning_rate": 8.820091949914109e-07, "loss": 0.0391, "step": 839775 }, { "epoch": 8.26, "grad_norm": 14.860261917114258, "learning_rate": 8.818850725371624e-07, "loss": 0.0404, "step": 839800 }, { "epoch": 8.26, "grad_norm": 0.0030562931206077337, "learning_rate": 8.817609500829138e-07, "loss": 0.0597, "step": 839825 }, { "epoch": 8.26, "grad_norm": 7.461400032043457, "learning_rate": 8.816368276286653e-07, "loss": 0.0313, "step": 839850 }, { "epoch": 8.26, "grad_norm": 0.25287482142448425, "learning_rate": 8.81512705174417e-07, "loss": 0.0435, "step": 839875 }, { "epoch": 8.26, "grad_norm": 5.106017112731934, "learning_rate": 8.813885827201685e-07, "loss": 0.0263, "step": 839900 }, { "epoch": 8.26, "grad_norm": 1.927666187286377, "learning_rate": 8.812644602659199e-07, "loss": 0.024, "step": 839925 }, { "epoch": 8.26, "grad_norm": 3.3942770957946777, "learning_rate": 8.811403378116716e-07, "loss": 0.023, "step": 839950 }, { "epoch": 8.26, "grad_norm": 1.1936991214752197, "learning_rate": 8.810162153574231e-07, "loss": 0.0324, "step": 839975 }, { "epoch": 8.26, "grad_norm": 4.028674125671387, "learning_rate": 8.808920929031746e-07, "loss": 0.0125, "step": 840000 }, { "epoch": 8.26, "eval_loss": 0.9071944952011108, "eval_runtime": 6080.4565, "eval_samples_per_second": 1.557, "eval_steps_per_second": 0.195, "eval_wer": 0.1112309277023112, "step": 840000 }, { "epoch": 8.26, "grad_norm": 0.1337580680847168, "learning_rate": 8.807679704489263e-07, "loss": 0.0326, "step": 840025 }, { "epoch": 8.26, "grad_norm": 11.990198135375977, "learning_rate": 8.806438479946777e-07, "loss": 0.0218, "step": 840050 }, { "epoch": 8.26, "grad_norm": 1.8295972347259521, "learning_rate": 8.805197255404292e-07, "loss": 0.0324, "step": 840075 }, { "epoch": 8.26, "grad_norm": 4.261614799499512, "learning_rate": 8.803956030861807e-07, "loss": 0.0139, "step": 840100 }, { "epoch": 8.26, "grad_norm": 5.0353522300720215, "learning_rate": 8.802714806319324e-07, "loss": 0.0435, "step": 840125 }, { "epoch": 8.26, "grad_norm": 9.00851058959961, "learning_rate": 8.801473581776838e-07, "loss": 0.031, "step": 840150 }, { "epoch": 8.26, "grad_norm": 0.006845302879810333, "learning_rate": 8.800232357234353e-07, "loss": 0.0238, "step": 840175 }, { "epoch": 8.26, "grad_norm": 11.71200180053711, "learning_rate": 8.79899113269187e-07, "loss": 0.0365, "step": 840200 }, { "epoch": 8.26, "grad_norm": 0.005443362519145012, "learning_rate": 8.797749908149385e-07, "loss": 0.047, "step": 840225 }, { "epoch": 8.26, "grad_norm": 4.811871528625488, "learning_rate": 8.7965086836069e-07, "loss": 0.032, "step": 840250 }, { "epoch": 8.26, "grad_norm": 0.8244174718856812, "learning_rate": 8.795267459064414e-07, "loss": 0.0306, "step": 840275 }, { "epoch": 8.26, "grad_norm": 15.165207862854004, "learning_rate": 8.794026234521931e-07, "loss": 0.0311, "step": 840300 }, { "epoch": 8.26, "grad_norm": 0.6579383611679077, "learning_rate": 8.792785009979446e-07, "loss": 0.036, "step": 840325 }, { "epoch": 8.26, "grad_norm": 3.3288562297821045, "learning_rate": 8.791543785436961e-07, "loss": 0.0328, "step": 840350 }, { "epoch": 8.26, "grad_norm": 0.048114802688360214, "learning_rate": 8.790302560894478e-07, "loss": 0.0455, "step": 840375 }, { "epoch": 8.26, "grad_norm": 2.1960461139678955, "learning_rate": 8.789061336351992e-07, "loss": 0.0397, "step": 840400 }, { "epoch": 8.26, "grad_norm": 0.005859644617885351, "learning_rate": 8.787820111809507e-07, "loss": 0.0237, "step": 840425 }, { "epoch": 8.26, "grad_norm": 10.416013717651367, "learning_rate": 8.786578887267024e-07, "loss": 0.0335, "step": 840450 }, { "epoch": 8.26, "grad_norm": 5.493645668029785, "learning_rate": 8.785337662724539e-07, "loss": 0.0357, "step": 840475 }, { "epoch": 8.26, "grad_norm": 11.517450332641602, "learning_rate": 8.784096438182053e-07, "loss": 0.0238, "step": 840500 }, { "epoch": 8.26, "grad_norm": 1.9710637331008911, "learning_rate": 8.782855213639568e-07, "loss": 0.0262, "step": 840525 }, { "epoch": 8.26, "grad_norm": 15.348648071289062, "learning_rate": 8.781613989097085e-07, "loss": 0.0329, "step": 840550 }, { "epoch": 8.26, "grad_norm": 1.2106170654296875, "learning_rate": 8.7803727645546e-07, "loss": 0.029, "step": 840575 }, { "epoch": 8.26, "grad_norm": 13.247222900390625, "learning_rate": 8.779131540012114e-07, "loss": 0.0281, "step": 840600 }, { "epoch": 8.27, "grad_norm": 0.048306047916412354, "learning_rate": 8.777890315469631e-07, "loss": 0.046, "step": 840625 }, { "epoch": 8.27, "grad_norm": 3.254152536392212, "learning_rate": 8.776649090927146e-07, "loss": 0.043, "step": 840650 }, { "epoch": 8.27, "grad_norm": 0.03262024372816086, "learning_rate": 8.775407866384661e-07, "loss": 0.0537, "step": 840675 }, { "epoch": 8.27, "grad_norm": 6.2166924476623535, "learning_rate": 8.774166641842175e-07, "loss": 0.0274, "step": 840700 }, { "epoch": 8.27, "grad_norm": 2.7096660137176514, "learning_rate": 8.772925417299692e-07, "loss": 0.0249, "step": 840725 }, { "epoch": 8.27, "grad_norm": 7.674136161804199, "learning_rate": 8.771684192757207e-07, "loss": 0.0249, "step": 840750 }, { "epoch": 8.27, "grad_norm": 0.03294267877936363, "learning_rate": 8.770442968214722e-07, "loss": 0.038, "step": 840775 }, { "epoch": 8.27, "grad_norm": 2.399416446685791, "learning_rate": 8.769201743672239e-07, "loss": 0.0243, "step": 840800 }, { "epoch": 8.27, "grad_norm": 0.09757590293884277, "learning_rate": 8.767960519129753e-07, "loss": 0.0366, "step": 840825 }, { "epoch": 8.27, "grad_norm": 9.877137184143066, "learning_rate": 8.766719294587268e-07, "loss": 0.0289, "step": 840850 }, { "epoch": 8.27, "grad_norm": 0.0016980019863694906, "learning_rate": 8.765478070044785e-07, "loss": 0.042, "step": 840875 }, { "epoch": 8.27, "grad_norm": 3.837808132171631, "learning_rate": 8.7642368455023e-07, "loss": 0.024, "step": 840900 }, { "epoch": 8.27, "grad_norm": 0.42330485582351685, "learning_rate": 8.762995620959815e-07, "loss": 0.0352, "step": 840925 }, { "epoch": 8.27, "grad_norm": 4.510317802429199, "learning_rate": 8.761754396417329e-07, "loss": 0.0243, "step": 840950 }, { "epoch": 8.27, "grad_norm": 0.1084790900349617, "learning_rate": 8.760513171874846e-07, "loss": 0.0343, "step": 840975 }, { "epoch": 8.27, "grad_norm": 12.01302433013916, "learning_rate": 8.759271947332361e-07, "loss": 0.042, "step": 841000 }, { "epoch": 8.27, "grad_norm": 0.14389225840568542, "learning_rate": 8.758030722789876e-07, "loss": 0.0322, "step": 841025 }, { "epoch": 8.27, "grad_norm": 12.405803680419922, "learning_rate": 8.756789498247393e-07, "loss": 0.0349, "step": 841050 }, { "epoch": 8.27, "grad_norm": 1.889918565750122, "learning_rate": 8.755548273704907e-07, "loss": 0.0614, "step": 841075 }, { "epoch": 8.27, "grad_norm": 10.685979843139648, "learning_rate": 8.754307049162422e-07, "loss": 0.0321, "step": 841100 }, { "epoch": 8.27, "grad_norm": 0.02777772583067417, "learning_rate": 8.753065824619937e-07, "loss": 0.0397, "step": 841125 }, { "epoch": 8.27, "grad_norm": 16.609468460083008, "learning_rate": 8.751824600077454e-07, "loss": 0.0275, "step": 841150 }, { "epoch": 8.27, "grad_norm": 0.4087323546409607, "learning_rate": 8.750583375534968e-07, "loss": 0.0331, "step": 841175 }, { "epoch": 8.27, "grad_norm": 0.2883492112159729, "learning_rate": 8.749342150992483e-07, "loss": 0.0236, "step": 841200 }, { "epoch": 8.27, "grad_norm": 1.5448530912399292, "learning_rate": 8.74810092645e-07, "loss": 0.0231, "step": 841225 }, { "epoch": 8.27, "grad_norm": 10.273690223693848, "learning_rate": 8.746859701907515e-07, "loss": 0.0153, "step": 841250 }, { "epoch": 8.27, "grad_norm": 0.35554641485214233, "learning_rate": 8.745618477365029e-07, "loss": 0.0459, "step": 841275 }, { "epoch": 8.27, "grad_norm": 13.399869918823242, "learning_rate": 8.744377252822546e-07, "loss": 0.0389, "step": 841300 }, { "epoch": 8.27, "grad_norm": 0.13478505611419678, "learning_rate": 8.743136028280061e-07, "loss": 0.0206, "step": 841325 }, { "epoch": 8.27, "grad_norm": 5.675668716430664, "learning_rate": 8.741894803737576e-07, "loss": 0.0199, "step": 841350 }, { "epoch": 8.27, "grad_norm": 0.014805730432271957, "learning_rate": 8.74065357919509e-07, "loss": 0.04, "step": 841375 }, { "epoch": 8.27, "grad_norm": 12.756827354431152, "learning_rate": 8.739412354652607e-07, "loss": 0.0158, "step": 841400 }, { "epoch": 8.27, "grad_norm": 0.027065778151154518, "learning_rate": 8.738171130110122e-07, "loss": 0.0439, "step": 841425 }, { "epoch": 8.27, "grad_norm": 6.793201446533203, "learning_rate": 8.736929905567637e-07, "loss": 0.0363, "step": 841450 }, { "epoch": 8.27, "grad_norm": 0.11290794610977173, "learning_rate": 8.735688681025154e-07, "loss": 0.0213, "step": 841475 }, { "epoch": 8.27, "grad_norm": 7.008427143096924, "learning_rate": 8.734447456482668e-07, "loss": 0.0247, "step": 841500 }, { "epoch": 8.27, "grad_norm": 6.219691753387451, "learning_rate": 8.733206231940183e-07, "loss": 0.034, "step": 841525 }, { "epoch": 8.27, "grad_norm": 5.822089195251465, "learning_rate": 8.731965007397698e-07, "loss": 0.0214, "step": 841550 }, { "epoch": 8.27, "grad_norm": 4.9881672859191895, "learning_rate": 8.730773431836914e-07, "loss": 0.0501, "step": 841575 }, { "epoch": 8.27, "grad_norm": 10.939017295837402, "learning_rate": 8.729532207294429e-07, "loss": 0.0274, "step": 841600 }, { "epoch": 8.27, "grad_norm": 0.022228965535759926, "learning_rate": 8.728290982751944e-07, "loss": 0.0508, "step": 841625 }, { "epoch": 8.28, "grad_norm": 11.437026023864746, "learning_rate": 8.72704975820946e-07, "loss": 0.0144, "step": 841650 }, { "epoch": 8.28, "grad_norm": 1.575763463973999, "learning_rate": 8.725808533666975e-07, "loss": 0.0319, "step": 841675 }, { "epoch": 8.28, "grad_norm": 3.374469041824341, "learning_rate": 8.72456730912449e-07, "loss": 0.0428, "step": 841700 }, { "epoch": 8.28, "grad_norm": 0.04302825406193733, "learning_rate": 8.723326084582006e-07, "loss": 0.0238, "step": 841725 }, { "epoch": 8.28, "grad_norm": 1.229023814201355, "learning_rate": 8.722084860039522e-07, "loss": 0.0246, "step": 841750 }, { "epoch": 8.28, "grad_norm": 0.702098548412323, "learning_rate": 8.720843635497036e-07, "loss": 0.0213, "step": 841775 }, { "epoch": 8.28, "grad_norm": 0.22606487572193146, "learning_rate": 8.719602410954552e-07, "loss": 0.0333, "step": 841800 }, { "epoch": 8.28, "grad_norm": 4.90623664855957, "learning_rate": 8.718361186412068e-07, "loss": 0.0332, "step": 841825 }, { "epoch": 8.28, "grad_norm": 2.1845765113830566, "learning_rate": 8.717119961869583e-07, "loss": 0.0187, "step": 841850 }, { "epoch": 8.28, "grad_norm": 0.08987409621477127, "learning_rate": 8.715878737327098e-07, "loss": 0.0303, "step": 841875 }, { "epoch": 8.28, "grad_norm": 12.261207580566406, "learning_rate": 8.714637512784614e-07, "loss": 0.0292, "step": 841900 }, { "epoch": 8.28, "grad_norm": 4.882998943328857, "learning_rate": 8.713396288242129e-07, "loss": 0.0305, "step": 841925 }, { "epoch": 8.28, "grad_norm": 13.523775100708008, "learning_rate": 8.712155063699644e-07, "loss": 0.0263, "step": 841950 }, { "epoch": 8.28, "grad_norm": 0.02947223372757435, "learning_rate": 8.710913839157159e-07, "loss": 0.0437, "step": 841975 }, { "epoch": 8.28, "grad_norm": 14.817481994628906, "learning_rate": 8.709672614614675e-07, "loss": 0.0194, "step": 842000 }, { "epoch": 8.28, "grad_norm": 0.40236571431159973, "learning_rate": 8.70843139007219e-07, "loss": 0.0282, "step": 842025 }, { "epoch": 8.28, "grad_norm": 8.395631790161133, "learning_rate": 8.707190165529706e-07, "loss": 0.024, "step": 842050 }, { "epoch": 8.28, "grad_norm": 0.06926797330379486, "learning_rate": 8.705948940987222e-07, "loss": 0.027, "step": 842075 }, { "epoch": 8.28, "grad_norm": 6.979798316955566, "learning_rate": 8.704707716444736e-07, "loss": 0.0247, "step": 842100 }, { "epoch": 8.28, "grad_norm": 0.13499154150485992, "learning_rate": 8.703466491902252e-07, "loss": 0.0489, "step": 842125 }, { "epoch": 8.28, "grad_norm": 17.171552658081055, "learning_rate": 8.702225267359767e-07, "loss": 0.0361, "step": 842150 }, { "epoch": 8.28, "grad_norm": 0.11409853398799896, "learning_rate": 8.700984042817283e-07, "loss": 0.0398, "step": 842175 }, { "epoch": 8.28, "grad_norm": 7.478132247924805, "learning_rate": 8.699742818274797e-07, "loss": 0.0182, "step": 842200 }, { "epoch": 8.28, "grad_norm": 1.0553345680236816, "learning_rate": 8.698501593732313e-07, "loss": 0.0319, "step": 842225 }, { "epoch": 8.28, "grad_norm": 4.878113746643066, "learning_rate": 8.697260369189829e-07, "loss": 0.0175, "step": 842250 }, { "epoch": 8.28, "grad_norm": 5.5440874099731445, "learning_rate": 8.696019144647344e-07, "loss": 0.0448, "step": 842275 }, { "epoch": 8.28, "grad_norm": 0.529161274433136, "learning_rate": 8.694777920104859e-07, "loss": 0.0131, "step": 842300 }, { "epoch": 8.28, "grad_norm": 0.18173891305923462, "learning_rate": 8.693536695562375e-07, "loss": 0.0245, "step": 842325 }, { "epoch": 8.28, "grad_norm": 14.616579055786133, "learning_rate": 8.69229547101989e-07, "loss": 0.039, "step": 842350 }, { "epoch": 8.28, "grad_norm": 0.5313304662704468, "learning_rate": 8.691054246477405e-07, "loss": 0.0421, "step": 842375 }, { "epoch": 8.28, "grad_norm": 2.4963836669921875, "learning_rate": 8.689813021934921e-07, "loss": 0.0347, "step": 842400 }, { "epoch": 8.28, "grad_norm": 12.961526870727539, "learning_rate": 8.688571797392437e-07, "loss": 0.043, "step": 842425 }, { "epoch": 8.28, "grad_norm": 9.553439140319824, "learning_rate": 8.687330572849951e-07, "loss": 0.0128, "step": 842450 }, { "epoch": 8.28, "grad_norm": 1.8273541927337646, "learning_rate": 8.686089348307467e-07, "loss": 0.0382, "step": 842475 }, { "epoch": 8.28, "grad_norm": 16.18258285522461, "learning_rate": 8.684848123764983e-07, "loss": 0.0345, "step": 842500 }, { "epoch": 8.28, "grad_norm": 1.049869418144226, "learning_rate": 8.683606899222498e-07, "loss": 0.0311, "step": 842525 }, { "epoch": 8.28, "grad_norm": 0.23142684996128082, "learning_rate": 8.682365674680013e-07, "loss": 0.0149, "step": 842550 }, { "epoch": 8.28, "grad_norm": 0.029338745400309563, "learning_rate": 8.681124450137528e-07, "loss": 0.0327, "step": 842575 }, { "epoch": 8.28, "grad_norm": 7.193849563598633, "learning_rate": 8.679883225595044e-07, "loss": 0.0256, "step": 842600 }, { "epoch": 8.28, "grad_norm": 0.17858576774597168, "learning_rate": 8.678642001052559e-07, "loss": 0.0214, "step": 842625 }, { "epoch": 8.29, "grad_norm": 16.40929412841797, "learning_rate": 8.677400776510074e-07, "loss": 0.0341, "step": 842650 }, { "epoch": 8.29, "grad_norm": 0.1834031641483307, "learning_rate": 8.67615955196759e-07, "loss": 0.0297, "step": 842675 }, { "epoch": 8.29, "grad_norm": 9.855270385742188, "learning_rate": 8.674918327425105e-07, "loss": 0.0269, "step": 842700 }, { "epoch": 8.29, "grad_norm": 0.015415620058774948, "learning_rate": 8.673677102882621e-07, "loss": 0.0239, "step": 842725 }, { "epoch": 8.29, "grad_norm": 8.396133422851562, "learning_rate": 8.672435878340137e-07, "loss": 0.0447, "step": 842750 }, { "epoch": 8.29, "grad_norm": 0.04217444732785225, "learning_rate": 8.671194653797651e-07, "loss": 0.0263, "step": 842775 }, { "epoch": 8.29, "grad_norm": 8.581509590148926, "learning_rate": 8.669953429255166e-07, "loss": 0.0391, "step": 842800 }, { "epoch": 8.29, "grad_norm": 1.5334222316741943, "learning_rate": 8.668712204712682e-07, "loss": 0.0377, "step": 842825 }, { "epoch": 8.29, "grad_norm": 0.7496374845504761, "learning_rate": 8.667470980170198e-07, "loss": 0.0304, "step": 842850 }, { "epoch": 8.29, "grad_norm": 2.216069459915161, "learning_rate": 8.666229755627712e-07, "loss": 0.0547, "step": 842875 }, { "epoch": 8.29, "grad_norm": 1.687936782836914, "learning_rate": 8.664988531085228e-07, "loss": 0.0208, "step": 842900 }, { "epoch": 8.29, "grad_norm": 0.11079670488834381, "learning_rate": 8.663747306542744e-07, "loss": 0.0318, "step": 842925 }, { "epoch": 8.29, "grad_norm": 25.330001831054688, "learning_rate": 8.662506082000259e-07, "loss": 0.0243, "step": 842950 }, { "epoch": 8.29, "grad_norm": 0.43152451515197754, "learning_rate": 8.661264857457774e-07, "loss": 0.0248, "step": 842975 }, { "epoch": 8.29, "grad_norm": 18.451854705810547, "learning_rate": 8.660023632915289e-07, "loss": 0.0306, "step": 843000 }, { "epoch": 8.29, "grad_norm": 1.8001912832260132, "learning_rate": 8.658782408372805e-07, "loss": 0.0501, "step": 843025 }, { "epoch": 8.29, "grad_norm": 19.926530838012695, "learning_rate": 8.65754118383032e-07, "loss": 0.044, "step": 843050 }, { "epoch": 8.29, "grad_norm": 0.29692763090133667, "learning_rate": 8.656299959287836e-07, "loss": 0.0484, "step": 843075 }, { "epoch": 8.29, "grad_norm": 5.94625997543335, "learning_rate": 8.655058734745351e-07, "loss": 0.0179, "step": 843100 }, { "epoch": 8.29, "grad_norm": 0.15307161211967468, "learning_rate": 8.653817510202866e-07, "loss": 0.039, "step": 843125 }, { "epoch": 8.29, "grad_norm": 0.6558213829994202, "learning_rate": 8.652576285660382e-07, "loss": 0.0328, "step": 843150 }, { "epoch": 8.29, "grad_norm": 1.6128610372543335, "learning_rate": 8.651335061117898e-07, "loss": 0.0254, "step": 843175 }, { "epoch": 8.29, "grad_norm": 2.9113481044769287, "learning_rate": 8.650093836575413e-07, "loss": 0.0243, "step": 843200 }, { "epoch": 8.29, "grad_norm": 0.2526325583457947, "learning_rate": 8.648852612032928e-07, "loss": 0.0322, "step": 843225 }, { "epoch": 8.29, "grad_norm": 9.133277893066406, "learning_rate": 8.647611387490443e-07, "loss": 0.0323, "step": 843250 }, { "epoch": 8.29, "grad_norm": 0.179097518324852, "learning_rate": 8.646370162947959e-07, "loss": 0.0384, "step": 843275 }, { "epoch": 8.29, "grad_norm": 0.4331784248352051, "learning_rate": 8.645128938405474e-07, "loss": 0.0311, "step": 843300 }, { "epoch": 8.29, "grad_norm": 0.3394370973110199, "learning_rate": 8.643887713862989e-07, "loss": 0.0289, "step": 843325 }, { "epoch": 8.29, "grad_norm": 13.072432518005371, "learning_rate": 8.642646489320505e-07, "loss": 0.0357, "step": 843350 }, { "epoch": 8.29, "grad_norm": 0.09166781604290009, "learning_rate": 8.64140526477802e-07, "loss": 0.0312, "step": 843375 }, { "epoch": 8.29, "grad_norm": 1.3510757684707642, "learning_rate": 8.640164040235536e-07, "loss": 0.035, "step": 843400 }, { "epoch": 8.29, "grad_norm": 9.31851577758789, "learning_rate": 8.63892281569305e-07, "loss": 0.0296, "step": 843425 }, { "epoch": 8.29, "grad_norm": 15.865669250488281, "learning_rate": 8.637681591150566e-07, "loss": 0.0284, "step": 843450 }, { "epoch": 8.29, "grad_norm": 0.008517327718436718, "learning_rate": 8.636440366608081e-07, "loss": 0.0445, "step": 843475 }, { "epoch": 8.29, "grad_norm": 7.984381675720215, "learning_rate": 8.635199142065597e-07, "loss": 0.0254, "step": 843500 }, { "epoch": 8.29, "grad_norm": 0.605258584022522, "learning_rate": 8.633957917523113e-07, "loss": 0.0241, "step": 843525 }, { "epoch": 8.29, "grad_norm": 16.34501838684082, "learning_rate": 8.632716692980627e-07, "loss": 0.0251, "step": 843550 }, { "epoch": 8.29, "grad_norm": 0.023511992767453194, "learning_rate": 8.631475468438143e-07, "loss": 0.0284, "step": 843575 }, { "epoch": 8.29, "grad_norm": 2.6196248531341553, "learning_rate": 8.630234243895659e-07, "loss": 0.0154, "step": 843600 }, { "epoch": 8.29, "grad_norm": 2.4161481857299805, "learning_rate": 8.628993019353174e-07, "loss": 0.0191, "step": 843625 }, { "epoch": 8.29, "grad_norm": 11.87152099609375, "learning_rate": 8.627751794810689e-07, "loss": 0.0238, "step": 843650 }, { "epoch": 8.3, "grad_norm": 0.006627185735851526, "learning_rate": 8.626510570268204e-07, "loss": 0.0236, "step": 843675 }, { "epoch": 8.3, "grad_norm": 1.0322024822235107, "learning_rate": 8.62526934572572e-07, "loss": 0.0378, "step": 843700 }, { "epoch": 8.3, "grad_norm": 0.009498248808085918, "learning_rate": 8.624028121183235e-07, "loss": 0.0449, "step": 843725 }, { "epoch": 8.3, "grad_norm": 21.310022354125977, "learning_rate": 8.62278689664075e-07, "loss": 0.019, "step": 843750 }, { "epoch": 8.3, "grad_norm": 0.03487202525138855, "learning_rate": 8.621545672098266e-07, "loss": 0.0462, "step": 843775 }, { "epoch": 8.3, "grad_norm": 13.340965270996094, "learning_rate": 8.620304447555781e-07, "loss": 0.0245, "step": 843800 }, { "epoch": 8.3, "grad_norm": 0.019031886011362076, "learning_rate": 8.619063223013297e-07, "loss": 0.0337, "step": 843825 }, { "epoch": 8.3, "grad_norm": 31.606842041015625, "learning_rate": 8.617821998470812e-07, "loss": 0.0288, "step": 843850 }, { "epoch": 8.3, "grad_norm": 0.0077209738083183765, "learning_rate": 8.616580773928328e-07, "loss": 0.0296, "step": 843875 }, { "epoch": 8.3, "grad_norm": 14.492874145507812, "learning_rate": 8.615339549385842e-07, "loss": 0.0382, "step": 843900 }, { "epoch": 8.3, "grad_norm": 0.1550242006778717, "learning_rate": 8.614098324843358e-07, "loss": 0.0341, "step": 843925 }, { "epoch": 8.3, "grad_norm": 9.157695770263672, "learning_rate": 8.612857100300874e-07, "loss": 0.0213, "step": 843950 }, { "epoch": 8.3, "grad_norm": 1.0706102848052979, "learning_rate": 8.611615875758389e-07, "loss": 0.0326, "step": 843975 }, { "epoch": 8.3, "grad_norm": 2.5920753479003906, "learning_rate": 8.610374651215904e-07, "loss": 0.0152, "step": 844000 }, { "epoch": 8.3, "grad_norm": 1.0469896793365479, "learning_rate": 8.60913342667342e-07, "loss": 0.032, "step": 844025 }, { "epoch": 8.3, "grad_norm": 11.131400108337402, "learning_rate": 8.607892202130935e-07, "loss": 0.0387, "step": 844050 }, { "epoch": 8.3, "grad_norm": 0.0514318086206913, "learning_rate": 8.606650977588451e-07, "loss": 0.0367, "step": 844075 }, { "epoch": 8.3, "grad_norm": 10.118010520935059, "learning_rate": 8.605409753045965e-07, "loss": 0.0203, "step": 844100 }, { "epoch": 8.3, "grad_norm": 0.09677977859973907, "learning_rate": 8.604168528503481e-07, "loss": 0.0498, "step": 844125 }, { "epoch": 8.3, "grad_norm": 7.492321014404297, "learning_rate": 8.602927303960996e-07, "loss": 0.0168, "step": 844150 }, { "epoch": 8.3, "grad_norm": 0.10546425729990005, "learning_rate": 8.601686079418512e-07, "loss": 0.0205, "step": 844175 }, { "epoch": 8.3, "grad_norm": 1.6908531188964844, "learning_rate": 8.600444854876028e-07, "loss": 0.0196, "step": 844200 }, { "epoch": 8.3, "grad_norm": 0.009207664988934994, "learning_rate": 8.599203630333542e-07, "loss": 0.0288, "step": 844225 }, { "epoch": 8.3, "grad_norm": 11.640531539916992, "learning_rate": 8.597962405791058e-07, "loss": 0.0227, "step": 844250 }, { "epoch": 8.3, "grad_norm": 1.5583637952804565, "learning_rate": 8.596721181248573e-07, "loss": 0.0523, "step": 844275 }, { "epoch": 8.3, "grad_norm": 0.6029801964759827, "learning_rate": 8.595479956706089e-07, "loss": 0.0342, "step": 844300 }, { "epoch": 8.3, "grad_norm": 0.041467685252428055, "learning_rate": 8.594238732163603e-07, "loss": 0.0239, "step": 844325 }, { "epoch": 8.3, "grad_norm": 4.353539943695068, "learning_rate": 8.592997507621119e-07, "loss": 0.0274, "step": 844350 }, { "epoch": 8.3, "grad_norm": 3.3910930156707764, "learning_rate": 8.591756283078635e-07, "loss": 0.0266, "step": 844375 }, { "epoch": 8.3, "grad_norm": 17.81027603149414, "learning_rate": 8.59051505853615e-07, "loss": 0.0348, "step": 844400 }, { "epoch": 8.3, "grad_norm": 0.33938097953796387, "learning_rate": 8.589273833993665e-07, "loss": 0.0398, "step": 844425 }, { "epoch": 8.3, "grad_norm": 1.0021785497665405, "learning_rate": 8.588032609451181e-07, "loss": 0.0246, "step": 844450 }, { "epoch": 8.3, "grad_norm": 0.17547747492790222, "learning_rate": 8.586791384908696e-07, "loss": 0.0298, "step": 844475 }, { "epoch": 8.3, "grad_norm": 5.155558109283447, "learning_rate": 8.585550160366212e-07, "loss": 0.0208, "step": 844500 }, { "epoch": 8.3, "grad_norm": 0.006119477096945047, "learning_rate": 8.584308935823727e-07, "loss": 0.0234, "step": 844525 }, { "epoch": 8.3, "grad_norm": 11.045110702514648, "learning_rate": 8.583067711281243e-07, "loss": 0.0275, "step": 844550 }, { "epoch": 8.3, "grad_norm": 7.766078948974609, "learning_rate": 8.581826486738757e-07, "loss": 0.0342, "step": 844575 }, { "epoch": 8.3, "grad_norm": 9.455286026000977, "learning_rate": 8.580585262196273e-07, "loss": 0.0331, "step": 844600 }, { "epoch": 8.3, "grad_norm": 3.9738590717315674, "learning_rate": 8.579344037653789e-07, "loss": 0.0237, "step": 844625 }, { "epoch": 8.3, "grad_norm": 11.307621002197266, "learning_rate": 8.578102813111304e-07, "loss": 0.0367, "step": 844650 }, { "epoch": 8.3, "grad_norm": 9.575993537902832, "learning_rate": 8.576861588568819e-07, "loss": 0.0494, "step": 844675 }, { "epoch": 8.31, "grad_norm": 1.0721877813339233, "learning_rate": 8.575620364026334e-07, "loss": 0.0188, "step": 844700 }, { "epoch": 8.31, "grad_norm": 0.07424049079418182, "learning_rate": 8.57437913948385e-07, "loss": 0.0244, "step": 844725 }, { "epoch": 8.31, "grad_norm": 0.16269010305404663, "learning_rate": 8.573137914941366e-07, "loss": 0.0179, "step": 844750 }, { "epoch": 8.31, "grad_norm": 6.609759330749512, "learning_rate": 8.57189669039888e-07, "loss": 0.0159, "step": 844775 }, { "epoch": 8.31, "grad_norm": 17.964628219604492, "learning_rate": 8.570655465856396e-07, "loss": 0.0489, "step": 844800 }, { "epoch": 8.31, "grad_norm": 0.2755417823791504, "learning_rate": 8.569414241313911e-07, "loss": 0.0335, "step": 844825 }, { "epoch": 8.31, "grad_norm": 0.12206176668405533, "learning_rate": 8.568173016771427e-07, "loss": 0.0328, "step": 844850 }, { "epoch": 8.31, "grad_norm": 0.16181468963623047, "learning_rate": 8.566931792228943e-07, "loss": 0.034, "step": 844875 }, { "epoch": 8.31, "grad_norm": 10.208852767944336, "learning_rate": 8.565690567686457e-07, "loss": 0.0308, "step": 844900 }, { "epoch": 8.31, "grad_norm": 0.5534288287162781, "learning_rate": 8.564449343143973e-07, "loss": 0.0419, "step": 844925 }, { "epoch": 8.31, "grad_norm": 14.59490966796875, "learning_rate": 8.563208118601488e-07, "loss": 0.0401, "step": 844950 }, { "epoch": 8.31, "grad_norm": 1.569071650505066, "learning_rate": 8.561966894059004e-07, "loss": 0.0344, "step": 844975 }, { "epoch": 8.31, "grad_norm": 8.091053009033203, "learning_rate": 8.560725669516518e-07, "loss": 0.0158, "step": 845000 }, { "epoch": 8.31, "grad_norm": 0.007290061563253403, "learning_rate": 8.559484444974034e-07, "loss": 0.0193, "step": 845025 }, { "epoch": 8.31, "grad_norm": 15.50210189819336, "learning_rate": 8.55824322043155e-07, "loss": 0.0312, "step": 845050 }, { "epoch": 8.31, "grad_norm": 7.865910530090332, "learning_rate": 8.557001995889065e-07, "loss": 0.029, "step": 845075 }, { "epoch": 8.31, "grad_norm": 13.346298217773438, "learning_rate": 8.55576077134658e-07, "loss": 0.0323, "step": 845100 }, { "epoch": 8.31, "grad_norm": 2.702454090118408, "learning_rate": 8.554519546804095e-07, "loss": 0.0419, "step": 845125 }, { "epoch": 8.31, "grad_norm": 0.053511522710323334, "learning_rate": 8.553278322261611e-07, "loss": 0.0344, "step": 845150 }, { "epoch": 8.31, "grad_norm": 3.5412397384643555, "learning_rate": 8.552037097719127e-07, "loss": 0.0255, "step": 845175 }, { "epoch": 8.31, "grad_norm": 7.155712127685547, "learning_rate": 8.550795873176642e-07, "loss": 0.0434, "step": 845200 }, { "epoch": 8.31, "grad_norm": 0.4915051758289337, "learning_rate": 8.549554648634158e-07, "loss": 0.0514, "step": 845225 }, { "epoch": 8.31, "grad_norm": 0.356192409992218, "learning_rate": 8.548313424091672e-07, "loss": 0.0179, "step": 845250 }, { "epoch": 8.31, "grad_norm": 4.1933512687683105, "learning_rate": 8.547072199549188e-07, "loss": 0.0364, "step": 845275 }, { "epoch": 8.31, "grad_norm": 11.573182106018066, "learning_rate": 8.545830975006704e-07, "loss": 0.0397, "step": 845300 }, { "epoch": 8.31, "grad_norm": 0.06565751135349274, "learning_rate": 8.544589750464219e-07, "loss": 0.031, "step": 845325 }, { "epoch": 8.31, "grad_norm": 0.5123422145843506, "learning_rate": 8.543348525921734e-07, "loss": 0.0162, "step": 845350 }, { "epoch": 8.31, "grad_norm": 1.5099486112594604, "learning_rate": 8.542107301379249e-07, "loss": 0.0199, "step": 845375 }, { "epoch": 8.31, "grad_norm": 3.0248465538024902, "learning_rate": 8.540866076836765e-07, "loss": 0.0244, "step": 845400 }, { "epoch": 8.31, "grad_norm": 0.14487864077091217, "learning_rate": 8.53962485229428e-07, "loss": 0.0423, "step": 845425 }, { "epoch": 8.31, "grad_norm": 9.9010648727417, "learning_rate": 8.538383627751795e-07, "loss": 0.0238, "step": 845450 }, { "epoch": 8.31, "grad_norm": 1.5554611682891846, "learning_rate": 8.537142403209311e-07, "loss": 0.0188, "step": 845475 }, { "epoch": 8.31, "grad_norm": 5.821798801422119, "learning_rate": 8.535901178666826e-07, "loss": 0.0254, "step": 845500 }, { "epoch": 8.31, "grad_norm": 0.11041571944952011, "learning_rate": 8.534659954124342e-07, "loss": 0.0515, "step": 845525 }, { "epoch": 8.31, "grad_norm": 7.693613529205322, "learning_rate": 8.533418729581856e-07, "loss": 0.0424, "step": 845550 }, { "epoch": 8.31, "grad_norm": 10.559280395507812, "learning_rate": 8.532177505039372e-07, "loss": 0.0186, "step": 845575 }, { "epoch": 8.31, "grad_norm": Infinity, "learning_rate": 8.530985929478587e-07, "loss": 0.028, "step": 845600 }, { "epoch": 8.31, "grad_norm": 0.05015822499990463, "learning_rate": 8.529744704936102e-07, "loss": 0.0319, "step": 845625 }, { "epoch": 8.31, "grad_norm": 6.763095378875732, "learning_rate": 8.528503480393618e-07, "loss": 0.0318, "step": 845650 }, { "epoch": 8.31, "grad_norm": 10.22074031829834, "learning_rate": 8.527262255851134e-07, "loss": 0.0306, "step": 845675 }, { "epoch": 8.32, "grad_norm": 15.407750129699707, "learning_rate": 8.526021031308648e-07, "loss": 0.0355, "step": 845700 }, { "epoch": 8.32, "grad_norm": 0.09705127030611038, "learning_rate": 8.524779806766163e-07, "loss": 0.0336, "step": 845725 }, { "epoch": 8.32, "grad_norm": 0.6402794718742371, "learning_rate": 8.52353858222368e-07, "loss": 0.0181, "step": 845750 }, { "epoch": 8.32, "grad_norm": 1.2597904205322266, "learning_rate": 8.522297357681195e-07, "loss": 0.0242, "step": 845775 }, { "epoch": 8.32, "grad_norm": 15.572776794433594, "learning_rate": 8.521056133138709e-07, "loss": 0.0216, "step": 845800 }, { "epoch": 8.32, "grad_norm": 0.08839278668165207, "learning_rate": 8.519814908596225e-07, "loss": 0.0306, "step": 845825 }, { "epoch": 8.32, "grad_norm": 9.71987533569336, "learning_rate": 8.518573684053741e-07, "loss": 0.0134, "step": 845850 }, { "epoch": 8.32, "grad_norm": 0.04673956334590912, "learning_rate": 8.517332459511256e-07, "loss": 0.0419, "step": 845875 }, { "epoch": 8.32, "grad_norm": 6.07772159576416, "learning_rate": 8.516091234968772e-07, "loss": 0.0268, "step": 845900 }, { "epoch": 8.32, "grad_norm": 4.193700790405273, "learning_rate": 8.514850010426287e-07, "loss": 0.0509, "step": 845925 }, { "epoch": 8.32, "grad_norm": 5.919466495513916, "learning_rate": 8.513608785883802e-07, "loss": 0.0288, "step": 845950 }, { "epoch": 8.32, "grad_norm": 11.729177474975586, "learning_rate": 8.512367561341317e-07, "loss": 0.0369, "step": 845975 }, { "epoch": 8.32, "grad_norm": 0.6226368546485901, "learning_rate": 8.511126336798834e-07, "loss": 0.0254, "step": 846000 }, { "epoch": 8.32, "grad_norm": 0.02425066940486431, "learning_rate": 8.509885112256349e-07, "loss": 0.0379, "step": 846025 }, { "epoch": 8.32, "grad_norm": 13.855560302734375, "learning_rate": 8.508643887713863e-07, "loss": 0.0339, "step": 846050 }, { "epoch": 8.32, "grad_norm": 0.10701192915439606, "learning_rate": 8.507402663171379e-07, "loss": 0.0518, "step": 846075 }, { "epoch": 8.32, "grad_norm": 12.949959754943848, "learning_rate": 8.506161438628895e-07, "loss": 0.0244, "step": 846100 }, { "epoch": 8.32, "grad_norm": 7.1736578941345215, "learning_rate": 8.50492021408641e-07, "loss": 0.0253, "step": 846125 }, { "epoch": 8.32, "grad_norm": 16.53695297241211, "learning_rate": 8.503678989543924e-07, "loss": 0.0267, "step": 846150 }, { "epoch": 8.32, "grad_norm": 0.07223518937826157, "learning_rate": 8.502437765001441e-07, "loss": 0.0347, "step": 846175 }, { "epoch": 8.32, "grad_norm": 4.857148170471191, "learning_rate": 8.501196540458956e-07, "loss": 0.0219, "step": 846200 }, { "epoch": 8.32, "grad_norm": 0.04437289014458656, "learning_rate": 8.499955315916471e-07, "loss": 0.0362, "step": 846225 }, { "epoch": 8.32, "grad_norm": 1.2810903787612915, "learning_rate": 8.498714091373988e-07, "loss": 0.0145, "step": 846250 }, { "epoch": 8.32, "grad_norm": 3.9488916397094727, "learning_rate": 8.497472866831502e-07, "loss": 0.0361, "step": 846275 }, { "epoch": 8.32, "grad_norm": 10.20499038696289, "learning_rate": 8.496231642289017e-07, "loss": 0.0186, "step": 846300 }, { "epoch": 8.32, "grad_norm": 1.4835331439971924, "learning_rate": 8.494990417746533e-07, "loss": 0.0285, "step": 846325 }, { "epoch": 8.32, "grad_norm": 14.415972709655762, "learning_rate": 8.493749193204049e-07, "loss": 0.0321, "step": 846350 }, { "epoch": 8.32, "grad_norm": 1.6624996662139893, "learning_rate": 8.492507968661563e-07, "loss": 0.0493, "step": 846375 }, { "epoch": 8.32, "grad_norm": 16.209388732910156, "learning_rate": 8.491266744119078e-07, "loss": 0.0433, "step": 846400 }, { "epoch": 8.32, "grad_norm": 0.005158412270247936, "learning_rate": 8.490025519576595e-07, "loss": 0.034, "step": 846425 }, { "epoch": 8.32, "grad_norm": 1.359584927558899, "learning_rate": 8.48878429503411e-07, "loss": 0.0284, "step": 846450 }, { "epoch": 8.32, "grad_norm": 0.011279154568910599, "learning_rate": 8.487543070491624e-07, "loss": 0.0197, "step": 846475 }, { "epoch": 8.32, "grad_norm": 1.8783965110778809, "learning_rate": 8.48630184594914e-07, "loss": 0.0179, "step": 846500 }, { "epoch": 8.32, "grad_norm": 1.8037117719650269, "learning_rate": 8.485060621406656e-07, "loss": 0.0351, "step": 846525 }, { "epoch": 8.32, "grad_norm": 11.383825302124023, "learning_rate": 8.483819396864171e-07, "loss": 0.0286, "step": 846550 }, { "epoch": 8.32, "grad_norm": 1.112711787223816, "learning_rate": 8.482578172321685e-07, "loss": 0.0221, "step": 846575 }, { "epoch": 8.32, "grad_norm": 6.446233749389648, "learning_rate": 8.481336947779202e-07, "loss": 0.0436, "step": 846600 }, { "epoch": 8.32, "grad_norm": 7.187389373779297, "learning_rate": 8.480095723236717e-07, "loss": 0.0347, "step": 846625 }, { "epoch": 8.32, "grad_norm": 13.692558288574219, "learning_rate": 8.478854498694232e-07, "loss": 0.0483, "step": 846650 }, { "epoch": 8.32, "grad_norm": 0.03772939369082451, "learning_rate": 8.477613274151749e-07, "loss": 0.0256, "step": 846675 }, { "epoch": 8.32, "grad_norm": 17.484514236450195, "learning_rate": 8.476372049609264e-07, "loss": 0.0315, "step": 846700 }, { "epoch": 8.33, "grad_norm": 1.6389660835266113, "learning_rate": 8.475130825066778e-07, "loss": 0.0211, "step": 846725 }, { "epoch": 8.33, "grad_norm": 18.754928588867188, "learning_rate": 8.473889600524294e-07, "loss": 0.0273, "step": 846750 }, { "epoch": 8.33, "grad_norm": 0.010257041081786156, "learning_rate": 8.47264837598181e-07, "loss": 0.032, "step": 846775 }, { "epoch": 8.33, "grad_norm": 21.102996826171875, "learning_rate": 8.471407151439325e-07, "loss": 0.0183, "step": 846800 }, { "epoch": 8.33, "grad_norm": 2.303469181060791, "learning_rate": 8.470165926896839e-07, "loss": 0.0401, "step": 846825 }, { "epoch": 8.33, "grad_norm": 21.350282669067383, "learning_rate": 8.468924702354356e-07, "loss": 0.0407, "step": 846850 }, { "epoch": 8.33, "grad_norm": 13.020936965942383, "learning_rate": 8.467683477811871e-07, "loss": 0.0306, "step": 846875 }, { "epoch": 8.33, "grad_norm": 19.071941375732422, "learning_rate": 8.466442253269386e-07, "loss": 0.0433, "step": 846900 }, { "epoch": 8.33, "grad_norm": 0.03146185353398323, "learning_rate": 8.465201028726902e-07, "loss": 0.0225, "step": 846925 }, { "epoch": 8.33, "grad_norm": 15.615012168884277, "learning_rate": 8.463959804184417e-07, "loss": 0.0326, "step": 846950 }, { "epoch": 8.33, "grad_norm": 0.004403186962008476, "learning_rate": 8.462718579641932e-07, "loss": 0.0608, "step": 846975 }, { "epoch": 8.33, "grad_norm": 12.469932556152344, "learning_rate": 8.461477355099447e-07, "loss": 0.0229, "step": 847000 }, { "epoch": 8.33, "grad_norm": 0.7439398765563965, "learning_rate": 8.460236130556964e-07, "loss": 0.0332, "step": 847025 }, { "epoch": 8.33, "grad_norm": 4.46140193939209, "learning_rate": 8.458994906014478e-07, "loss": 0.0214, "step": 847050 }, { "epoch": 8.33, "grad_norm": 0.2974971830844879, "learning_rate": 8.457753681471993e-07, "loss": 0.0436, "step": 847075 }, { "epoch": 8.33, "grad_norm": 2.5323054790496826, "learning_rate": 8.45651245692951e-07, "loss": 0.0168, "step": 847100 }, { "epoch": 8.33, "grad_norm": 0.01943025179207325, "learning_rate": 8.455271232387025e-07, "loss": 0.0381, "step": 847125 }, { "epoch": 8.33, "grad_norm": 7.102117538452148, "learning_rate": 8.454030007844539e-07, "loss": 0.0284, "step": 847150 }, { "epoch": 8.33, "grad_norm": 0.05295518785715103, "learning_rate": 8.452788783302055e-07, "loss": 0.0319, "step": 847175 }, { "epoch": 8.33, "grad_norm": 10.253972053527832, "learning_rate": 8.451547558759571e-07, "loss": 0.028, "step": 847200 }, { "epoch": 8.33, "grad_norm": 0.36086514592170715, "learning_rate": 8.450355983198785e-07, "loss": 0.0363, "step": 847225 }, { "epoch": 8.33, "grad_norm": 3.768399715423584, "learning_rate": 8.4491147586563e-07, "loss": 0.0226, "step": 847250 }, { "epoch": 8.33, "grad_norm": 0.09363503754138947, "learning_rate": 8.447873534113817e-07, "loss": 0.0172, "step": 847275 }, { "epoch": 8.33, "grad_norm": 6.665594100952148, "learning_rate": 8.446632309571331e-07, "loss": 0.0453, "step": 847300 }, { "epoch": 8.33, "grad_norm": 0.010678400285542011, "learning_rate": 8.445391085028846e-07, "loss": 0.0225, "step": 847325 }, { "epoch": 8.33, "grad_norm": 0.9168499708175659, "learning_rate": 8.444149860486362e-07, "loss": 0.0222, "step": 847350 }, { "epoch": 8.33, "grad_norm": 31.227783203125, "learning_rate": 8.442908635943878e-07, "loss": 0.0372, "step": 847375 }, { "epoch": 8.33, "grad_norm": 8.445304870605469, "learning_rate": 8.441667411401392e-07, "loss": 0.0333, "step": 847400 }, { "epoch": 8.33, "grad_norm": 6.1960225105285645, "learning_rate": 8.440426186858908e-07, "loss": 0.0273, "step": 847425 }, { "epoch": 8.33, "grad_norm": 2.445525646209717, "learning_rate": 8.439184962316424e-07, "loss": 0.0139, "step": 847450 }, { "epoch": 8.33, "grad_norm": 1.6249581575393677, "learning_rate": 8.437943737773939e-07, "loss": 0.0297, "step": 847475 }, { "epoch": 8.33, "grad_norm": 0.9902194142341614, "learning_rate": 8.436702513231454e-07, "loss": 0.0236, "step": 847500 }, { "epoch": 8.33, "grad_norm": 0.30015450716018677, "learning_rate": 8.435461288688971e-07, "loss": 0.0288, "step": 847525 }, { "epoch": 8.33, "grad_norm": 9.996655464172363, "learning_rate": 8.434220064146485e-07, "loss": 0.0249, "step": 847550 }, { "epoch": 8.33, "grad_norm": 0.029564496129751205, "learning_rate": 8.432978839604e-07, "loss": 0.0547, "step": 847575 }, { "epoch": 8.33, "grad_norm": 8.58730697631836, "learning_rate": 8.431737615061516e-07, "loss": 0.0326, "step": 847600 }, { "epoch": 8.33, "grad_norm": 10.151598930358887, "learning_rate": 8.430496390519032e-07, "loss": 0.0359, "step": 847625 }, { "epoch": 8.33, "grad_norm": 11.173934936523438, "learning_rate": 8.429255165976546e-07, "loss": 0.0397, "step": 847650 }, { "epoch": 8.33, "grad_norm": 0.20941559970378876, "learning_rate": 8.428013941434061e-07, "loss": 0.0477, "step": 847675 }, { "epoch": 8.33, "grad_norm": 9.672561645507812, "learning_rate": 8.426772716891578e-07, "loss": 0.0339, "step": 847700 }, { "epoch": 8.33, "grad_norm": 1.2295475006103516, "learning_rate": 8.425531492349093e-07, "loss": 0.0365, "step": 847725 }, { "epoch": 8.34, "grad_norm": 15.997045516967773, "learning_rate": 8.424290267806607e-07, "loss": 0.0344, "step": 847750 }, { "epoch": 8.34, "grad_norm": 0.06679528951644897, "learning_rate": 8.423049043264123e-07, "loss": 0.0359, "step": 847775 }, { "epoch": 8.34, "grad_norm": 12.35802173614502, "learning_rate": 8.421807818721639e-07, "loss": 0.0229, "step": 847800 }, { "epoch": 8.34, "grad_norm": 1.1502281427383423, "learning_rate": 8.420566594179154e-07, "loss": 0.0412, "step": 847825 }, { "epoch": 8.34, "grad_norm": 17.231430053710938, "learning_rate": 8.419325369636669e-07, "loss": 0.0374, "step": 847850 }, { "epoch": 8.34, "grad_norm": 7.198194980621338, "learning_rate": 8.418084145094185e-07, "loss": 0.0412, "step": 847875 }, { "epoch": 8.34, "grad_norm": 100.30435180664062, "learning_rate": 8.4168429205517e-07, "loss": 0.0367, "step": 847900 }, { "epoch": 8.34, "grad_norm": 1.7382068634033203, "learning_rate": 8.415601696009215e-07, "loss": 0.0327, "step": 847925 }, { "epoch": 8.34, "grad_norm": 0.8824744820594788, "learning_rate": 8.414360471466732e-07, "loss": 0.0201, "step": 847950 }, { "epoch": 8.34, "grad_norm": 2.198727607727051, "learning_rate": 8.413119246924246e-07, "loss": 0.0315, "step": 847975 }, { "epoch": 8.34, "grad_norm": 14.144116401672363, "learning_rate": 8.411878022381761e-07, "loss": 0.0246, "step": 848000 }, { "epoch": 8.34, "grad_norm": 0.06082742661237717, "learning_rate": 8.410636797839277e-07, "loss": 0.0413, "step": 848025 }, { "epoch": 8.34, "grad_norm": 3.542008876800537, "learning_rate": 8.409395573296793e-07, "loss": 0.0218, "step": 848050 }, { "epoch": 8.34, "grad_norm": 8.377056121826172, "learning_rate": 8.408154348754307e-07, "loss": 0.036, "step": 848075 }, { "epoch": 8.34, "grad_norm": 9.882457733154297, "learning_rate": 8.406913124211822e-07, "loss": 0.0243, "step": 848100 }, { "epoch": 8.34, "grad_norm": 1.4389848709106445, "learning_rate": 8.405671899669339e-07, "loss": 0.0214, "step": 848125 }, { "epoch": 8.34, "grad_norm": 9.113860130310059, "learning_rate": 8.404430675126854e-07, "loss": 0.0363, "step": 848150 }, { "epoch": 8.34, "grad_norm": 0.27559757232666016, "learning_rate": 8.403189450584369e-07, "loss": 0.0263, "step": 848175 }, { "epoch": 8.34, "grad_norm": 21.908082962036133, "learning_rate": 8.401948226041886e-07, "loss": 0.0302, "step": 848200 }, { "epoch": 8.34, "grad_norm": 0.042295876890420914, "learning_rate": 8.4007070014994e-07, "loss": 0.0381, "step": 848225 }, { "epoch": 8.34, "grad_norm": 19.658510208129883, "learning_rate": 8.399465776956915e-07, "loss": 0.0372, "step": 848250 }, { "epoch": 8.34, "grad_norm": 0.013590112328529358, "learning_rate": 8.398224552414431e-07, "loss": 0.0256, "step": 848275 }, { "epoch": 8.34, "grad_norm": 3.7663440704345703, "learning_rate": 8.396983327871947e-07, "loss": 0.0337, "step": 848300 }, { "epoch": 8.34, "grad_norm": 0.004739583469927311, "learning_rate": 8.395742103329461e-07, "loss": 0.0225, "step": 848325 }, { "epoch": 8.34, "grad_norm": 8.010015487670898, "learning_rate": 8.394500878786976e-07, "loss": 0.027, "step": 848350 }, { "epoch": 8.34, "grad_norm": 10.087719917297363, "learning_rate": 8.393259654244493e-07, "loss": 0.0538, "step": 848375 }, { "epoch": 8.34, "grad_norm": 9.08912467956543, "learning_rate": 8.392018429702008e-07, "loss": 0.0195, "step": 848400 }, { "epoch": 8.34, "grad_norm": 0.2527499198913574, "learning_rate": 8.390777205159522e-07, "loss": 0.0328, "step": 848425 }, { "epoch": 8.34, "grad_norm": 8.839452743530273, "learning_rate": 8.389535980617038e-07, "loss": 0.0375, "step": 848450 }, { "epoch": 8.34, "grad_norm": 0.617818295955658, "learning_rate": 8.388294756074554e-07, "loss": 0.0386, "step": 848475 }, { "epoch": 8.34, "grad_norm": 4.661186695098877, "learning_rate": 8.387053531532069e-07, "loss": 0.0207, "step": 848500 }, { "epoch": 8.34, "grad_norm": 1.118201732635498, "learning_rate": 8.385812306989584e-07, "loss": 0.039, "step": 848525 }, { "epoch": 8.34, "grad_norm": 4.835550308227539, "learning_rate": 8.3845710824471e-07, "loss": 0.0195, "step": 848550 }, { "epoch": 8.34, "grad_norm": 5.063792705535889, "learning_rate": 8.383329857904615e-07, "loss": 0.0328, "step": 848575 }, { "epoch": 8.34, "grad_norm": 8.38809871673584, "learning_rate": 8.38208863336213e-07, "loss": 0.0259, "step": 848600 }, { "epoch": 8.34, "grad_norm": 0.004656031262129545, "learning_rate": 8.380847408819647e-07, "loss": 0.0278, "step": 848625 }, { "epoch": 8.34, "grad_norm": 7.376088619232178, "learning_rate": 8.379606184277161e-07, "loss": 0.0205, "step": 848650 }, { "epoch": 8.34, "grad_norm": 0.29294732213020325, "learning_rate": 8.378364959734676e-07, "loss": 0.0265, "step": 848675 }, { "epoch": 8.34, "grad_norm": 0.641227662563324, "learning_rate": 8.377123735192192e-07, "loss": 0.0366, "step": 848700 }, { "epoch": 8.34, "grad_norm": 0.0049239154905080795, "learning_rate": 8.375882510649708e-07, "loss": 0.0342, "step": 848725 }, { "epoch": 8.35, "grad_norm": 2.2581794261932373, "learning_rate": 8.374641286107222e-07, "loss": 0.0184, "step": 848750 }, { "epoch": 8.35, "grad_norm": 1.7253321409225464, "learning_rate": 8.373400061564737e-07, "loss": 0.0555, "step": 848775 }, { "epoch": 8.35, "grad_norm": 9.941662788391113, "learning_rate": 8.372158837022254e-07, "loss": 0.0322, "step": 848800 }, { "epoch": 8.35, "grad_norm": 0.017587842419743538, "learning_rate": 8.370917612479769e-07, "loss": 0.0422, "step": 848825 }, { "epoch": 8.35, "grad_norm": 9.113607406616211, "learning_rate": 8.369676387937284e-07, "loss": 0.024, "step": 848850 }, { "epoch": 8.35, "grad_norm": 0.08927850425243378, "learning_rate": 8.368435163394799e-07, "loss": 0.0314, "step": 848875 }, { "epoch": 8.35, "grad_norm": 6.594028472900391, "learning_rate": 8.367193938852315e-07, "loss": 0.0327, "step": 848900 }, { "epoch": 8.35, "grad_norm": 11.482194900512695, "learning_rate": 8.36595271430983e-07, "loss": 0.0324, "step": 848925 }, { "epoch": 8.35, "grad_norm": 3.4313442707061768, "learning_rate": 8.364711489767346e-07, "loss": 0.0343, "step": 848950 }, { "epoch": 8.35, "grad_norm": 0.15870508551597595, "learning_rate": 8.363470265224862e-07, "loss": 0.0256, "step": 848975 }, { "epoch": 8.35, "grad_norm": 23.154685974121094, "learning_rate": 8.362229040682376e-07, "loss": 0.04, "step": 849000 }, { "epoch": 8.35, "grad_norm": 0.010942598804831505, "learning_rate": 8.360987816139891e-07, "loss": 0.0362, "step": 849025 }, { "epoch": 8.35, "grad_norm": 22.128311157226562, "learning_rate": 8.359746591597408e-07, "loss": 0.0339, "step": 849050 }, { "epoch": 8.35, "grad_norm": 0.28669169545173645, "learning_rate": 8.358505367054923e-07, "loss": 0.0394, "step": 849075 }, { "epoch": 8.35, "grad_norm": 28.822284698486328, "learning_rate": 8.357264142512437e-07, "loss": 0.0286, "step": 849100 }, { "epoch": 8.35, "grad_norm": 0.0012002167059108615, "learning_rate": 8.356022917969953e-07, "loss": 0.0297, "step": 849125 }, { "epoch": 8.35, "grad_norm": 9.355835914611816, "learning_rate": 8.354781693427469e-07, "loss": 0.0381, "step": 849150 }, { "epoch": 8.35, "grad_norm": 0.009713476523756981, "learning_rate": 8.353540468884984e-07, "loss": 0.0334, "step": 849175 }, { "epoch": 8.35, "grad_norm": 8.4810152053833, "learning_rate": 8.352299244342498e-07, "loss": 0.0317, "step": 849200 }, { "epoch": 8.35, "grad_norm": 0.41598939895629883, "learning_rate": 8.351058019800015e-07, "loss": 0.0263, "step": 849225 }, { "epoch": 8.35, "grad_norm": 11.993396759033203, "learning_rate": 8.34981679525753e-07, "loss": 0.0218, "step": 849250 }, { "epoch": 8.35, "grad_norm": 0.04564741998910904, "learning_rate": 8.348575570715045e-07, "loss": 0.0317, "step": 849275 }, { "epoch": 8.35, "grad_norm": 25.417665481567383, "learning_rate": 8.34733434617256e-07, "loss": 0.0433, "step": 849300 }, { "epoch": 8.35, "grad_norm": 6.43060827255249, "learning_rate": 8.346093121630076e-07, "loss": 0.045, "step": 849325 }, { "epoch": 8.35, "grad_norm": 19.305679321289062, "learning_rate": 8.344851897087591e-07, "loss": 0.0282, "step": 849350 }, { "epoch": 8.35, "grad_norm": 5.712786674499512, "learning_rate": 8.343610672545107e-07, "loss": 0.0516, "step": 849375 }, { "epoch": 8.35, "grad_norm": 10.00983715057373, "learning_rate": 8.342369448002623e-07, "loss": 0.0303, "step": 849400 }, { "epoch": 8.35, "grad_norm": 0.02402978204190731, "learning_rate": 8.341128223460137e-07, "loss": 0.0391, "step": 849425 }, { "epoch": 8.35, "grad_norm": 1.5397214889526367, "learning_rate": 8.339886998917652e-07, "loss": 0.0226, "step": 849450 }, { "epoch": 8.35, "grad_norm": 0.2558465003967285, "learning_rate": 8.338645774375169e-07, "loss": 0.0378, "step": 849475 }, { "epoch": 8.35, "grad_norm": 5.580356121063232, "learning_rate": 8.337404549832684e-07, "loss": 0.0249, "step": 849500 }, { "epoch": 8.35, "grad_norm": 0.16005846858024597, "learning_rate": 8.336163325290198e-07, "loss": 0.0216, "step": 849525 }, { "epoch": 8.35, "grad_norm": 12.229217529296875, "learning_rate": 8.334922100747714e-07, "loss": 0.0194, "step": 849550 }, { "epoch": 8.35, "grad_norm": 0.07874846458435059, "learning_rate": 8.33368087620523e-07, "loss": 0.0225, "step": 849575 }, { "epoch": 8.35, "grad_norm": 6.8169379234313965, "learning_rate": 8.332439651662745e-07, "loss": 0.0151, "step": 849600 }, { "epoch": 8.35, "grad_norm": 0.08560722321271896, "learning_rate": 8.331198427120261e-07, "loss": 0.0333, "step": 849625 }, { "epoch": 8.35, "grad_norm": 5.229257106781006, "learning_rate": 8.329957202577777e-07, "loss": 0.0159, "step": 849650 }, { "epoch": 8.35, "grad_norm": 0.011124445125460625, "learning_rate": 8.328765627016991e-07, "loss": 0.0305, "step": 849675 }, { "epoch": 8.35, "grad_norm": 4.969062805175781, "learning_rate": 8.327524402474506e-07, "loss": 0.0191, "step": 849700 }, { "epoch": 8.35, "grad_norm": 2.400287628173828, "learning_rate": 8.326283177932021e-07, "loss": 0.0296, "step": 849725 }, { "epoch": 8.35, "grad_norm": 1.645547866821289, "learning_rate": 8.325041953389537e-07, "loss": 0.0364, "step": 849750 }, { "epoch": 8.36, "grad_norm": 0.30457696318626404, "learning_rate": 8.323800728847053e-07, "loss": 0.0479, "step": 849775 }, { "epoch": 8.36, "grad_norm": 3.1245062351226807, "learning_rate": 8.322559504304567e-07, "loss": 0.0313, "step": 849800 }, { "epoch": 8.36, "grad_norm": 0.2339906543493271, "learning_rate": 8.321318279762083e-07, "loss": 0.0399, "step": 849825 }, { "epoch": 8.36, "grad_norm": 21.39556884765625, "learning_rate": 8.320077055219598e-07, "loss": 0.0268, "step": 849850 }, { "epoch": 8.36, "grad_norm": 13.455001831054688, "learning_rate": 8.318835830677114e-07, "loss": 0.0315, "step": 849875 }, { "epoch": 8.36, "grad_norm": 2.693834066390991, "learning_rate": 8.317594606134628e-07, "loss": 0.0388, "step": 849900 }, { "epoch": 8.36, "grad_norm": 4.621920108795166, "learning_rate": 8.316353381592144e-07, "loss": 0.0352, "step": 849925 }, { "epoch": 8.36, "grad_norm": 5.649805545806885, "learning_rate": 8.31511215704966e-07, "loss": 0.0309, "step": 849950 }, { "epoch": 8.36, "grad_norm": 3.4872448444366455, "learning_rate": 8.313870932507175e-07, "loss": 0.0456, "step": 849975 }, { "epoch": 8.36, "grad_norm": 0.9530370831489563, "learning_rate": 8.312629707964691e-07, "loss": 0.0299, "step": 850000 }, { "epoch": 8.36, "grad_norm": 0.2247350960969925, "learning_rate": 8.311388483422205e-07, "loss": 0.0463, "step": 850025 }, { "epoch": 8.36, "grad_norm": 6.835271835327148, "learning_rate": 8.310147258879721e-07, "loss": 0.0188, "step": 850050 }, { "epoch": 8.36, "grad_norm": 1.6859469413757324, "learning_rate": 8.308906034337236e-07, "loss": 0.0325, "step": 850075 }, { "epoch": 8.36, "grad_norm": 6.812278747558594, "learning_rate": 8.307664809794752e-07, "loss": 0.0191, "step": 850100 }, { "epoch": 8.36, "grad_norm": 0.710456907749176, "learning_rate": 8.306423585252267e-07, "loss": 0.0256, "step": 850125 }, { "epoch": 8.36, "grad_norm": 6.181243419647217, "learning_rate": 8.305182360709782e-07, "loss": 0.025, "step": 850150 }, { "epoch": 8.36, "grad_norm": 0.9133508801460266, "learning_rate": 8.303941136167298e-07, "loss": 0.0358, "step": 850175 }, { "epoch": 8.36, "grad_norm": 25.080989837646484, "learning_rate": 8.302699911624814e-07, "loss": 0.0437, "step": 850200 }, { "epoch": 8.36, "grad_norm": 4.680860996246338, "learning_rate": 8.301458687082328e-07, "loss": 0.0337, "step": 850225 }, { "epoch": 8.36, "grad_norm": 7.0861496925354, "learning_rate": 8.300217462539844e-07, "loss": 0.0238, "step": 850250 }, { "epoch": 8.36, "grad_norm": 0.018564296886324883, "learning_rate": 8.298976237997359e-07, "loss": 0.0546, "step": 850275 }, { "epoch": 8.36, "grad_norm": 9.657388687133789, "learning_rate": 8.297735013454875e-07, "loss": 0.0207, "step": 850300 }, { "epoch": 8.36, "grad_norm": 0.12743225693702698, "learning_rate": 8.296493788912389e-07, "loss": 0.0421, "step": 850325 }, { "epoch": 8.36, "grad_norm": 13.2809419631958, "learning_rate": 8.295252564369905e-07, "loss": 0.0367, "step": 850350 }, { "epoch": 8.36, "grad_norm": 1.5911967754364014, "learning_rate": 8.294011339827421e-07, "loss": 0.0403, "step": 850375 }, { "epoch": 8.36, "grad_norm": 8.112974166870117, "learning_rate": 8.292770115284936e-07, "loss": 0.0258, "step": 850400 }, { "epoch": 8.36, "grad_norm": 0.17657700181007385, "learning_rate": 8.291528890742452e-07, "loss": 0.0312, "step": 850425 }, { "epoch": 8.36, "grad_norm": 8.468900680541992, "learning_rate": 8.290287666199968e-07, "loss": 0.0261, "step": 850450 }, { "epoch": 8.36, "grad_norm": 1.5503350496292114, "learning_rate": 8.289046441657482e-07, "loss": 0.0497, "step": 850475 }, { "epoch": 8.36, "grad_norm": 4.384511470794678, "learning_rate": 8.287805217114997e-07, "loss": 0.019, "step": 850500 }, { "epoch": 8.36, "grad_norm": 0.18140381574630737, "learning_rate": 8.286563992572513e-07, "loss": 0.0369, "step": 850525 }, { "epoch": 8.36, "grad_norm": 13.406525611877441, "learning_rate": 8.285322768030029e-07, "loss": 0.0414, "step": 850550 }, { "epoch": 8.36, "grad_norm": 0.12047749757766724, "learning_rate": 8.284081543487543e-07, "loss": 0.0264, "step": 850575 }, { "epoch": 8.36, "grad_norm": 21.70551109313965, "learning_rate": 8.282840318945059e-07, "loss": 0.0227, "step": 850600 }, { "epoch": 8.36, "grad_norm": 0.10182452946901321, "learning_rate": 8.281599094402575e-07, "loss": 0.0262, "step": 850625 }, { "epoch": 8.36, "grad_norm": 10.64982795715332, "learning_rate": 8.28035786986009e-07, "loss": 0.0282, "step": 850650 }, { "epoch": 8.36, "grad_norm": 0.6848629117012024, "learning_rate": 8.279116645317606e-07, "loss": 0.0258, "step": 850675 }, { "epoch": 8.36, "grad_norm": 11.780978202819824, "learning_rate": 8.27787542077512e-07, "loss": 0.0283, "step": 850700 }, { "epoch": 8.36, "grad_norm": 0.08300494402647018, "learning_rate": 8.276634196232636e-07, "loss": 0.0319, "step": 850725 }, { "epoch": 8.36, "grad_norm": 2.96494197845459, "learning_rate": 8.275392971690151e-07, "loss": 0.0267, "step": 850750 }, { "epoch": 8.36, "grad_norm": 0.0210258886218071, "learning_rate": 8.274151747147667e-07, "loss": 0.0398, "step": 850775 }, { "epoch": 8.37, "grad_norm": 8.99565315246582, "learning_rate": 8.272910522605182e-07, "loss": 0.0192, "step": 850800 }, { "epoch": 8.37, "grad_norm": 0.023377280682325363, "learning_rate": 8.271669298062697e-07, "loss": 0.0377, "step": 850825 }, { "epoch": 8.37, "grad_norm": 11.963831901550293, "learning_rate": 8.270428073520213e-07, "loss": 0.0293, "step": 850850 }, { "epoch": 8.37, "grad_norm": 2.964228630065918, "learning_rate": 8.269186848977729e-07, "loss": 0.033, "step": 850875 }, { "epoch": 8.37, "grad_norm": 10.239580154418945, "learning_rate": 8.267945624435243e-07, "loss": 0.028, "step": 850900 }, { "epoch": 8.37, "grad_norm": 5.298896789550781, "learning_rate": 8.266704399892758e-07, "loss": 0.0281, "step": 850925 }, { "epoch": 8.37, "grad_norm": 12.00960922241211, "learning_rate": 8.265463175350274e-07, "loss": 0.0268, "step": 850950 }, { "epoch": 8.37, "grad_norm": 0.14962176978588104, "learning_rate": 8.26422195080779e-07, "loss": 0.0246, "step": 850975 }, { "epoch": 8.37, "grad_norm": 15.548574447631836, "learning_rate": 8.262980726265304e-07, "loss": 0.0201, "step": 851000 }, { "epoch": 8.37, "grad_norm": 0.26293033361434937, "learning_rate": 8.26173950172282e-07, "loss": 0.0318, "step": 851025 }, { "epoch": 8.37, "grad_norm": 8.32166862487793, "learning_rate": 8.260498277180336e-07, "loss": 0.0525, "step": 851050 }, { "epoch": 8.37, "grad_norm": 5.907861709594727, "learning_rate": 8.259257052637851e-07, "loss": 0.038, "step": 851075 }, { "epoch": 8.37, "grad_norm": 6.42479944229126, "learning_rate": 8.258015828095367e-07, "loss": 0.0229, "step": 851100 }, { "epoch": 8.37, "grad_norm": 0.6802264451980591, "learning_rate": 8.256774603552883e-07, "loss": 0.036, "step": 851125 }, { "epoch": 8.37, "grad_norm": 0.09983200579881668, "learning_rate": 8.255533379010397e-07, "loss": 0.0261, "step": 851150 }, { "epoch": 8.37, "grad_norm": 0.0013906522653996944, "learning_rate": 8.254292154467912e-07, "loss": 0.0309, "step": 851175 }, { "epoch": 8.37, "grad_norm": 6.718217849731445, "learning_rate": 8.253050929925428e-07, "loss": 0.018, "step": 851200 }, { "epoch": 8.37, "grad_norm": 9.890637397766113, "learning_rate": 8.251809705382944e-07, "loss": 0.0269, "step": 851225 }, { "epoch": 8.37, "grad_norm": 12.596500396728516, "learning_rate": 8.250568480840458e-07, "loss": 0.0476, "step": 851250 }, { "epoch": 8.37, "grad_norm": 0.009908322244882584, "learning_rate": 8.249327256297974e-07, "loss": 0.0227, "step": 851275 }, { "epoch": 8.37, "grad_norm": 17.852767944335938, "learning_rate": 8.24808603175549e-07, "loss": 0.0299, "step": 851300 }, { "epoch": 8.37, "grad_norm": 0.4028880000114441, "learning_rate": 8.246844807213005e-07, "loss": 0.0385, "step": 851325 }, { "epoch": 8.37, "grad_norm": 5.7328925132751465, "learning_rate": 8.245603582670519e-07, "loss": 0.0201, "step": 851350 }, { "epoch": 8.37, "grad_norm": 1.158220887184143, "learning_rate": 8.244362358128035e-07, "loss": 0.0547, "step": 851375 }, { "epoch": 8.37, "grad_norm": 4.820863246917725, "learning_rate": 8.243121133585551e-07, "loss": 0.0221, "step": 851400 }, { "epoch": 8.37, "grad_norm": 0.9961459636688232, "learning_rate": 8.241879909043066e-07, "loss": 0.0389, "step": 851425 }, { "epoch": 8.37, "grad_norm": 10.95628547668457, "learning_rate": 8.240638684500582e-07, "loss": 0.0383, "step": 851450 }, { "epoch": 8.37, "grad_norm": 0.15728618204593658, "learning_rate": 8.239397459958097e-07, "loss": 0.0303, "step": 851475 }, { "epoch": 8.37, "grad_norm": 11.278573989868164, "learning_rate": 8.238156235415612e-07, "loss": 0.0313, "step": 851500 }, { "epoch": 8.37, "grad_norm": 1.5495100021362305, "learning_rate": 8.236915010873128e-07, "loss": 0.0421, "step": 851525 }, { "epoch": 8.37, "grad_norm": 4.235356330871582, "learning_rate": 8.235673786330644e-07, "loss": 0.0138, "step": 851550 }, { "epoch": 8.37, "grad_norm": 0.5110828280448914, "learning_rate": 8.234432561788158e-07, "loss": 0.0389, "step": 851575 }, { "epoch": 8.37, "grad_norm": 12.460358619689941, "learning_rate": 8.233191337245673e-07, "loss": 0.0414, "step": 851600 }, { "epoch": 8.37, "grad_norm": 0.24164853990077972, "learning_rate": 8.231950112703189e-07, "loss": 0.0229, "step": 851625 }, { "epoch": 8.37, "grad_norm": 8.664180755615234, "learning_rate": 8.230708888160705e-07, "loss": 0.0346, "step": 851650 }, { "epoch": 8.37, "grad_norm": 0.31281009316444397, "learning_rate": 8.229467663618219e-07, "loss": 0.033, "step": 851675 }, { "epoch": 8.37, "grad_norm": 20.659194946289062, "learning_rate": 8.228226439075735e-07, "loss": 0.0308, "step": 851700 }, { "epoch": 8.37, "grad_norm": 0.10508521646261215, "learning_rate": 8.226985214533251e-07, "loss": 0.0232, "step": 851725 }, { "epoch": 8.37, "grad_norm": 10.936830520629883, "learning_rate": 8.225743989990766e-07, "loss": 0.034, "step": 851750 }, { "epoch": 8.37, "grad_norm": 1.2273046970367432, "learning_rate": 8.224502765448282e-07, "loss": 0.0294, "step": 851775 }, { "epoch": 8.38, "grad_norm": 12.727734565734863, "learning_rate": 8.223261540905797e-07, "loss": 0.025, "step": 851800 }, { "epoch": 8.38, "grad_norm": 0.02382247895002365, "learning_rate": 8.222020316363312e-07, "loss": 0.0202, "step": 851825 }, { "epoch": 8.38, "grad_norm": 14.725092887878418, "learning_rate": 8.220779091820827e-07, "loss": 0.0204, "step": 851850 }, { "epoch": 8.38, "grad_norm": 0.049640003591775894, "learning_rate": 8.219537867278343e-07, "loss": 0.022, "step": 851875 }, { "epoch": 8.38, "grad_norm": 10.418488502502441, "learning_rate": 8.218296642735859e-07, "loss": 0.0272, "step": 851900 }, { "epoch": 8.38, "grad_norm": 0.11020762473344803, "learning_rate": 8.217055418193373e-07, "loss": 0.023, "step": 851925 }, { "epoch": 8.38, "grad_norm": 7.737004280090332, "learning_rate": 8.215814193650889e-07, "loss": 0.0176, "step": 851950 }, { "epoch": 8.38, "grad_norm": 0.07801507413387299, "learning_rate": 8.214572969108405e-07, "loss": 0.0276, "step": 851975 }, { "epoch": 8.38, "grad_norm": 6.062646389007568, "learning_rate": 8.21333174456592e-07, "loss": 0.0207, "step": 852000 }, { "epoch": 8.38, "grad_norm": 0.06930642575025558, "learning_rate": 8.212090520023434e-07, "loss": 0.0282, "step": 852025 }, { "epoch": 8.38, "grad_norm": 15.593889236450195, "learning_rate": 8.21084929548095e-07, "loss": 0.0387, "step": 852050 }, { "epoch": 8.38, "grad_norm": 0.4727949798107147, "learning_rate": 8.209608070938466e-07, "loss": 0.0348, "step": 852075 }, { "epoch": 8.38, "grad_norm": 9.333222389221191, "learning_rate": 8.208366846395981e-07, "loss": 0.04, "step": 852100 }, { "epoch": 8.38, "grad_norm": 0.21327845752239227, "learning_rate": 8.207125621853497e-07, "loss": 0.0357, "step": 852125 }, { "epoch": 8.38, "grad_norm": 6.742640495300293, "learning_rate": 8.205884397311012e-07, "loss": 0.0402, "step": 852150 }, { "epoch": 8.38, "grad_norm": 2.7371044158935547, "learning_rate": 8.204643172768527e-07, "loss": 0.0292, "step": 852175 }, { "epoch": 8.38, "grad_norm": 6.753647327423096, "learning_rate": 8.203401948226043e-07, "loss": 0.0317, "step": 852200 }, { "epoch": 8.38, "grad_norm": 0.7700415849685669, "learning_rate": 8.202160723683558e-07, "loss": 0.0462, "step": 852225 }, { "epoch": 8.38, "grad_norm": 3.2125091552734375, "learning_rate": 8.200919499141073e-07, "loss": 0.0277, "step": 852250 }, { "epoch": 8.38, "grad_norm": 1.1449750661849976, "learning_rate": 8.199678274598588e-07, "loss": 0.0346, "step": 852275 }, { "epoch": 8.38, "grad_norm": 13.548816680908203, "learning_rate": 8.198437050056104e-07, "loss": 0.023, "step": 852300 }, { "epoch": 8.38, "grad_norm": 7.062907695770264, "learning_rate": 8.19719582551362e-07, "loss": 0.0446, "step": 852325 }, { "epoch": 8.38, "grad_norm": 1.9715877771377563, "learning_rate": 8.195954600971134e-07, "loss": 0.0296, "step": 852350 }, { "epoch": 8.38, "grad_norm": 0.35150671005249023, "learning_rate": 8.19471337642865e-07, "loss": 0.0279, "step": 852375 }, { "epoch": 8.38, "grad_norm": 22.3641414642334, "learning_rate": 8.193472151886166e-07, "loss": 0.0495, "step": 852400 }, { "epoch": 8.38, "grad_norm": 0.09137487411499023, "learning_rate": 8.192230927343681e-07, "loss": 0.0291, "step": 852425 }, { "epoch": 8.38, "grad_norm": 0.5689426064491272, "learning_rate": 8.190989702801196e-07, "loss": 0.0297, "step": 852450 }, { "epoch": 8.38, "grad_norm": 0.011899229139089584, "learning_rate": 8.189748478258712e-07, "loss": 0.0295, "step": 852475 }, { "epoch": 8.38, "grad_norm": 1.0559449195861816, "learning_rate": 8.188507253716227e-07, "loss": 0.0326, "step": 852500 }, { "epoch": 8.38, "grad_norm": 3.330745220184326, "learning_rate": 8.187266029173742e-07, "loss": 0.0254, "step": 852525 }, { "epoch": 8.38, "grad_norm": 8.737738609313965, "learning_rate": 8.186024804631258e-07, "loss": 0.0344, "step": 852550 }, { "epoch": 8.38, "grad_norm": 1.6857471466064453, "learning_rate": 8.184783580088774e-07, "loss": 0.0361, "step": 852575 }, { "epoch": 8.38, "grad_norm": 5.868519306182861, "learning_rate": 8.183542355546288e-07, "loss": 0.0268, "step": 852600 }, { "epoch": 8.38, "grad_norm": 0.10380522161722183, "learning_rate": 8.182301131003804e-07, "loss": 0.0284, "step": 852625 }, { "epoch": 8.38, "grad_norm": 12.335504531860352, "learning_rate": 8.18105990646132e-07, "loss": 0.0229, "step": 852650 }, { "epoch": 8.38, "grad_norm": 4.890358924865723, "learning_rate": 8.179868330900534e-07, "loss": 0.0511, "step": 852675 }, { "epoch": 8.38, "grad_norm": 4.1930623054504395, "learning_rate": 8.178627106358049e-07, "loss": 0.0392, "step": 852700 }, { "epoch": 8.38, "grad_norm": 1.4765658378601074, "learning_rate": 8.177385881815566e-07, "loss": 0.0416, "step": 852725 }, { "epoch": 8.38, "grad_norm": 4.81295919418335, "learning_rate": 8.17614465727308e-07, "loss": 0.0298, "step": 852750 }, { "epoch": 8.38, "grad_norm": 0.005507873371243477, "learning_rate": 8.174903432730595e-07, "loss": 0.0293, "step": 852775 }, { "epoch": 8.38, "grad_norm": 5.411227226257324, "learning_rate": 8.17366220818811e-07, "loss": 0.0292, "step": 852800 }, { "epoch": 8.39, "grad_norm": 2.2253055572509766, "learning_rate": 8.172420983645627e-07, "loss": 0.0294, "step": 852825 }, { "epoch": 8.39, "grad_norm": 11.29321575164795, "learning_rate": 8.171179759103141e-07, "loss": 0.0243, "step": 852850 }, { "epoch": 8.39, "grad_norm": 0.01881948672235012, "learning_rate": 8.169938534560656e-07, "loss": 0.0376, "step": 852875 }, { "epoch": 8.39, "grad_norm": 13.583832740783691, "learning_rate": 8.168697310018173e-07, "loss": 0.0275, "step": 852900 }, { "epoch": 8.39, "grad_norm": 0.13852424919605255, "learning_rate": 8.167456085475688e-07, "loss": 0.0333, "step": 852925 }, { "epoch": 8.39, "grad_norm": 8.415173530578613, "learning_rate": 8.166214860933202e-07, "loss": 0.0175, "step": 852950 }, { "epoch": 8.39, "grad_norm": 0.03411718085408211, "learning_rate": 8.164973636390719e-07, "loss": 0.0347, "step": 852975 }, { "epoch": 8.39, "grad_norm": 18.930370330810547, "learning_rate": 8.163732411848234e-07, "loss": 0.03, "step": 853000 }, { "epoch": 8.39, "grad_norm": 1.6845539808273315, "learning_rate": 8.162491187305749e-07, "loss": 0.0495, "step": 853025 }, { "epoch": 8.39, "grad_norm": 8.099469184875488, "learning_rate": 8.161249962763263e-07, "loss": 0.0337, "step": 853050 }, { "epoch": 8.39, "grad_norm": 21.42417335510254, "learning_rate": 8.16000873822078e-07, "loss": 0.0446, "step": 853075 }, { "epoch": 8.39, "grad_norm": 4.579428672790527, "learning_rate": 8.158767513678295e-07, "loss": 0.0202, "step": 853100 }, { "epoch": 8.39, "grad_norm": 1.0836974382400513, "learning_rate": 8.15752628913581e-07, "loss": 0.0296, "step": 853125 }, { "epoch": 8.39, "grad_norm": 7.86398983001709, "learning_rate": 8.156285064593327e-07, "loss": 0.0241, "step": 853150 }, { "epoch": 8.39, "grad_norm": 1.3190680742263794, "learning_rate": 8.155043840050841e-07, "loss": 0.0376, "step": 853175 }, { "epoch": 8.39, "grad_norm": 5.16089391708374, "learning_rate": 8.153802615508356e-07, "loss": 0.037, "step": 853200 }, { "epoch": 8.39, "grad_norm": 0.12431196123361588, "learning_rate": 8.152561390965871e-07, "loss": 0.0337, "step": 853225 }, { "epoch": 8.39, "grad_norm": 17.70264434814453, "learning_rate": 8.151320166423388e-07, "loss": 0.0231, "step": 853250 }, { "epoch": 8.39, "grad_norm": 0.2418072670698166, "learning_rate": 8.150078941880903e-07, "loss": 0.04, "step": 853275 }, { "epoch": 8.39, "grad_norm": 27.66493797302246, "learning_rate": 8.148837717338417e-07, "loss": 0.0268, "step": 853300 }, { "epoch": 8.39, "grad_norm": 0.059118885546922684, "learning_rate": 8.147596492795934e-07, "loss": 0.0476, "step": 853325 }, { "epoch": 8.39, "grad_norm": 6.693316459655762, "learning_rate": 8.146355268253449e-07, "loss": 0.0353, "step": 853350 }, { "epoch": 8.39, "grad_norm": 1.9065676927566528, "learning_rate": 8.145114043710964e-07, "loss": 0.0313, "step": 853375 }, { "epoch": 8.39, "grad_norm": 7.945028781890869, "learning_rate": 8.143872819168481e-07, "loss": 0.0403, "step": 853400 }, { "epoch": 8.39, "grad_norm": 0.19350293278694153, "learning_rate": 8.142631594625995e-07, "loss": 0.0404, "step": 853425 }, { "epoch": 8.39, "grad_norm": 19.484468460083008, "learning_rate": 8.14139037008351e-07, "loss": 0.0213, "step": 853450 }, { "epoch": 8.39, "grad_norm": 0.06134352833032608, "learning_rate": 8.140149145541025e-07, "loss": 0.0245, "step": 853475 }, { "epoch": 8.39, "grad_norm": 8.60737419128418, "learning_rate": 8.138907920998542e-07, "loss": 0.0173, "step": 853500 }, { "epoch": 8.39, "grad_norm": 3.3509762287139893, "learning_rate": 8.137666696456056e-07, "loss": 0.0493, "step": 853525 }, { "epoch": 8.39, "grad_norm": 6.111215114593506, "learning_rate": 8.136425471913571e-07, "loss": 0.033, "step": 853550 }, { "epoch": 8.39, "grad_norm": 0.042500417679548264, "learning_rate": 8.135184247371088e-07, "loss": 0.0317, "step": 853575 }, { "epoch": 8.39, "grad_norm": 4.20973014831543, "learning_rate": 8.133943022828603e-07, "loss": 0.0267, "step": 853600 }, { "epoch": 8.39, "grad_norm": 1.6082364320755005, "learning_rate": 8.132701798286117e-07, "loss": 0.0425, "step": 853625 }, { "epoch": 8.39, "grad_norm": 13.63044548034668, "learning_rate": 8.131460573743632e-07, "loss": 0.0223, "step": 853650 }, { "epoch": 8.39, "grad_norm": 0.11096855252981186, "learning_rate": 8.130219349201149e-07, "loss": 0.0393, "step": 853675 }, { "epoch": 8.39, "grad_norm": 5.299347400665283, "learning_rate": 8.128978124658664e-07, "loss": 0.0204, "step": 853700 }, { "epoch": 8.39, "grad_norm": 0.5392539501190186, "learning_rate": 8.127736900116178e-07, "loss": 0.0372, "step": 853725 }, { "epoch": 8.39, "grad_norm": 4.419524192810059, "learning_rate": 8.126495675573695e-07, "loss": 0.0305, "step": 853750 }, { "epoch": 8.39, "grad_norm": 0.38744205236434937, "learning_rate": 8.12525445103121e-07, "loss": 0.0475, "step": 853775 }, { "epoch": 8.39, "grad_norm": 21.40479850769043, "learning_rate": 8.124013226488725e-07, "loss": 0.0224, "step": 853800 }, { "epoch": 8.39, "grad_norm": 0.9230984449386597, "learning_rate": 8.122772001946242e-07, "loss": 0.0384, "step": 853825 }, { "epoch": 8.4, "grad_norm": 26.665742874145508, "learning_rate": 8.121530777403756e-07, "loss": 0.0372, "step": 853850 }, { "epoch": 8.4, "grad_norm": 1.3506237268447876, "learning_rate": 8.120289552861271e-07, "loss": 0.0253, "step": 853875 }, { "epoch": 8.4, "grad_norm": 13.788954734802246, "learning_rate": 8.119048328318786e-07, "loss": 0.0283, "step": 853900 }, { "epoch": 8.4, "grad_norm": 0.0037682901602238417, "learning_rate": 8.117807103776303e-07, "loss": 0.0387, "step": 853925 }, { "epoch": 8.4, "grad_norm": 1.6363322734832764, "learning_rate": 8.116565879233818e-07, "loss": 0.0278, "step": 853950 }, { "epoch": 8.4, "grad_norm": 0.026627464219927788, "learning_rate": 8.115324654691332e-07, "loss": 0.0386, "step": 853975 }, { "epoch": 8.4, "grad_norm": 0.8648442029953003, "learning_rate": 8.114083430148849e-07, "loss": 0.0269, "step": 854000 }, { "epoch": 8.4, "grad_norm": 2.6920924186706543, "learning_rate": 8.112842205606364e-07, "loss": 0.0303, "step": 854025 }, { "epoch": 8.4, "grad_norm": 9.156692504882812, "learning_rate": 8.111600981063879e-07, "loss": 0.0197, "step": 854050 }, { "epoch": 8.4, "grad_norm": 0.47775784134864807, "learning_rate": 8.110359756521393e-07, "loss": 0.0325, "step": 854075 }, { "epoch": 8.4, "grad_norm": 0.2066890001296997, "learning_rate": 8.10911853197891e-07, "loss": 0.0179, "step": 854100 }, { "epoch": 8.4, "grad_norm": 0.10381706804037094, "learning_rate": 8.107877307436425e-07, "loss": 0.0582, "step": 854125 }, { "epoch": 8.4, "grad_norm": 3.7484209537506104, "learning_rate": 8.10663608289394e-07, "loss": 0.0317, "step": 854150 }, { "epoch": 8.4, "grad_norm": 0.055568140000104904, "learning_rate": 8.105394858351457e-07, "loss": 0.0435, "step": 854175 }, { "epoch": 8.4, "grad_norm": 7.8010077476501465, "learning_rate": 8.104153633808971e-07, "loss": 0.0243, "step": 854200 }, { "epoch": 8.4, "grad_norm": 0.026435844600200653, "learning_rate": 8.102912409266486e-07, "loss": 0.0385, "step": 854225 }, { "epoch": 8.4, "grad_norm": 21.728750228881836, "learning_rate": 8.101671184724003e-07, "loss": 0.0333, "step": 854250 }, { "epoch": 8.4, "grad_norm": 3.773888111114502, "learning_rate": 8.100429960181518e-07, "loss": 0.0327, "step": 854275 }, { "epoch": 8.4, "grad_norm": 1.7933138608932495, "learning_rate": 8.099188735639032e-07, "loss": 0.0172, "step": 854300 }, { "epoch": 8.4, "grad_norm": 0.06621820479631424, "learning_rate": 8.097947511096547e-07, "loss": 0.0447, "step": 854325 }, { "epoch": 8.4, "grad_norm": 12.229044914245605, "learning_rate": 8.096706286554064e-07, "loss": 0.0275, "step": 854350 }, { "epoch": 8.4, "grad_norm": 4.281561374664307, "learning_rate": 8.095465062011579e-07, "loss": 0.0336, "step": 854375 }, { "epoch": 8.4, "grad_norm": 10.431800842285156, "learning_rate": 8.094223837469093e-07, "loss": 0.0281, "step": 854400 }, { "epoch": 8.4, "grad_norm": 0.2628931701183319, "learning_rate": 8.09298261292661e-07, "loss": 0.0348, "step": 854425 }, { "epoch": 8.4, "grad_norm": 12.050881385803223, "learning_rate": 8.091741388384125e-07, "loss": 0.0359, "step": 854450 }, { "epoch": 8.4, "grad_norm": 0.7053664326667786, "learning_rate": 8.09050016384164e-07, "loss": 0.0418, "step": 854475 }, { "epoch": 8.4, "grad_norm": 12.701065063476562, "learning_rate": 8.089258939299154e-07, "loss": 0.0302, "step": 854500 }, { "epoch": 8.4, "grad_norm": 0.14519184827804565, "learning_rate": 8.088017714756671e-07, "loss": 0.0354, "step": 854525 }, { "epoch": 8.4, "grad_norm": 11.469609260559082, "learning_rate": 8.086776490214186e-07, "loss": 0.0323, "step": 854550 }, { "epoch": 8.4, "grad_norm": 0.6406120657920837, "learning_rate": 8.085535265671701e-07, "loss": 0.0283, "step": 854575 }, { "epoch": 8.4, "grad_norm": 13.992619514465332, "learning_rate": 8.084294041129218e-07, "loss": 0.0577, "step": 854600 }, { "epoch": 8.4, "grad_norm": 0.4109087288379669, "learning_rate": 8.083052816586732e-07, "loss": 0.0533, "step": 854625 }, { "epoch": 8.4, "grad_norm": 11.728551864624023, "learning_rate": 8.081811592044247e-07, "loss": 0.0289, "step": 854650 }, { "epoch": 8.4, "grad_norm": 0.4102484881877899, "learning_rate": 8.080570367501764e-07, "loss": 0.0226, "step": 854675 }, { "epoch": 8.4, "grad_norm": 2.507248640060425, "learning_rate": 8.079329142959279e-07, "loss": 0.0311, "step": 854700 }, { "epoch": 8.4, "grad_norm": 0.15374736487865448, "learning_rate": 8.078087918416794e-07, "loss": 0.0444, "step": 854725 }, { "epoch": 8.4, "grad_norm": 20.91575050354004, "learning_rate": 8.076846693874308e-07, "loss": 0.0514, "step": 854750 }, { "epoch": 8.4, "grad_norm": 0.011796810664236546, "learning_rate": 8.075605469331825e-07, "loss": 0.0485, "step": 854775 }, { "epoch": 8.4, "grad_norm": 16.370267868041992, "learning_rate": 8.07436424478934e-07, "loss": 0.0342, "step": 854800 }, { "epoch": 8.4, "grad_norm": 12.042562484741211, "learning_rate": 8.073123020246855e-07, "loss": 0.0503, "step": 854825 }, { "epoch": 8.41, "grad_norm": 20.335315704345703, "learning_rate": 8.071881795704372e-07, "loss": 0.0264, "step": 854850 }, { "epoch": 8.41, "grad_norm": 1.3936651945114136, "learning_rate": 8.070640571161886e-07, "loss": 0.0282, "step": 854875 }, { "epoch": 8.41, "grad_norm": 0.7190821766853333, "learning_rate": 8.069399346619401e-07, "loss": 0.0176, "step": 854900 }, { "epoch": 8.41, "grad_norm": 0.04530326649546623, "learning_rate": 8.068158122076917e-07, "loss": 0.0334, "step": 854925 }, { "epoch": 8.41, "grad_norm": 2.707216262817383, "learning_rate": 8.066916897534433e-07, "loss": 0.0282, "step": 854950 }, { "epoch": 8.41, "grad_norm": 0.05875645950436592, "learning_rate": 8.065675672991947e-07, "loss": 0.0367, "step": 854975 }, { "epoch": 8.41, "grad_norm": 16.596576690673828, "learning_rate": 8.064434448449462e-07, "loss": 0.0372, "step": 855000 }, { "epoch": 8.41, "grad_norm": 0.7117863893508911, "learning_rate": 8.063193223906979e-07, "loss": 0.0417, "step": 855025 }, { "epoch": 8.41, "grad_norm": 4.723476409912109, "learning_rate": 8.061951999364494e-07, "loss": 0.0258, "step": 855050 }, { "epoch": 8.41, "grad_norm": 0.5891786217689514, "learning_rate": 8.060710774822008e-07, "loss": 0.0258, "step": 855075 }, { "epoch": 8.41, "grad_norm": 3.2337398529052734, "learning_rate": 8.059469550279525e-07, "loss": 0.0147, "step": 855100 }, { "epoch": 8.41, "grad_norm": 0.027549637481570244, "learning_rate": 8.058277974718739e-07, "loss": 0.0374, "step": 855125 }, { "epoch": 8.41, "grad_norm": 19.999818801879883, "learning_rate": 8.057036750176254e-07, "loss": 0.0371, "step": 855150 }, { "epoch": 8.41, "grad_norm": 2.5945329666137695, "learning_rate": 8.05579552563377e-07, "loss": 0.0319, "step": 855175 }, { "epoch": 8.41, "grad_norm": 1.0385900735855103, "learning_rate": 8.054554301091286e-07, "loss": 0.0096, "step": 855200 }, { "epoch": 8.41, "grad_norm": 0.03453590348362923, "learning_rate": 8.0533130765488e-07, "loss": 0.0326, "step": 855225 }, { "epoch": 8.41, "grad_norm": 7.3191752433776855, "learning_rate": 8.052071852006316e-07, "loss": 0.0349, "step": 855250 }, { "epoch": 8.41, "grad_norm": 10.167186737060547, "learning_rate": 8.050830627463832e-07, "loss": 0.0435, "step": 855275 }, { "epoch": 8.41, "grad_norm": 1.4856464862823486, "learning_rate": 8.049589402921347e-07, "loss": 0.031, "step": 855300 }, { "epoch": 8.41, "grad_norm": 1.3070049285888672, "learning_rate": 8.048348178378861e-07, "loss": 0.037, "step": 855325 }, { "epoch": 8.41, "grad_norm": 2.293520212173462, "learning_rate": 8.047106953836377e-07, "loss": 0.0282, "step": 855350 }, { "epoch": 8.41, "grad_norm": 0.002905969973653555, "learning_rate": 8.045865729293893e-07, "loss": 0.0227, "step": 855375 }, { "epoch": 8.41, "grad_norm": 8.445296287536621, "learning_rate": 8.044624504751408e-07, "loss": 0.0265, "step": 855400 }, { "epoch": 8.41, "grad_norm": 2.1861743927001953, "learning_rate": 8.043383280208924e-07, "loss": 0.0255, "step": 855425 }, { "epoch": 8.41, "grad_norm": 10.907427787780762, "learning_rate": 8.042142055666439e-07, "loss": 0.0158, "step": 855450 }, { "epoch": 8.41, "grad_norm": 0.2001013159751892, "learning_rate": 8.040900831123954e-07, "loss": 0.0297, "step": 855475 }, { "epoch": 8.41, "grad_norm": 8.030364036560059, "learning_rate": 8.03965960658147e-07, "loss": 0.0543, "step": 855500 }, { "epoch": 8.41, "grad_norm": 0.04073556885123253, "learning_rate": 8.038418382038985e-07, "loss": 0.0328, "step": 855525 }, { "epoch": 8.41, "grad_norm": 0.7442592978477478, "learning_rate": 8.037177157496501e-07, "loss": 0.0336, "step": 855550 }, { "epoch": 8.41, "grad_norm": 0.008011708036065102, "learning_rate": 8.035935932954015e-07, "loss": 0.0368, "step": 855575 }, { "epoch": 8.41, "grad_norm": 3.308234214782715, "learning_rate": 8.034694708411531e-07, "loss": 0.0258, "step": 855600 }, { "epoch": 8.41, "grad_norm": 0.046129900962114334, "learning_rate": 8.033453483869047e-07, "loss": 0.0274, "step": 855625 }, { "epoch": 8.41, "grad_norm": 1.772174596786499, "learning_rate": 8.032212259326562e-07, "loss": 0.0212, "step": 855650 }, { "epoch": 8.41, "grad_norm": 3.251462459564209, "learning_rate": 8.030971034784077e-07, "loss": 0.0347, "step": 855675 }, { "epoch": 8.41, "grad_norm": 15.808440208435059, "learning_rate": 8.029729810241593e-07, "loss": 0.0351, "step": 855700 }, { "epoch": 8.41, "grad_norm": 0.08011915534734726, "learning_rate": 8.028488585699108e-07, "loss": 0.0441, "step": 855725 }, { "epoch": 8.41, "grad_norm": 7.834386348724365, "learning_rate": 8.027247361156624e-07, "loss": 0.0179, "step": 855750 }, { "epoch": 8.41, "grad_norm": 0.022392572835087776, "learning_rate": 8.026006136614138e-07, "loss": 0.0331, "step": 855775 }, { "epoch": 8.41, "grad_norm": 6.361346244812012, "learning_rate": 8.024764912071654e-07, "loss": 0.0252, "step": 855800 }, { "epoch": 8.41, "grad_norm": 0.34013038873672485, "learning_rate": 8.023523687529169e-07, "loss": 0.0254, "step": 855825 }, { "epoch": 8.41, "grad_norm": 11.222862243652344, "learning_rate": 8.022282462986685e-07, "loss": 0.0221, "step": 855850 }, { "epoch": 8.42, "grad_norm": 5.011178493499756, "learning_rate": 8.021041238444201e-07, "loss": 0.0363, "step": 855875 }, { "epoch": 8.42, "grad_norm": 14.59866714477539, "learning_rate": 8.019800013901715e-07, "loss": 0.0195, "step": 855900 }, { "epoch": 8.42, "grad_norm": 1.7059199810028076, "learning_rate": 8.018558789359231e-07, "loss": 0.0313, "step": 855925 }, { "epoch": 8.42, "grad_norm": 0.25893858075141907, "learning_rate": 8.017317564816746e-07, "loss": 0.0196, "step": 855950 }, { "epoch": 8.42, "grad_norm": 0.09052351117134094, "learning_rate": 8.016076340274262e-07, "loss": 0.0491, "step": 855975 }, { "epoch": 8.42, "grad_norm": 15.258813858032227, "learning_rate": 8.014835115731776e-07, "loss": 0.0444, "step": 856000 }, { "epoch": 8.42, "grad_norm": 2.5965933799743652, "learning_rate": 8.013593891189292e-07, "loss": 0.0255, "step": 856025 }, { "epoch": 8.42, "grad_norm": 13.585522651672363, "learning_rate": 8.012352666646808e-07, "loss": 0.04, "step": 856050 }, { "epoch": 8.42, "grad_norm": 7.9402313232421875, "learning_rate": 8.011111442104323e-07, "loss": 0.0308, "step": 856075 }, { "epoch": 8.42, "grad_norm": 8.875741004943848, "learning_rate": 8.009870217561838e-07, "loss": 0.0444, "step": 856100 }, { "epoch": 8.42, "grad_norm": 0.060113172978162766, "learning_rate": 8.008628993019354e-07, "loss": 0.036, "step": 856125 }, { "epoch": 8.42, "grad_norm": 4.843652725219727, "learning_rate": 8.007387768476869e-07, "loss": 0.0252, "step": 856150 }, { "epoch": 8.42, "grad_norm": 5.57909631729126, "learning_rate": 8.006146543934385e-07, "loss": 0.0339, "step": 856175 }, { "epoch": 8.42, "grad_norm": 27.628889083862305, "learning_rate": 8.0049053193919e-07, "loss": 0.0299, "step": 856200 }, { "epoch": 8.42, "grad_norm": 11.234132766723633, "learning_rate": 8.003664094849416e-07, "loss": 0.0305, "step": 856225 }, { "epoch": 8.42, "grad_norm": 16.054195404052734, "learning_rate": 8.00242287030693e-07, "loss": 0.0213, "step": 856250 }, { "epoch": 8.42, "grad_norm": 0.016203975304961205, "learning_rate": 8.001181645764446e-07, "loss": 0.0473, "step": 856275 }, { "epoch": 8.42, "grad_norm": 20.044042587280273, "learning_rate": 7.999940421221962e-07, "loss": 0.0344, "step": 856300 }, { "epoch": 8.42, "grad_norm": 1.7916531562805176, "learning_rate": 7.998699196679477e-07, "loss": 0.0271, "step": 856325 }, { "epoch": 8.42, "grad_norm": 6.092906475067139, "learning_rate": 7.997457972136992e-07, "loss": 0.0254, "step": 856350 }, { "epoch": 8.42, "grad_norm": 0.3219800889492035, "learning_rate": 7.996216747594507e-07, "loss": 0.0265, "step": 856375 }, { "epoch": 8.42, "grad_norm": 2.1080613136291504, "learning_rate": 7.994975523052023e-07, "loss": 0.0184, "step": 856400 }, { "epoch": 8.42, "grad_norm": 0.13701476156711578, "learning_rate": 7.993734298509539e-07, "loss": 0.0405, "step": 856425 }, { "epoch": 8.42, "grad_norm": 6.0987229347229, "learning_rate": 7.992493073967053e-07, "loss": 0.0295, "step": 856450 }, { "epoch": 8.42, "grad_norm": 0.0832635760307312, "learning_rate": 7.991251849424569e-07, "loss": 0.0398, "step": 856475 }, { "epoch": 8.42, "grad_norm": 3.299207925796509, "learning_rate": 7.990010624882084e-07, "loss": 0.0207, "step": 856500 }, { "epoch": 8.42, "grad_norm": 0.13327090442180634, "learning_rate": 7.9887694003396e-07, "loss": 0.0219, "step": 856525 }, { "epoch": 8.42, "grad_norm": 21.39993667602539, "learning_rate": 7.987528175797116e-07, "loss": 0.0306, "step": 856550 }, { "epoch": 8.42, "grad_norm": 0.02176262065768242, "learning_rate": 7.98628695125463e-07, "loss": 0.029, "step": 856575 }, { "epoch": 8.42, "grad_norm": 10.599849700927734, "learning_rate": 7.985045726712146e-07, "loss": 0.0371, "step": 856600 }, { "epoch": 8.42, "grad_norm": 0.4810192286968231, "learning_rate": 7.983804502169661e-07, "loss": 0.0363, "step": 856625 }, { "epoch": 8.42, "grad_norm": 2.7205426692962646, "learning_rate": 7.982563277627177e-07, "loss": 0.0162, "step": 856650 }, { "epoch": 8.42, "grad_norm": 0.304866224527359, "learning_rate": 7.981322053084691e-07, "loss": 0.0416, "step": 856675 }, { "epoch": 8.42, "grad_norm": 6.833945274353027, "learning_rate": 7.980080828542207e-07, "loss": 0.0302, "step": 856700 }, { "epoch": 8.42, "grad_norm": 0.04143937677145004, "learning_rate": 7.978839603999723e-07, "loss": 0.0186, "step": 856725 }, { "epoch": 8.42, "grad_norm": 5.026163578033447, "learning_rate": 7.977598379457238e-07, "loss": 0.0148, "step": 856750 }, { "epoch": 8.42, "grad_norm": 0.016718868166208267, "learning_rate": 7.976357154914753e-07, "loss": 0.0454, "step": 856775 }, { "epoch": 8.42, "grad_norm": 3.6224327087402344, "learning_rate": 7.975115930372268e-07, "loss": 0.0252, "step": 856800 }, { "epoch": 8.42, "grad_norm": 8.392702102661133, "learning_rate": 7.973874705829784e-07, "loss": 0.0394, "step": 856825 }, { "epoch": 8.42, "grad_norm": 11.524086952209473, "learning_rate": 7.9726334812873e-07, "loss": 0.0451, "step": 856850 }, { "epoch": 8.42, "grad_norm": 0.020662730559706688, "learning_rate": 7.971392256744815e-07, "loss": 0.0152, "step": 856875 }, { "epoch": 8.43, "grad_norm": 11.077412605285645, "learning_rate": 7.970151032202331e-07, "loss": 0.0175, "step": 856900 }, { "epoch": 8.43, "grad_norm": 0.06646659225225449, "learning_rate": 7.968909807659845e-07, "loss": 0.0384, "step": 856925 }, { "epoch": 8.43, "grad_norm": 7.749086856842041, "learning_rate": 7.967668583117361e-07, "loss": 0.0202, "step": 856950 }, { "epoch": 8.43, "grad_norm": 0.12655891478061676, "learning_rate": 7.966427358574877e-07, "loss": 0.034, "step": 856975 }, { "epoch": 8.43, "grad_norm": 9.681471824645996, "learning_rate": 7.965186134032392e-07, "loss": 0.013, "step": 857000 }, { "epoch": 8.43, "grad_norm": 7.456437587738037, "learning_rate": 7.963944909489907e-07, "loss": 0.0253, "step": 857025 }, { "epoch": 8.43, "grad_norm": 1.5046266317367554, "learning_rate": 7.962703684947422e-07, "loss": 0.037, "step": 857050 }, { "epoch": 8.43, "grad_norm": 0.00242127594538033, "learning_rate": 7.961462460404938e-07, "loss": 0.0226, "step": 857075 }, { "epoch": 8.43, "grad_norm": 0.566972553730011, "learning_rate": 7.960221235862453e-07, "loss": 0.0198, "step": 857100 }, { "epoch": 8.43, "grad_norm": 0.8075199127197266, "learning_rate": 7.958980011319968e-07, "loss": 0.0452, "step": 857125 }, { "epoch": 8.43, "grad_norm": 3.362881660461426, "learning_rate": 7.957738786777484e-07, "loss": 0.0233, "step": 857150 }, { "epoch": 8.43, "grad_norm": 0.01591566763818264, "learning_rate": 7.956497562234999e-07, "loss": 0.0478, "step": 857175 }, { "epoch": 8.43, "grad_norm": 16.502321243286133, "learning_rate": 7.955256337692515e-07, "loss": 0.0294, "step": 857200 }, { "epoch": 8.43, "grad_norm": 3.05967116355896, "learning_rate": 7.954015113150029e-07, "loss": 0.0207, "step": 857225 }, { "epoch": 8.43, "grad_norm": 2.5556716918945312, "learning_rate": 7.952773888607545e-07, "loss": 0.0413, "step": 857250 }, { "epoch": 8.43, "grad_norm": 0.0783236175775528, "learning_rate": 7.95153266406506e-07, "loss": 0.0245, "step": 857275 }, { "epoch": 8.43, "grad_norm": 28.277433395385742, "learning_rate": 7.950291439522576e-07, "loss": 0.0389, "step": 857300 }, { "epoch": 8.43, "grad_norm": 0.05362724885344505, "learning_rate": 7.949050214980092e-07, "loss": 0.037, "step": 857325 }, { "epoch": 8.43, "grad_norm": 1.2530372142791748, "learning_rate": 7.947808990437606e-07, "loss": 0.0244, "step": 857350 }, { "epoch": 8.43, "grad_norm": 0.844902515411377, "learning_rate": 7.946567765895122e-07, "loss": 0.0227, "step": 857375 }, { "epoch": 8.43, "grad_norm": 4.432713031768799, "learning_rate": 7.945326541352638e-07, "loss": 0.0282, "step": 857400 }, { "epoch": 8.43, "grad_norm": 6.729166030883789, "learning_rate": 7.944085316810153e-07, "loss": 0.0244, "step": 857425 }, { "epoch": 8.43, "grad_norm": 5.884064674377441, "learning_rate": 7.942844092267668e-07, "loss": 0.0126, "step": 857450 }, { "epoch": 8.43, "grad_norm": 0.2511707842350006, "learning_rate": 7.941602867725183e-07, "loss": 0.0425, "step": 857475 }, { "epoch": 8.43, "grad_norm": 32.30373764038086, "learning_rate": 7.940361643182699e-07, "loss": 0.0275, "step": 857500 }, { "epoch": 8.43, "grad_norm": 0.12305210530757904, "learning_rate": 7.939120418640214e-07, "loss": 0.0345, "step": 857525 }, { "epoch": 8.43, "grad_norm": 8.041130065917969, "learning_rate": 7.93787919409773e-07, "loss": 0.0423, "step": 857550 }, { "epoch": 8.43, "grad_norm": 0.0055529274977743626, "learning_rate": 7.936637969555246e-07, "loss": 0.0319, "step": 857575 }, { "epoch": 8.43, "grad_norm": 9.376130104064941, "learning_rate": 7.93539674501276e-07, "loss": 0.0274, "step": 857600 }, { "epoch": 8.43, "grad_norm": 0.8375656604766846, "learning_rate": 7.934155520470276e-07, "loss": 0.0382, "step": 857625 }, { "epoch": 8.43, "grad_norm": 11.905436515808105, "learning_rate": 7.932914295927791e-07, "loss": 0.0306, "step": 857650 }, { "epoch": 8.43, "grad_norm": 1.4263911247253418, "learning_rate": 7.931673071385307e-07, "loss": 0.0298, "step": 857675 }, { "epoch": 8.43, "grad_norm": 9.7365140914917, "learning_rate": 7.930431846842822e-07, "loss": 0.0275, "step": 857700 }, { "epoch": 8.43, "grad_norm": 0.06037956848740578, "learning_rate": 7.929190622300337e-07, "loss": 0.032, "step": 857725 }, { "epoch": 8.43, "grad_norm": 7.621938228607178, "learning_rate": 7.927949397757853e-07, "loss": 0.0252, "step": 857750 }, { "epoch": 8.43, "grad_norm": 0.14985089004039764, "learning_rate": 7.926708173215368e-07, "loss": 0.0401, "step": 857775 }, { "epoch": 8.43, "grad_norm": 0.3341760039329529, "learning_rate": 7.925466948672883e-07, "loss": 0.0224, "step": 857800 }, { "epoch": 8.43, "grad_norm": 2.3775832653045654, "learning_rate": 7.924225724130399e-07, "loss": 0.0336, "step": 857825 }, { "epoch": 8.43, "grad_norm": 11.916203498840332, "learning_rate": 7.922984499587914e-07, "loss": 0.0471, "step": 857850 }, { "epoch": 8.43, "grad_norm": 0.007412839215248823, "learning_rate": 7.92174327504543e-07, "loss": 0.0482, "step": 857875 }, { "epoch": 8.44, "grad_norm": 5.725809574127197, "learning_rate": 7.920502050502944e-07, "loss": 0.035, "step": 857900 }, { "epoch": 8.44, "grad_norm": 0.24711468815803528, "learning_rate": 7.91926082596046e-07, "loss": 0.0396, "step": 857925 }, { "epoch": 8.44, "grad_norm": 0.1643480658531189, "learning_rate": 7.918019601417975e-07, "loss": 0.0258, "step": 857950 }, { "epoch": 8.44, "grad_norm": 10.408573150634766, "learning_rate": 7.916778376875491e-07, "loss": 0.0353, "step": 857975 }, { "epoch": 8.44, "grad_norm": 0.31236350536346436, "learning_rate": 7.915537152333007e-07, "loss": 0.023, "step": 858000 }, { "epoch": 8.44, "grad_norm": 3.666027069091797, "learning_rate": 7.914295927790521e-07, "loss": 0.034, "step": 858025 }, { "epoch": 8.44, "grad_norm": 29.854169845581055, "learning_rate": 7.913054703248037e-07, "loss": 0.0227, "step": 858050 }, { "epoch": 8.44, "grad_norm": 1.5281914472579956, "learning_rate": 7.911813478705552e-07, "loss": 0.0659, "step": 858075 }, { "epoch": 8.44, "grad_norm": 2.4404096603393555, "learning_rate": 7.910572254163068e-07, "loss": 0.0268, "step": 858100 }, { "epoch": 8.44, "grad_norm": 1.8526424169540405, "learning_rate": 7.909331029620583e-07, "loss": 0.0351, "step": 858125 }, { "epoch": 8.44, "grad_norm": 6.7452006340026855, "learning_rate": 7.908089805078098e-07, "loss": 0.0154, "step": 858150 }, { "epoch": 8.44, "grad_norm": 1.4307582378387451, "learning_rate": 7.906848580535614e-07, "loss": 0.0299, "step": 858175 }, { "epoch": 8.44, "grad_norm": 18.813478469848633, "learning_rate": 7.905607355993129e-07, "loss": 0.0299, "step": 858200 }, { "epoch": 8.44, "grad_norm": 0.016503088176250458, "learning_rate": 7.904366131450645e-07, "loss": 0.0359, "step": 858225 }, { "epoch": 8.44, "grad_norm": 7.417838096618652, "learning_rate": 7.903124906908161e-07, "loss": 0.0328, "step": 858250 }, { "epoch": 8.44, "grad_norm": 6.954288482666016, "learning_rate": 7.901933331347375e-07, "loss": 0.0332, "step": 858275 }, { "epoch": 8.44, "grad_norm": 2.3717095851898193, "learning_rate": 7.90069210680489e-07, "loss": 0.0213, "step": 858300 }, { "epoch": 8.44, "grad_norm": 0.1067252904176712, "learning_rate": 7.899450882262405e-07, "loss": 0.023, "step": 858325 }, { "epoch": 8.44, "grad_norm": 5.703030586242676, "learning_rate": 7.898209657719921e-07, "loss": 0.0194, "step": 858350 }, { "epoch": 8.44, "grad_norm": 1.629201889038086, "learning_rate": 7.896968433177437e-07, "loss": 0.0338, "step": 858375 }, { "epoch": 8.44, "grad_norm": 14.783720970153809, "learning_rate": 7.895727208634951e-07, "loss": 0.0284, "step": 858400 }, { "epoch": 8.44, "grad_norm": 0.006651227828115225, "learning_rate": 7.894485984092467e-07, "loss": 0.0594, "step": 858425 }, { "epoch": 8.44, "grad_norm": 21.3419132232666, "learning_rate": 7.893244759549983e-07, "loss": 0.0341, "step": 858450 }, { "epoch": 8.44, "grad_norm": 0.05316012352705002, "learning_rate": 7.892003535007498e-07, "loss": 0.0353, "step": 858475 }, { "epoch": 8.44, "grad_norm": 1.166190266609192, "learning_rate": 7.890762310465012e-07, "loss": 0.0279, "step": 858500 }, { "epoch": 8.44, "grad_norm": 11.214052200317383, "learning_rate": 7.889521085922529e-07, "loss": 0.0578, "step": 858525 }, { "epoch": 8.44, "grad_norm": 7.637385368347168, "learning_rate": 7.888279861380044e-07, "loss": 0.0353, "step": 858550 }, { "epoch": 8.44, "grad_norm": 3.2556121349334717, "learning_rate": 7.887038636837559e-07, "loss": 0.0496, "step": 858575 }, { "epoch": 8.44, "grad_norm": 4.8551225662231445, "learning_rate": 7.885797412295075e-07, "loss": 0.0186, "step": 858600 }, { "epoch": 8.44, "grad_norm": 0.03725830093026161, "learning_rate": 7.88455618775259e-07, "loss": 0.0202, "step": 858625 }, { "epoch": 8.44, "grad_norm": 8.45927906036377, "learning_rate": 7.883314963210105e-07, "loss": 0.0295, "step": 858650 }, { "epoch": 8.44, "grad_norm": 0.1719425469636917, "learning_rate": 7.88207373866762e-07, "loss": 0.0228, "step": 858675 }, { "epoch": 8.44, "grad_norm": 8.426765441894531, "learning_rate": 7.880832514125137e-07, "loss": 0.0233, "step": 858700 }, { "epoch": 8.44, "grad_norm": 0.020520614460110664, "learning_rate": 7.879591289582651e-07, "loss": 0.0307, "step": 858725 }, { "epoch": 8.44, "grad_norm": 19.949796676635742, "learning_rate": 7.878350065040166e-07, "loss": 0.0396, "step": 858750 }, { "epoch": 8.44, "grad_norm": 3.7702040672302246, "learning_rate": 7.877108840497682e-07, "loss": 0.0436, "step": 858775 }, { "epoch": 8.44, "grad_norm": 9.876256942749023, "learning_rate": 7.875867615955198e-07, "loss": 0.0317, "step": 858800 }, { "epoch": 8.44, "grad_norm": 0.4376239776611328, "learning_rate": 7.874626391412712e-07, "loss": 0.0399, "step": 858825 }, { "epoch": 8.44, "grad_norm": 17.58624267578125, "learning_rate": 7.873385166870228e-07, "loss": 0.0242, "step": 858850 }, { "epoch": 8.44, "grad_norm": 0.064442478120327, "learning_rate": 7.872143942327744e-07, "loss": 0.0439, "step": 858875 }, { "epoch": 8.44, "grad_norm": 14.092669486999512, "learning_rate": 7.870902717785259e-07, "loss": 0.0301, "step": 858900 }, { "epoch": 8.45, "grad_norm": 0.04598994180560112, "learning_rate": 7.869661493242773e-07, "loss": 0.0272, "step": 858925 }, { "epoch": 8.45, "grad_norm": 14.408106803894043, "learning_rate": 7.86842026870029e-07, "loss": 0.0132, "step": 858950 }, { "epoch": 8.45, "grad_norm": 0.1810891479253769, "learning_rate": 7.867179044157805e-07, "loss": 0.0403, "step": 858975 }, { "epoch": 8.45, "grad_norm": 21.147153854370117, "learning_rate": 7.86593781961532e-07, "loss": 0.0365, "step": 859000 }, { "epoch": 8.45, "grad_norm": 0.05290122330188751, "learning_rate": 7.864696595072836e-07, "loss": 0.0255, "step": 859025 }, { "epoch": 8.45, "grad_norm": 8.989584922790527, "learning_rate": 7.863455370530352e-07, "loss": 0.0272, "step": 859050 }, { "epoch": 8.45, "grad_norm": 0.06578430533409119, "learning_rate": 7.862214145987866e-07, "loss": 0.0363, "step": 859075 }, { "epoch": 8.45, "grad_norm": 3.2102653980255127, "learning_rate": 7.860972921445381e-07, "loss": 0.0357, "step": 859100 }, { "epoch": 8.45, "grad_norm": 0.07329072803258896, "learning_rate": 7.859731696902898e-07, "loss": 0.0287, "step": 859125 }, { "epoch": 8.45, "grad_norm": 4.94270658493042, "learning_rate": 7.858490472360413e-07, "loss": 0.028, "step": 859150 }, { "epoch": 8.45, "grad_norm": 0.030446959659457207, "learning_rate": 7.857249247817927e-07, "loss": 0.0356, "step": 859175 }, { "epoch": 8.45, "grad_norm": 8.61267375946045, "learning_rate": 7.856008023275443e-07, "loss": 0.0391, "step": 859200 }, { "epoch": 8.45, "grad_norm": 1.8403606414794922, "learning_rate": 7.854766798732959e-07, "loss": 0.0341, "step": 859225 }, { "epoch": 8.45, "grad_norm": 17.45384407043457, "learning_rate": 7.853525574190474e-07, "loss": 0.0223, "step": 859250 }, { "epoch": 8.45, "grad_norm": 34.292972564697266, "learning_rate": 7.85228434964799e-07, "loss": 0.0212, "step": 859275 }, { "epoch": 8.45, "grad_norm": 10.941454887390137, "learning_rate": 7.851043125105505e-07, "loss": 0.032, "step": 859300 }, { "epoch": 8.45, "grad_norm": 0.1095312312245369, "learning_rate": 7.84980190056302e-07, "loss": 0.0429, "step": 859325 }, { "epoch": 8.45, "grad_norm": 7.014735698699951, "learning_rate": 7.848560676020535e-07, "loss": 0.0277, "step": 859350 }, { "epoch": 8.45, "grad_norm": 0.025058431550860405, "learning_rate": 7.847319451478052e-07, "loss": 0.0229, "step": 859375 }, { "epoch": 8.45, "grad_norm": 3.715190887451172, "learning_rate": 7.846078226935566e-07, "loss": 0.0241, "step": 859400 }, { "epoch": 8.45, "grad_norm": 0.21844685077667236, "learning_rate": 7.844837002393081e-07, "loss": 0.0332, "step": 859425 }, { "epoch": 8.45, "grad_norm": 17.767654418945312, "learning_rate": 7.843595777850597e-07, "loss": 0.0212, "step": 859450 }, { "epoch": 8.45, "grad_norm": 2.250082492828369, "learning_rate": 7.842354553308113e-07, "loss": 0.0304, "step": 859475 }, { "epoch": 8.45, "grad_norm": 8.373743057250977, "learning_rate": 7.841113328765627e-07, "loss": 0.027, "step": 859500 }, { "epoch": 8.45, "grad_norm": 0.020151400938630104, "learning_rate": 7.839872104223142e-07, "loss": 0.0321, "step": 859525 }, { "epoch": 8.45, "grad_norm": 6.538455486297607, "learning_rate": 7.838630879680659e-07, "loss": 0.0286, "step": 859550 }, { "epoch": 8.45, "grad_norm": 0.020714307203888893, "learning_rate": 7.837389655138174e-07, "loss": 0.0421, "step": 859575 }, { "epoch": 8.45, "grad_norm": 5.526905059814453, "learning_rate": 7.836148430595688e-07, "loss": 0.0227, "step": 859600 }, { "epoch": 8.45, "grad_norm": 0.031504783779382706, "learning_rate": 7.834907206053205e-07, "loss": 0.0267, "step": 859625 }, { "epoch": 8.45, "grad_norm": 2.254925489425659, "learning_rate": 7.83366598151072e-07, "loss": 0.0189, "step": 859650 }, { "epoch": 8.45, "grad_norm": 4.387927055358887, "learning_rate": 7.832424756968235e-07, "loss": 0.0383, "step": 859675 }, { "epoch": 8.45, "grad_norm": 2.1568803787231445, "learning_rate": 7.831183532425751e-07, "loss": 0.0257, "step": 859700 }, { "epoch": 8.45, "grad_norm": 0.015261918306350708, "learning_rate": 7.829942307883267e-07, "loss": 0.0249, "step": 859725 }, { "epoch": 8.45, "grad_norm": 2.427729368209839, "learning_rate": 7.828701083340781e-07, "loss": 0.0148, "step": 859750 }, { "epoch": 8.45, "grad_norm": 1.3722749948501587, "learning_rate": 7.827459858798296e-07, "loss": 0.0446, "step": 859775 }, { "epoch": 8.45, "grad_norm": 4.311772346496582, "learning_rate": 7.826218634255813e-07, "loss": 0.0421, "step": 859800 }, { "epoch": 8.45, "grad_norm": 1.5874180793762207, "learning_rate": 7.824977409713328e-07, "loss": 0.0408, "step": 859825 }, { "epoch": 8.45, "grad_norm": 9.278017044067383, "learning_rate": 7.823736185170842e-07, "loss": 0.0293, "step": 859850 }, { "epoch": 8.45, "grad_norm": 0.004389012698084116, "learning_rate": 7.822494960628358e-07, "loss": 0.0305, "step": 859875 }, { "epoch": 8.45, "grad_norm": 2.8986294269561768, "learning_rate": 7.821253736085874e-07, "loss": 0.0357, "step": 859900 }, { "epoch": 8.45, "grad_norm": 0.007167748641222715, "learning_rate": 7.820012511543389e-07, "loss": 0.0408, "step": 859925 }, { "epoch": 8.46, "grad_norm": 3.6048998832702637, "learning_rate": 7.818771287000903e-07, "loss": 0.0379, "step": 859950 }, { "epoch": 8.46, "grad_norm": 0.6434996128082275, "learning_rate": 7.81753006245842e-07, "loss": 0.034, "step": 859975 }, { "epoch": 8.46, "grad_norm": 10.093111038208008, "learning_rate": 7.816288837915935e-07, "loss": 0.0172, "step": 860000 }, { "epoch": 8.46, "eval_loss": 0.88985276222229, "eval_runtime": 6090.1594, "eval_samples_per_second": 1.554, "eval_steps_per_second": 0.194, "eval_wer": 0.11074808472284813, "step": 860000 }, { "epoch": 8.46, "grad_norm": 0.02229638770222664, "learning_rate": 7.81504761337345e-07, "loss": 0.0552, "step": 860025 }, { "epoch": 8.46, "grad_norm": 22.035301208496094, "learning_rate": 7.813806388830967e-07, "loss": 0.0287, "step": 860050 }, { "epoch": 8.46, "grad_norm": 0.24098937213420868, "learning_rate": 7.812565164288481e-07, "loss": 0.0296, "step": 860075 }, { "epoch": 8.46, "grad_norm": 14.894339561462402, "learning_rate": 7.811323939745996e-07, "loss": 0.0248, "step": 860100 }, { "epoch": 8.46, "grad_norm": 2.87815523147583, "learning_rate": 7.810082715203512e-07, "loss": 0.0277, "step": 860125 }, { "epoch": 8.46, "grad_norm": 1.5317987203598022, "learning_rate": 7.808841490661028e-07, "loss": 0.0262, "step": 860150 }, { "epoch": 8.46, "grad_norm": 0.2792556881904602, "learning_rate": 7.807600266118542e-07, "loss": 0.0356, "step": 860175 }, { "epoch": 8.46, "grad_norm": 8.157666206359863, "learning_rate": 7.806359041576057e-07, "loss": 0.0153, "step": 860200 }, { "epoch": 8.46, "grad_norm": 0.9063301086425781, "learning_rate": 7.805117817033574e-07, "loss": 0.0173, "step": 860225 }, { "epoch": 8.46, "grad_norm": 0.9273229837417603, "learning_rate": 7.803876592491089e-07, "loss": 0.0161, "step": 860250 }, { "epoch": 8.46, "grad_norm": 1.1863080263137817, "learning_rate": 7.802635367948603e-07, "loss": 0.0399, "step": 860275 }, { "epoch": 8.46, "grad_norm": 5.611753463745117, "learning_rate": 7.80139414340612e-07, "loss": 0.0259, "step": 860300 }, { "epoch": 8.46, "grad_norm": 0.9190221428871155, "learning_rate": 7.800152918863635e-07, "loss": 0.0276, "step": 860325 }, { "epoch": 8.46, "grad_norm": 7.476341247558594, "learning_rate": 7.79891169432115e-07, "loss": 0.0256, "step": 860350 }, { "epoch": 8.46, "grad_norm": 0.10049555450677872, "learning_rate": 7.797670469778665e-07, "loss": 0.0401, "step": 860375 }, { "epoch": 8.46, "grad_norm": 0.20719091594219208, "learning_rate": 7.796429245236181e-07, "loss": 0.0341, "step": 860400 }, { "epoch": 8.46, "grad_norm": 0.02208004891872406, "learning_rate": 7.795188020693696e-07, "loss": 0.0236, "step": 860425 }, { "epoch": 8.46, "grad_norm": 11.685284614562988, "learning_rate": 7.793946796151211e-07, "loss": 0.0286, "step": 860450 }, { "epoch": 8.46, "grad_norm": 0.24583299458026886, "learning_rate": 7.792705571608728e-07, "loss": 0.0324, "step": 860475 }, { "epoch": 8.46, "grad_norm": 7.49837064743042, "learning_rate": 7.791464347066243e-07, "loss": 0.0203, "step": 860500 }, { "epoch": 8.46, "grad_norm": 0.11313199251890182, "learning_rate": 7.790223122523757e-07, "loss": 0.0259, "step": 860525 }, { "epoch": 8.46, "grad_norm": 1.2583823204040527, "learning_rate": 7.788981897981273e-07, "loss": 0.0399, "step": 860550 }, { "epoch": 8.46, "grad_norm": 0.23563456535339355, "learning_rate": 7.787740673438789e-07, "loss": 0.0378, "step": 860575 }, { "epoch": 8.46, "grad_norm": 10.819550514221191, "learning_rate": 7.786499448896304e-07, "loss": 0.0275, "step": 860600 }, { "epoch": 8.46, "grad_norm": 0.3832390308380127, "learning_rate": 7.785258224353818e-07, "loss": 0.0379, "step": 860625 }, { "epoch": 8.46, "grad_norm": 9.41224193572998, "learning_rate": 7.784016999811335e-07, "loss": 0.0468, "step": 860650 }, { "epoch": 8.46, "grad_norm": 0.05236400291323662, "learning_rate": 7.78277577526885e-07, "loss": 0.0401, "step": 860675 }, { "epoch": 8.46, "grad_norm": 8.992298126220703, "learning_rate": 7.781534550726365e-07, "loss": 0.0177, "step": 860700 }, { "epoch": 8.46, "grad_norm": 0.5858299136161804, "learning_rate": 7.780293326183882e-07, "loss": 0.0156, "step": 860725 }, { "epoch": 8.46, "grad_norm": 4.463280200958252, "learning_rate": 7.779052101641396e-07, "loss": 0.0086, "step": 860750 }, { "epoch": 8.46, "grad_norm": 0.20993821322917938, "learning_rate": 7.777810877098911e-07, "loss": 0.0441, "step": 860775 }, { "epoch": 8.46, "grad_norm": 2.0537190437316895, "learning_rate": 7.776569652556426e-07, "loss": 0.0298, "step": 860800 }, { "epoch": 8.46, "grad_norm": 0.026837170124053955, "learning_rate": 7.775328428013943e-07, "loss": 0.0361, "step": 860825 }, { "epoch": 8.46, "grad_norm": 2.8710765838623047, "learning_rate": 7.774087203471457e-07, "loss": 0.0482, "step": 860850 }, { "epoch": 8.46, "grad_norm": 1.602177619934082, "learning_rate": 7.772845978928972e-07, "loss": 0.0276, "step": 860875 }, { "epoch": 8.46, "grad_norm": 0.0565480962395668, "learning_rate": 7.771604754386489e-07, "loss": 0.0434, "step": 860900 }, { "epoch": 8.46, "grad_norm": 0.007401541341096163, "learning_rate": 7.770363529844004e-07, "loss": 0.0417, "step": 860925 }, { "epoch": 8.47, "grad_norm": 6.395952224731445, "learning_rate": 7.769122305301518e-07, "loss": 0.0228, "step": 860950 }, { "epoch": 8.47, "grad_norm": 0.04942837357521057, "learning_rate": 7.767881080759034e-07, "loss": 0.0441, "step": 860975 }, { "epoch": 8.47, "grad_norm": 2.162590742111206, "learning_rate": 7.76663985621655e-07, "loss": 0.0346, "step": 861000 }, { "epoch": 8.47, "grad_norm": 0.006928744725883007, "learning_rate": 7.765398631674065e-07, "loss": 0.0225, "step": 861025 }, { "epoch": 8.47, "grad_norm": 0.9610628485679626, "learning_rate": 7.764157407131579e-07, "loss": 0.0257, "step": 861050 }, { "epoch": 8.47, "grad_norm": 15.342604637145996, "learning_rate": 7.762916182589096e-07, "loss": 0.0338, "step": 861075 }, { "epoch": 8.47, "grad_norm": 5.205479145050049, "learning_rate": 7.761674958046611e-07, "loss": 0.0394, "step": 861100 }, { "epoch": 8.47, "grad_norm": 2.7606735229492188, "learning_rate": 7.760433733504126e-07, "loss": 0.0414, "step": 861125 }, { "epoch": 8.47, "grad_norm": 3.498295783996582, "learning_rate": 7.759192508961643e-07, "loss": 0.0163, "step": 861150 }, { "epoch": 8.47, "grad_norm": 0.003484410233795643, "learning_rate": 7.757951284419158e-07, "loss": 0.0326, "step": 861175 }, { "epoch": 8.47, "grad_norm": 9.326318740844727, "learning_rate": 7.756710059876672e-07, "loss": 0.0174, "step": 861200 }, { "epoch": 8.47, "grad_norm": 0.011847435496747494, "learning_rate": 7.755468835334187e-07, "loss": 0.0414, "step": 861225 }, { "epoch": 8.47, "grad_norm": 8.789132118225098, "learning_rate": 7.754227610791704e-07, "loss": 0.026, "step": 861250 }, { "epoch": 8.47, "grad_norm": 0.3114347457885742, "learning_rate": 7.752986386249219e-07, "loss": 0.0367, "step": 861275 }, { "epoch": 8.47, "grad_norm": 6.713334560394287, "learning_rate": 7.751745161706733e-07, "loss": 0.026, "step": 861300 }, { "epoch": 8.47, "grad_norm": 2.9513235092163086, "learning_rate": 7.75050393716425e-07, "loss": 0.0269, "step": 861325 }, { "epoch": 8.47, "grad_norm": 1.333834171295166, "learning_rate": 7.749262712621765e-07, "loss": 0.0296, "step": 861350 }, { "epoch": 8.47, "grad_norm": 0.030258027836680412, "learning_rate": 7.74802148807928e-07, "loss": 0.0241, "step": 861375 }, { "epoch": 8.47, "grad_norm": 6.365478992462158, "learning_rate": 7.746780263536797e-07, "loss": 0.0323, "step": 861400 }, { "epoch": 8.47, "grad_norm": 8.000746726989746, "learning_rate": 7.745539038994311e-07, "loss": 0.0185, "step": 861425 }, { "epoch": 8.47, "grad_norm": 7.353731632232666, "learning_rate": 7.744297814451826e-07, "loss": 0.0251, "step": 861450 }, { "epoch": 8.47, "grad_norm": 0.12230482697486877, "learning_rate": 7.743056589909341e-07, "loss": 0.0338, "step": 861475 }, { "epoch": 8.47, "grad_norm": 0.4353290796279907, "learning_rate": 7.741815365366858e-07, "loss": 0.0234, "step": 861500 }, { "epoch": 8.47, "grad_norm": 0.014015219174325466, "learning_rate": 7.740574140824372e-07, "loss": 0.0234, "step": 861525 }, { "epoch": 8.47, "grad_norm": 1.935747504234314, "learning_rate": 7.739332916281887e-07, "loss": 0.0192, "step": 861550 }, { "epoch": 8.47, "grad_norm": 0.023399272933602333, "learning_rate": 7.738091691739404e-07, "loss": 0.033, "step": 861575 }, { "epoch": 8.47, "grad_norm": 19.881860733032227, "learning_rate": 7.736850467196919e-07, "loss": 0.031, "step": 861600 }, { "epoch": 8.47, "grad_norm": 0.3481684625148773, "learning_rate": 7.735609242654433e-07, "loss": 0.0325, "step": 861625 }, { "epoch": 8.47, "grad_norm": 14.865214347839355, "learning_rate": 7.734368018111948e-07, "loss": 0.0414, "step": 861650 }, { "epoch": 8.47, "grad_norm": 4.847618579864502, "learning_rate": 7.733126793569465e-07, "loss": 0.0376, "step": 861675 }, { "epoch": 8.47, "grad_norm": 0.8230447173118591, "learning_rate": 7.73188556902698e-07, "loss": 0.0351, "step": 861700 }, { "epoch": 8.47, "grad_norm": 0.008435177616775036, "learning_rate": 7.730644344484494e-07, "loss": 0.0398, "step": 861725 }, { "epoch": 8.47, "grad_norm": 1.9956169128417969, "learning_rate": 7.729403119942011e-07, "loss": 0.0368, "step": 861750 }, { "epoch": 8.47, "grad_norm": 2.6196465492248535, "learning_rate": 7.728161895399526e-07, "loss": 0.0269, "step": 861775 }, { "epoch": 8.47, "grad_norm": 16.276906967163086, "learning_rate": 7.726920670857041e-07, "loss": 0.0315, "step": 861800 }, { "epoch": 8.47, "grad_norm": 3.3982534408569336, "learning_rate": 7.725679446314558e-07, "loss": 0.0274, "step": 861825 }, { "epoch": 8.47, "grad_norm": 5.20296573638916, "learning_rate": 7.724438221772073e-07, "loss": 0.0126, "step": 861850 }, { "epoch": 8.47, "grad_norm": 7.965765953063965, "learning_rate": 7.723196997229587e-07, "loss": 0.0346, "step": 861875 }, { "epoch": 8.47, "grad_norm": 10.004986763000488, "learning_rate": 7.721955772687102e-07, "loss": 0.0285, "step": 861900 }, { "epoch": 8.47, "grad_norm": 1.7944661378860474, "learning_rate": 7.720714548144619e-07, "loss": 0.0284, "step": 861925 }, { "epoch": 8.47, "grad_norm": 8.331294059753418, "learning_rate": 7.719473323602134e-07, "loss": 0.0222, "step": 861950 }, { "epoch": 8.48, "grad_norm": 0.47757938504219055, "learning_rate": 7.718232099059648e-07, "loss": 0.0275, "step": 861975 }, { "epoch": 8.48, "grad_norm": 11.829172134399414, "learning_rate": 7.716990874517165e-07, "loss": 0.0234, "step": 862000 }, { "epoch": 8.48, "grad_norm": 0.01072138361632824, "learning_rate": 7.71574964997468e-07, "loss": 0.0255, "step": 862025 }, { "epoch": 8.48, "grad_norm": 2.8220925331115723, "learning_rate": 7.714508425432195e-07, "loss": 0.0382, "step": 862050 }, { "epoch": 8.48, "grad_norm": 0.01929982751607895, "learning_rate": 7.713267200889709e-07, "loss": 0.0294, "step": 862075 }, { "epoch": 8.48, "grad_norm": 12.428522109985352, "learning_rate": 7.712025976347226e-07, "loss": 0.033, "step": 862100 }, { "epoch": 8.48, "grad_norm": 0.04884019121527672, "learning_rate": 7.710784751804741e-07, "loss": 0.033, "step": 862125 }, { "epoch": 8.48, "grad_norm": 1.5711441040039062, "learning_rate": 7.709543527262256e-07, "loss": 0.036, "step": 862150 }, { "epoch": 8.48, "grad_norm": 0.15755844116210938, "learning_rate": 7.708302302719773e-07, "loss": 0.0401, "step": 862175 }, { "epoch": 8.48, "grad_norm": 16.908395767211914, "learning_rate": 7.707061078177287e-07, "loss": 0.0312, "step": 862200 }, { "epoch": 8.48, "grad_norm": 0.6396179795265198, "learning_rate": 7.705819853634802e-07, "loss": 0.0272, "step": 862225 }, { "epoch": 8.48, "grad_norm": 3.483398199081421, "learning_rate": 7.704578629092319e-07, "loss": 0.0275, "step": 862250 }, { "epoch": 8.48, "grad_norm": 0.1887475997209549, "learning_rate": 7.703337404549834e-07, "loss": 0.0195, "step": 862275 }, { "epoch": 8.48, "grad_norm": 6.943508148193359, "learning_rate": 7.702096180007348e-07, "loss": 0.0147, "step": 862300 }, { "epoch": 8.48, "grad_norm": 7.526276111602783, "learning_rate": 7.700904604446563e-07, "loss": 0.0272, "step": 862325 }, { "epoch": 8.48, "grad_norm": 10.857596397399902, "learning_rate": 7.699663379904079e-07, "loss": 0.0329, "step": 862350 }, { "epoch": 8.48, "grad_norm": 0.12934839725494385, "learning_rate": 7.698422155361594e-07, "loss": 0.033, "step": 862375 }, { "epoch": 8.48, "grad_norm": 1.915978193283081, "learning_rate": 7.69718093081911e-07, "loss": 0.0245, "step": 862400 }, { "epoch": 8.48, "grad_norm": 2.321568012237549, "learning_rate": 7.695939706276626e-07, "loss": 0.0513, "step": 862425 }, { "epoch": 8.48, "grad_norm": 9.956913948059082, "learning_rate": 7.69469848173414e-07, "loss": 0.0209, "step": 862450 }, { "epoch": 8.48, "grad_norm": 0.6505870223045349, "learning_rate": 7.693457257191655e-07, "loss": 0.0396, "step": 862475 }, { "epoch": 8.48, "grad_norm": 1.8966211080551147, "learning_rate": 7.692216032649171e-07, "loss": 0.0339, "step": 862500 }, { "epoch": 8.48, "grad_norm": 2.2328402996063232, "learning_rate": 7.690974808106687e-07, "loss": 0.0353, "step": 862525 }, { "epoch": 8.48, "grad_norm": 6.21418571472168, "learning_rate": 7.689733583564201e-07, "loss": 0.0331, "step": 862550 }, { "epoch": 8.48, "grad_norm": 0.28922903537750244, "learning_rate": 7.688492359021717e-07, "loss": 0.0263, "step": 862575 }, { "epoch": 8.48, "grad_norm": 6.709374904632568, "learning_rate": 7.687251134479233e-07, "loss": 0.0284, "step": 862600 }, { "epoch": 8.48, "grad_norm": 0.0880124419927597, "learning_rate": 7.686009909936748e-07, "loss": 0.0234, "step": 862625 }, { "epoch": 8.48, "grad_norm": 2.5345981121063232, "learning_rate": 7.684768685394263e-07, "loss": 0.0297, "step": 862650 }, { "epoch": 8.48, "grad_norm": 0.4308353066444397, "learning_rate": 7.683527460851778e-07, "loss": 0.0348, "step": 862675 }, { "epoch": 8.48, "grad_norm": 23.669246673583984, "learning_rate": 7.682286236309294e-07, "loss": 0.0287, "step": 862700 }, { "epoch": 8.48, "grad_norm": 0.009146603755652905, "learning_rate": 7.681045011766809e-07, "loss": 0.0514, "step": 862725 }, { "epoch": 8.48, "grad_norm": 18.49981117248535, "learning_rate": 7.679803787224325e-07, "loss": 0.0262, "step": 862750 }, { "epoch": 8.48, "grad_norm": 0.8888931274414062, "learning_rate": 7.678562562681841e-07, "loss": 0.0338, "step": 862775 }, { "epoch": 8.48, "grad_norm": 8.82551097869873, "learning_rate": 7.677321338139355e-07, "loss": 0.0356, "step": 862800 }, { "epoch": 8.48, "grad_norm": 0.1648947298526764, "learning_rate": 7.67608011359687e-07, "loss": 0.0369, "step": 862825 }, { "epoch": 8.48, "grad_norm": 15.23633861541748, "learning_rate": 7.674838889054387e-07, "loss": 0.0187, "step": 862850 }, { "epoch": 8.48, "grad_norm": 0.021318580955266953, "learning_rate": 7.673647313493601e-07, "loss": 0.0401, "step": 862875 }, { "epoch": 8.48, "grad_norm": 11.671856880187988, "learning_rate": 7.672406088951117e-07, "loss": 0.027, "step": 862900 }, { "epoch": 8.48, "grad_norm": 2.0284719467163086, "learning_rate": 7.671164864408631e-07, "loss": 0.0205, "step": 862925 }, { "epoch": 8.48, "grad_norm": 5.501009941101074, "learning_rate": 7.669923639866147e-07, "loss": 0.0346, "step": 862950 }, { "epoch": 8.48, "grad_norm": 0.18731188774108887, "learning_rate": 7.668682415323663e-07, "loss": 0.0322, "step": 862975 }, { "epoch": 8.49, "grad_norm": 3.610962390899658, "learning_rate": 7.667441190781178e-07, "loss": 0.032, "step": 863000 }, { "epoch": 8.49, "grad_norm": 1.4375553131103516, "learning_rate": 7.666199966238692e-07, "loss": 0.0388, "step": 863025 }, { "epoch": 8.49, "grad_norm": 10.357776641845703, "learning_rate": 7.664958741696208e-07, "loss": 0.0305, "step": 863050 }, { "epoch": 8.49, "grad_norm": 3.354325771331787, "learning_rate": 7.663717517153724e-07, "loss": 0.0365, "step": 863075 }, { "epoch": 8.49, "grad_norm": 11.847612380981445, "learning_rate": 7.662476292611239e-07, "loss": 0.0282, "step": 863100 }, { "epoch": 8.49, "grad_norm": 2.2652857303619385, "learning_rate": 7.661235068068755e-07, "loss": 0.0343, "step": 863125 }, { "epoch": 8.49, "grad_norm": 23.20695686340332, "learning_rate": 7.65999384352627e-07, "loss": 0.0275, "step": 863150 }, { "epoch": 8.49, "grad_norm": 2.1417253017425537, "learning_rate": 7.658752618983785e-07, "loss": 0.033, "step": 863175 }, { "epoch": 8.49, "grad_norm": 1.1978199481964111, "learning_rate": 7.657511394441301e-07, "loss": 0.0251, "step": 863200 }, { "epoch": 8.49, "grad_norm": 0.660645067691803, "learning_rate": 7.656270169898816e-07, "loss": 0.0341, "step": 863225 }, { "epoch": 8.49, "grad_norm": 16.08083152770996, "learning_rate": 7.655028945356331e-07, "loss": 0.0196, "step": 863250 }, { "epoch": 8.49, "grad_norm": 3.96266770362854, "learning_rate": 7.653787720813846e-07, "loss": 0.0379, "step": 863275 }, { "epoch": 8.49, "grad_norm": 3.380751371383667, "learning_rate": 7.652546496271362e-07, "loss": 0.0301, "step": 863300 }, { "epoch": 8.49, "grad_norm": 0.0767969861626625, "learning_rate": 7.651305271728878e-07, "loss": 0.0479, "step": 863325 }, { "epoch": 8.49, "grad_norm": 0.8794978260993958, "learning_rate": 7.650064047186392e-07, "loss": 0.0172, "step": 863350 }, { "epoch": 8.49, "grad_norm": 0.23098163306713104, "learning_rate": 7.648822822643908e-07, "loss": 0.0347, "step": 863375 }, { "epoch": 8.49, "grad_norm": 8.57243537902832, "learning_rate": 7.647581598101424e-07, "loss": 0.0137, "step": 863400 }, { "epoch": 8.49, "grad_norm": 1.561647653579712, "learning_rate": 7.646340373558939e-07, "loss": 0.0285, "step": 863425 }, { "epoch": 8.49, "grad_norm": 2.8925912380218506, "learning_rate": 7.645099149016454e-07, "loss": 0.0286, "step": 863450 }, { "epoch": 8.49, "grad_norm": 0.1511671394109726, "learning_rate": 7.64385792447397e-07, "loss": 0.0306, "step": 863475 }, { "epoch": 8.49, "grad_norm": 0.5626429319381714, "learning_rate": 7.642616699931485e-07, "loss": 0.0177, "step": 863500 }, { "epoch": 8.49, "grad_norm": 0.022183042019605637, "learning_rate": 7.641375475389e-07, "loss": 0.0402, "step": 863525 }, { "epoch": 8.49, "grad_norm": 0.6746070981025696, "learning_rate": 7.640134250846516e-07, "loss": 0.0292, "step": 863550 }, { "epoch": 8.49, "grad_norm": 2.5583324432373047, "learning_rate": 7.638893026304032e-07, "loss": 0.0214, "step": 863575 }, { "epoch": 8.49, "grad_norm": 7.674848556518555, "learning_rate": 7.637651801761546e-07, "loss": 0.0188, "step": 863600 }, { "epoch": 8.49, "grad_norm": 0.05713390186429024, "learning_rate": 7.636410577219062e-07, "loss": 0.0372, "step": 863625 }, { "epoch": 8.49, "grad_norm": 21.16768455505371, "learning_rate": 7.635169352676577e-07, "loss": 0.036, "step": 863650 }, { "epoch": 8.49, "grad_norm": 0.2070058435201645, "learning_rate": 7.633928128134093e-07, "loss": 0.0285, "step": 863675 }, { "epoch": 8.49, "grad_norm": 5.8486738204956055, "learning_rate": 7.632686903591607e-07, "loss": 0.0326, "step": 863700 }, { "epoch": 8.49, "grad_norm": 0.06842298060655594, "learning_rate": 7.631445679049123e-07, "loss": 0.0352, "step": 863725 }, { "epoch": 8.49, "grad_norm": 0.04492699354887009, "learning_rate": 7.630204454506639e-07, "loss": 0.0261, "step": 863750 }, { "epoch": 8.49, "grad_norm": 1.430054783821106, "learning_rate": 7.628963229964154e-07, "loss": 0.0247, "step": 863775 }, { "epoch": 8.49, "grad_norm": 14.664674758911133, "learning_rate": 7.62772200542167e-07, "loss": 0.0384, "step": 863800 }, { "epoch": 8.49, "grad_norm": 5.4895710945129395, "learning_rate": 7.626480780879185e-07, "loss": 0.0258, "step": 863825 }, { "epoch": 8.49, "grad_norm": 6.6960296630859375, "learning_rate": 7.6252395563367e-07, "loss": 0.0349, "step": 863850 }, { "epoch": 8.49, "grad_norm": 0.015308217145502567, "learning_rate": 7.623998331794215e-07, "loss": 0.0196, "step": 863875 }, { "epoch": 8.49, "grad_norm": 5.189980506896973, "learning_rate": 7.622757107251731e-07, "loss": 0.0201, "step": 863900 }, { "epoch": 8.49, "grad_norm": 0.044464655220508575, "learning_rate": 7.621515882709246e-07, "loss": 0.0317, "step": 863925 }, { "epoch": 8.49, "grad_norm": 13.715456008911133, "learning_rate": 7.620274658166761e-07, "loss": 0.0235, "step": 863950 }, { "epoch": 8.49, "grad_norm": 0.024910956621170044, "learning_rate": 7.619033433624277e-07, "loss": 0.0374, "step": 863975 }, { "epoch": 8.49, "grad_norm": 14.022533416748047, "learning_rate": 7.617792209081793e-07, "loss": 0.0316, "step": 864000 }, { "epoch": 8.5, "grad_norm": 5.260995388031006, "learning_rate": 7.616550984539307e-07, "loss": 0.0304, "step": 864025 }, { "epoch": 8.5, "grad_norm": 2.1533901691436768, "learning_rate": 7.615309759996823e-07, "loss": 0.0386, "step": 864050 }, { "epoch": 8.5, "grad_norm": 1.5806406736373901, "learning_rate": 7.614068535454338e-07, "loss": 0.0537, "step": 864075 }, { "epoch": 8.5, "grad_norm": 7.0885796546936035, "learning_rate": 7.612827310911854e-07, "loss": 0.0183, "step": 864100 }, { "epoch": 8.5, "grad_norm": 0.013236938044428825, "learning_rate": 7.611586086369369e-07, "loss": 0.039, "step": 864125 }, { "epoch": 8.5, "grad_norm": 2.9870781898498535, "learning_rate": 7.610344861826885e-07, "loss": 0.0221, "step": 864150 }, { "epoch": 8.5, "grad_norm": 0.41977161169052124, "learning_rate": 7.6091036372844e-07, "loss": 0.0195, "step": 864175 }, { "epoch": 8.5, "grad_norm": 3.8452298641204834, "learning_rate": 7.607862412741915e-07, "loss": 0.0414, "step": 864200 }, { "epoch": 8.5, "grad_norm": 0.3536069393157959, "learning_rate": 7.606621188199431e-07, "loss": 0.0426, "step": 864225 }, { "epoch": 8.5, "grad_norm": 2.7833118438720703, "learning_rate": 7.605379963656947e-07, "loss": 0.0255, "step": 864250 }, { "epoch": 8.5, "grad_norm": 0.24436958134174347, "learning_rate": 7.604138739114461e-07, "loss": 0.0301, "step": 864275 }, { "epoch": 8.5, "grad_norm": 1.415498971939087, "learning_rate": 7.602897514571977e-07, "loss": 0.0296, "step": 864300 }, { "epoch": 8.5, "grad_norm": 0.6775014996528625, "learning_rate": 7.601656290029492e-07, "loss": 0.0343, "step": 864325 }, { "epoch": 8.5, "grad_norm": 8.554755210876465, "learning_rate": 7.600415065487008e-07, "loss": 0.0266, "step": 864350 }, { "epoch": 8.5, "grad_norm": 0.0047691017389297485, "learning_rate": 7.599173840944522e-07, "loss": 0.0271, "step": 864375 }, { "epoch": 8.5, "grad_norm": 11.3405122756958, "learning_rate": 7.597932616402038e-07, "loss": 0.0286, "step": 864400 }, { "epoch": 8.5, "grad_norm": 0.021772140637040138, "learning_rate": 7.596691391859554e-07, "loss": 0.0348, "step": 864425 }, { "epoch": 8.5, "grad_norm": 16.667312622070312, "learning_rate": 7.595450167317069e-07, "loss": 0.0364, "step": 864450 }, { "epoch": 8.5, "grad_norm": 0.10939215123653412, "learning_rate": 7.594208942774585e-07, "loss": 0.0272, "step": 864475 }, { "epoch": 8.5, "grad_norm": 13.997326850891113, "learning_rate": 7.5929677182321e-07, "loss": 0.0116, "step": 864500 }, { "epoch": 8.5, "grad_norm": 0.7609087228775024, "learning_rate": 7.591726493689615e-07, "loss": 0.0297, "step": 864525 }, { "epoch": 8.5, "grad_norm": 7.941105842590332, "learning_rate": 7.59048526914713e-07, "loss": 0.0298, "step": 864550 }, { "epoch": 8.5, "grad_norm": 0.1960717886686325, "learning_rate": 7.589244044604646e-07, "loss": 0.0414, "step": 864575 }, { "epoch": 8.5, "grad_norm": 11.185812950134277, "learning_rate": 7.588002820062161e-07, "loss": 0.0332, "step": 864600 }, { "epoch": 8.5, "grad_norm": 0.31973692774772644, "learning_rate": 7.586761595519676e-07, "loss": 0.0423, "step": 864625 }, { "epoch": 8.5, "grad_norm": 12.710755348205566, "learning_rate": 7.585520370977192e-07, "loss": 0.0386, "step": 864650 }, { "epoch": 8.5, "grad_norm": 0.37032923102378845, "learning_rate": 7.584279146434708e-07, "loss": 0.037, "step": 864675 }, { "epoch": 8.5, "grad_norm": 12.07930850982666, "learning_rate": 7.583037921892222e-07, "loss": 0.0331, "step": 864700 }, { "epoch": 8.5, "grad_norm": 0.2582084536552429, "learning_rate": 7.581796697349738e-07, "loss": 0.0335, "step": 864725 }, { "epoch": 8.5, "grad_norm": 12.276612281799316, "learning_rate": 7.580555472807253e-07, "loss": 0.0319, "step": 864750 }, { "epoch": 8.5, "grad_norm": 0.2460719347000122, "learning_rate": 7.579314248264769e-07, "loss": 0.0334, "step": 864775 }, { "epoch": 8.5, "grad_norm": 4.802734851837158, "learning_rate": 7.578073023722284e-07, "loss": 0.0314, "step": 864800 }, { "epoch": 8.5, "grad_norm": 0.9748554229736328, "learning_rate": 7.5768317991798e-07, "loss": 0.034, "step": 864825 }, { "epoch": 8.5, "grad_norm": 8.25206184387207, "learning_rate": 7.575590574637315e-07, "loss": 0.0399, "step": 864850 }, { "epoch": 8.5, "grad_norm": 11.245636940002441, "learning_rate": 7.57434935009483e-07, "loss": 0.0438, "step": 864875 }, { "epoch": 8.5, "grad_norm": 1.6778523921966553, "learning_rate": 7.573108125552346e-07, "loss": 0.0255, "step": 864900 }, { "epoch": 8.5, "grad_norm": 1.9731565713882446, "learning_rate": 7.571866901009862e-07, "loss": 0.0424, "step": 864925 }, { "epoch": 8.5, "grad_norm": 7.715461730957031, "learning_rate": 7.570625676467376e-07, "loss": 0.0166, "step": 864950 }, { "epoch": 8.5, "grad_norm": 0.032663073390722275, "learning_rate": 7.569384451924891e-07, "loss": 0.0207, "step": 864975 }, { "epoch": 8.5, "grad_norm": 1.9997614622116089, "learning_rate": 7.568143227382407e-07, "loss": 0.0155, "step": 865000 }, { "epoch": 8.51, "grad_norm": 0.8555639386177063, "learning_rate": 7.566902002839923e-07, "loss": 0.0316, "step": 865025 }, { "epoch": 8.51, "grad_norm": 16.252300262451172, "learning_rate": 7.565660778297437e-07, "loss": 0.0426, "step": 865050 }, { "epoch": 8.51, "grad_norm": 1.6013424396514893, "learning_rate": 7.564419553754953e-07, "loss": 0.0307, "step": 865075 }, { "epoch": 8.51, "grad_norm": 0.8434453010559082, "learning_rate": 7.563178329212469e-07, "loss": 0.0125, "step": 865100 }, { "epoch": 8.51, "grad_norm": 3.0636134147644043, "learning_rate": 7.561937104669984e-07, "loss": 0.0248, "step": 865125 }, { "epoch": 8.51, "grad_norm": 7.456062316894531, "learning_rate": 7.5606958801275e-07, "loss": 0.0366, "step": 865150 }, { "epoch": 8.51, "grad_norm": 0.09036865830421448, "learning_rate": 7.559454655585014e-07, "loss": 0.0306, "step": 865175 }, { "epoch": 8.51, "grad_norm": 8.099056243896484, "learning_rate": 7.55821343104253e-07, "loss": 0.0351, "step": 865200 }, { "epoch": 8.51, "grad_norm": 0.024650797247886658, "learning_rate": 7.556972206500045e-07, "loss": 0.0255, "step": 865225 }, { "epoch": 8.51, "grad_norm": 17.838838577270508, "learning_rate": 7.555730981957561e-07, "loss": 0.0303, "step": 865250 }, { "epoch": 8.51, "grad_norm": 0.3053348660469055, "learning_rate": 7.554489757415076e-07, "loss": 0.0472, "step": 865275 }, { "epoch": 8.51, "grad_norm": 13.967161178588867, "learning_rate": 7.553248532872591e-07, "loss": 0.0484, "step": 865300 }, { "epoch": 8.51, "grad_norm": 0.07776407897472382, "learning_rate": 7.552007308330107e-07, "loss": 0.0431, "step": 865325 }, { "epoch": 8.51, "grad_norm": 3.1443064212799072, "learning_rate": 7.550766083787623e-07, "loss": 0.02, "step": 865350 }, { "epoch": 8.51, "grad_norm": 0.10694819688796997, "learning_rate": 7.549524859245137e-07, "loss": 0.0498, "step": 865375 }, { "epoch": 8.51, "grad_norm": 15.596186637878418, "learning_rate": 7.548283634702652e-07, "loss": 0.0311, "step": 865400 }, { "epoch": 8.51, "grad_norm": 6.6756768226623535, "learning_rate": 7.547042410160168e-07, "loss": 0.031, "step": 865425 }, { "epoch": 8.51, "grad_norm": 10.53360652923584, "learning_rate": 7.545801185617684e-07, "loss": 0.025, "step": 865450 }, { "epoch": 8.51, "grad_norm": 0.6317028999328613, "learning_rate": 7.544559961075199e-07, "loss": 0.0494, "step": 865475 }, { "epoch": 8.51, "grad_norm": 11.27674674987793, "learning_rate": 7.543318736532715e-07, "loss": 0.0364, "step": 865500 }, { "epoch": 8.51, "grad_norm": 0.06631298363208771, "learning_rate": 7.54207751199023e-07, "loss": 0.0298, "step": 865525 }, { "epoch": 8.51, "grad_norm": 6.920100688934326, "learning_rate": 7.540836287447745e-07, "loss": 0.0328, "step": 865550 }, { "epoch": 8.51, "grad_norm": 0.40535837411880493, "learning_rate": 7.539595062905261e-07, "loss": 0.0295, "step": 865575 }, { "epoch": 8.51, "grad_norm": 11.329692840576172, "learning_rate": 7.538353838362777e-07, "loss": 0.0154, "step": 865600 }, { "epoch": 8.51, "grad_norm": 0.16103121638298035, "learning_rate": 7.537112613820291e-07, "loss": 0.0348, "step": 865625 }, { "epoch": 8.51, "grad_norm": 21.538585662841797, "learning_rate": 7.535871389277806e-07, "loss": 0.0343, "step": 865650 }, { "epoch": 8.51, "grad_norm": 0.549990713596344, "learning_rate": 7.534630164735322e-07, "loss": 0.0402, "step": 865675 }, { "epoch": 8.51, "grad_norm": 11.634726524353027, "learning_rate": 7.533388940192838e-07, "loss": 0.0236, "step": 865700 }, { "epoch": 8.51, "grad_norm": 0.2016756236553192, "learning_rate": 7.532147715650352e-07, "loss": 0.0391, "step": 865725 }, { "epoch": 8.51, "grad_norm": 19.115402221679688, "learning_rate": 7.530906491107868e-07, "loss": 0.0291, "step": 865750 }, { "epoch": 8.51, "grad_norm": 1.076098084449768, "learning_rate": 7.529665266565384e-07, "loss": 0.0349, "step": 865775 }, { "epoch": 8.51, "grad_norm": 13.814043045043945, "learning_rate": 7.528424042022899e-07, "loss": 0.0337, "step": 865800 }, { "epoch": 8.51, "grad_norm": 0.16757671535015106, "learning_rate": 7.527182817480413e-07, "loss": 0.0296, "step": 865825 }, { "epoch": 8.51, "grad_norm": 5.547481060028076, "learning_rate": 7.525941592937929e-07, "loss": 0.0161, "step": 865850 }, { "epoch": 8.51, "grad_norm": 5.896444797515869, "learning_rate": 7.524700368395445e-07, "loss": 0.0364, "step": 865875 }, { "epoch": 8.51, "grad_norm": 0.9409672617912292, "learning_rate": 7.52345914385296e-07, "loss": 0.0333, "step": 865900 }, { "epoch": 8.51, "grad_norm": 0.037093888968229294, "learning_rate": 7.522217919310476e-07, "loss": 0.0457, "step": 865925 }, { "epoch": 8.51, "grad_norm": 1.7831231355667114, "learning_rate": 7.520976694767991e-07, "loss": 0.0202, "step": 865950 }, { "epoch": 8.51, "grad_norm": 5.158915042877197, "learning_rate": 7.519735470225506e-07, "loss": 0.0283, "step": 865975 }, { "epoch": 8.51, "grad_norm": 11.93468189239502, "learning_rate": 7.518494245683022e-07, "loss": 0.0392, "step": 866000 }, { "epoch": 8.51, "grad_norm": 0.034872010350227356, "learning_rate": 7.517253021140538e-07, "loss": 0.0273, "step": 866025 }, { "epoch": 8.52, "grad_norm": 4.187264442443848, "learning_rate": 7.516011796598052e-07, "loss": 0.0218, "step": 866050 }, { "epoch": 8.52, "grad_norm": 0.5823672413825989, "learning_rate": 7.514770572055567e-07, "loss": 0.0224, "step": 866075 }, { "epoch": 8.52, "grad_norm": 6.654551029205322, "learning_rate": 7.513529347513083e-07, "loss": 0.0164, "step": 866100 }, { "epoch": 8.52, "grad_norm": 1.567542552947998, "learning_rate": 7.512288122970599e-07, "loss": 0.0266, "step": 866125 }, { "epoch": 8.52, "grad_norm": 0.2071269005537033, "learning_rate": 7.511046898428113e-07, "loss": 0.0143, "step": 866150 }, { "epoch": 8.52, "grad_norm": 0.0029613999649882317, "learning_rate": 7.509805673885629e-07, "loss": 0.0309, "step": 866175 }, { "epoch": 8.52, "grad_norm": 0.2835092842578888, "learning_rate": 7.508564449343145e-07, "loss": 0.0193, "step": 866200 }, { "epoch": 8.52, "grad_norm": 0.6116399168968201, "learning_rate": 7.50732322480066e-07, "loss": 0.0478, "step": 866225 }, { "epoch": 8.52, "grad_norm": 11.588902473449707, "learning_rate": 7.506082000258175e-07, "loss": 0.0487, "step": 866250 }, { "epoch": 8.52, "grad_norm": 1.4325870275497437, "learning_rate": 7.504890424697391e-07, "loss": 0.0286, "step": 866275 }, { "epoch": 8.52, "grad_norm": 2.3992037773132324, "learning_rate": 7.503649200154906e-07, "loss": 0.0193, "step": 866300 }, { "epoch": 8.52, "grad_norm": 0.11107619851827621, "learning_rate": 7.50240797561242e-07, "loss": 0.0333, "step": 866325 }, { "epoch": 8.52, "grad_norm": 7.424863338470459, "learning_rate": 7.501166751069937e-07, "loss": 0.0266, "step": 866350 }, { "epoch": 8.52, "grad_norm": 1.0105769634246826, "learning_rate": 7.499925526527452e-07, "loss": 0.0227, "step": 866375 }, { "epoch": 8.52, "grad_norm": 1.700412392616272, "learning_rate": 7.498684301984967e-07, "loss": 0.0282, "step": 866400 }, { "epoch": 8.52, "grad_norm": 0.011259898543357849, "learning_rate": 7.497443077442481e-07, "loss": 0.0191, "step": 866425 }, { "epoch": 8.52, "grad_norm": 1.0765434503555298, "learning_rate": 7.496201852899998e-07, "loss": 0.0155, "step": 866450 }, { "epoch": 8.52, "grad_norm": 0.01293619628995657, "learning_rate": 7.494960628357513e-07, "loss": 0.0331, "step": 866475 }, { "epoch": 8.52, "grad_norm": 12.64926528930664, "learning_rate": 7.493719403815028e-07, "loss": 0.0352, "step": 866500 }, { "epoch": 8.52, "grad_norm": 0.009714383631944656, "learning_rate": 7.492478179272545e-07, "loss": 0.0226, "step": 866525 }, { "epoch": 8.52, "grad_norm": 6.585667610168457, "learning_rate": 7.491236954730059e-07, "loss": 0.0237, "step": 866550 }, { "epoch": 8.52, "grad_norm": 1.471500039100647, "learning_rate": 7.489995730187574e-07, "loss": 0.0546, "step": 866575 }, { "epoch": 8.52, "grad_norm": 11.807602882385254, "learning_rate": 7.48875450564509e-07, "loss": 0.0212, "step": 866600 }, { "epoch": 8.52, "grad_norm": 1.6166460514068604, "learning_rate": 7.487513281102606e-07, "loss": 0.0385, "step": 866625 }, { "epoch": 8.52, "grad_norm": 3.5547380447387695, "learning_rate": 7.48627205656012e-07, "loss": 0.0364, "step": 866650 }, { "epoch": 8.52, "grad_norm": 0.4833303391933441, "learning_rate": 7.485030832017635e-07, "loss": 0.041, "step": 866675 }, { "epoch": 8.52, "grad_norm": 3.9534237384796143, "learning_rate": 7.483789607475152e-07, "loss": 0.0397, "step": 866700 }, { "epoch": 8.52, "grad_norm": 0.12160176038742065, "learning_rate": 7.482548382932667e-07, "loss": 0.0549, "step": 866725 }, { "epoch": 8.52, "grad_norm": 17.819467544555664, "learning_rate": 7.481307158390181e-07, "loss": 0.0223, "step": 866750 }, { "epoch": 8.52, "grad_norm": 0.40742799639701843, "learning_rate": 7.480065933847698e-07, "loss": 0.0327, "step": 866775 }, { "epoch": 8.52, "grad_norm": 3.1017823219299316, "learning_rate": 7.478824709305213e-07, "loss": 0.0212, "step": 866800 }, { "epoch": 8.52, "grad_norm": 0.10504434257745743, "learning_rate": 7.477583484762728e-07, "loss": 0.0498, "step": 866825 }, { "epoch": 8.52, "grad_norm": 18.046247482299805, "learning_rate": 7.476342260220242e-07, "loss": 0.0225, "step": 866850 }, { "epoch": 8.52, "grad_norm": 1.7619696855545044, "learning_rate": 7.475101035677759e-07, "loss": 0.0339, "step": 866875 }, { "epoch": 8.52, "grad_norm": 0.804804801940918, "learning_rate": 7.473859811135274e-07, "loss": 0.0212, "step": 866900 }, { "epoch": 8.52, "grad_norm": 4.663517951965332, "learning_rate": 7.472618586592789e-07, "loss": 0.028, "step": 866925 }, { "epoch": 8.52, "grad_norm": 5.566446304321289, "learning_rate": 7.471377362050306e-07, "loss": 0.0272, "step": 866950 }, { "epoch": 8.52, "grad_norm": 0.092622309923172, "learning_rate": 7.470136137507821e-07, "loss": 0.0239, "step": 866975 }, { "epoch": 8.52, "grad_norm": 12.305193901062012, "learning_rate": 7.468894912965335e-07, "loss": 0.038, "step": 867000 }, { "epoch": 8.52, "grad_norm": 1.2932043075561523, "learning_rate": 7.46765368842285e-07, "loss": 0.0348, "step": 867025 }, { "epoch": 8.52, "grad_norm": 10.773797988891602, "learning_rate": 7.466412463880367e-07, "loss": 0.0359, "step": 867050 }, { "epoch": 8.53, "grad_norm": 0.03419023007154465, "learning_rate": 7.465171239337882e-07, "loss": 0.0373, "step": 867075 }, { "epoch": 8.53, "grad_norm": 21.250017166137695, "learning_rate": 7.463930014795396e-07, "loss": 0.0394, "step": 867100 }, { "epoch": 8.53, "grad_norm": 6.1772565841674805, "learning_rate": 7.462688790252913e-07, "loss": 0.0198, "step": 867125 }, { "epoch": 8.53, "grad_norm": 26.728534698486328, "learning_rate": 7.461447565710428e-07, "loss": 0.0354, "step": 867150 }, { "epoch": 8.53, "grad_norm": 6.190487861633301, "learning_rate": 7.460206341167943e-07, "loss": 0.0293, "step": 867175 }, { "epoch": 8.53, "grad_norm": 1.4834108352661133, "learning_rate": 7.45896511662546e-07, "loss": 0.0242, "step": 867200 }, { "epoch": 8.53, "grad_norm": 0.0015573749551549554, "learning_rate": 7.457723892082974e-07, "loss": 0.0307, "step": 867225 }, { "epoch": 8.53, "grad_norm": 1.759181022644043, "learning_rate": 7.456482667540489e-07, "loss": 0.0127, "step": 867250 }, { "epoch": 8.53, "grad_norm": 0.15564848482608795, "learning_rate": 7.455241442998004e-07, "loss": 0.0351, "step": 867275 }, { "epoch": 8.53, "grad_norm": 2.223902940750122, "learning_rate": 7.454000218455521e-07, "loss": 0.0281, "step": 867300 }, { "epoch": 8.53, "grad_norm": 0.6384034156799316, "learning_rate": 7.452758993913035e-07, "loss": 0.0359, "step": 867325 }, { "epoch": 8.53, "grad_norm": 4.556700229644775, "learning_rate": 7.45151776937055e-07, "loss": 0.0144, "step": 867350 }, { "epoch": 8.53, "grad_norm": 0.013732561841607094, "learning_rate": 7.450276544828067e-07, "loss": 0.0346, "step": 867375 }, { "epoch": 8.53, "grad_norm": 1.0515120029449463, "learning_rate": 7.449035320285582e-07, "loss": 0.041, "step": 867400 }, { "epoch": 8.53, "grad_norm": 0.24484626948833466, "learning_rate": 7.447794095743096e-07, "loss": 0.0351, "step": 867425 }, { "epoch": 8.53, "grad_norm": 6.2728352546691895, "learning_rate": 7.446552871200612e-07, "loss": 0.0177, "step": 867450 }, { "epoch": 8.53, "grad_norm": 0.041455697268247604, "learning_rate": 7.445311646658128e-07, "loss": 0.0288, "step": 867475 }, { "epoch": 8.53, "grad_norm": 15.70629596710205, "learning_rate": 7.444070422115643e-07, "loss": 0.0404, "step": 867500 }, { "epoch": 8.53, "grad_norm": 0.0334162712097168, "learning_rate": 7.442829197573157e-07, "loss": 0.0476, "step": 867525 }, { "epoch": 8.53, "grad_norm": 7.681682109832764, "learning_rate": 7.441587973030674e-07, "loss": 0.0411, "step": 867550 }, { "epoch": 8.53, "grad_norm": 0.06395016610622406, "learning_rate": 7.440346748488189e-07, "loss": 0.0374, "step": 867575 }, { "epoch": 8.53, "grad_norm": 13.421501159667969, "learning_rate": 7.439105523945704e-07, "loss": 0.029, "step": 867600 }, { "epoch": 8.53, "grad_norm": 0.03030414693057537, "learning_rate": 7.437864299403221e-07, "loss": 0.0451, "step": 867625 }, { "epoch": 8.53, "grad_norm": 8.487759590148926, "learning_rate": 7.436623074860735e-07, "loss": 0.0168, "step": 867650 }, { "epoch": 8.53, "grad_norm": 0.1007842943072319, "learning_rate": 7.43538185031825e-07, "loss": 0.0514, "step": 867675 }, { "epoch": 8.53, "grad_norm": 3.5850701332092285, "learning_rate": 7.434140625775765e-07, "loss": 0.03, "step": 867700 }, { "epoch": 8.53, "grad_norm": 0.022987794131040573, "learning_rate": 7.432899401233282e-07, "loss": 0.0491, "step": 867725 }, { "epoch": 8.53, "grad_norm": 6.446277618408203, "learning_rate": 7.431658176690797e-07, "loss": 0.0132, "step": 867750 }, { "epoch": 8.53, "grad_norm": 1.3608793020248413, "learning_rate": 7.430416952148311e-07, "loss": 0.0209, "step": 867775 }, { "epoch": 8.53, "grad_norm": 3.495645046234131, "learning_rate": 7.429175727605828e-07, "loss": 0.0312, "step": 867800 }, { "epoch": 8.53, "grad_norm": 0.047014348208904266, "learning_rate": 7.427934503063343e-07, "loss": 0.0434, "step": 867825 }, { "epoch": 8.53, "grad_norm": 10.695406913757324, "learning_rate": 7.426693278520858e-07, "loss": 0.0137, "step": 867850 }, { "epoch": 8.53, "grad_norm": 4.2499260902404785, "learning_rate": 7.425452053978373e-07, "loss": 0.0245, "step": 867875 }, { "epoch": 8.53, "grad_norm": 2.3740859031677246, "learning_rate": 7.424210829435889e-07, "loss": 0.0415, "step": 867900 }, { "epoch": 8.53, "grad_norm": 0.03609552979469299, "learning_rate": 7.422969604893404e-07, "loss": 0.0353, "step": 867925 }, { "epoch": 8.53, "grad_norm": 0.8174365758895874, "learning_rate": 7.421728380350919e-07, "loss": 0.0364, "step": 867950 }, { "epoch": 8.53, "grad_norm": 1.5187935829162598, "learning_rate": 7.420487155808436e-07, "loss": 0.0349, "step": 867975 }, { "epoch": 8.53, "grad_norm": 1.861950159072876, "learning_rate": 7.41924593126595e-07, "loss": 0.029, "step": 868000 }, { "epoch": 8.53, "grad_norm": 0.14523862302303314, "learning_rate": 7.418004706723465e-07, "loss": 0.0303, "step": 868025 }, { "epoch": 8.53, "grad_norm": 9.496304512023926, "learning_rate": 7.416763482180982e-07, "loss": 0.0224, "step": 868050 }, { "epoch": 8.54, "grad_norm": 1.242021918296814, "learning_rate": 7.415522257638497e-07, "loss": 0.0449, "step": 868075 }, { "epoch": 8.54, "grad_norm": 7.333840370178223, "learning_rate": 7.414281033096011e-07, "loss": 0.0417, "step": 868100 }, { "epoch": 8.54, "grad_norm": 5.89125394821167, "learning_rate": 7.413039808553526e-07, "loss": 0.037, "step": 868125 }, { "epoch": 8.54, "grad_norm": 8.710247039794922, "learning_rate": 7.411798584011043e-07, "loss": 0.0231, "step": 868150 }, { "epoch": 8.54, "grad_norm": 0.03409235179424286, "learning_rate": 7.410557359468558e-07, "loss": 0.0502, "step": 868175 }, { "epoch": 8.54, "grad_norm": 12.209981918334961, "learning_rate": 7.409316134926072e-07, "loss": 0.0303, "step": 868200 }, { "epoch": 8.54, "grad_norm": 0.07623621821403503, "learning_rate": 7.408074910383589e-07, "loss": 0.0445, "step": 868225 }, { "epoch": 8.54, "grad_norm": 1.8212324380874634, "learning_rate": 7.406833685841104e-07, "loss": 0.0295, "step": 868250 }, { "epoch": 8.54, "grad_norm": 0.04993889480829239, "learning_rate": 7.405592461298619e-07, "loss": 0.0481, "step": 868275 }, { "epoch": 8.54, "grad_norm": 8.594220161437988, "learning_rate": 7.404351236756136e-07, "loss": 0.0475, "step": 868300 }, { "epoch": 8.54, "grad_norm": 0.126604825258255, "learning_rate": 7.40311001221365e-07, "loss": 0.0342, "step": 868325 }, { "epoch": 8.54, "grad_norm": 12.31800651550293, "learning_rate": 7.401868787671165e-07, "loss": 0.0203, "step": 868350 }, { "epoch": 8.54, "grad_norm": 1.1930946111679077, "learning_rate": 7.40062756312868e-07, "loss": 0.0492, "step": 868375 }, { "epoch": 8.54, "grad_norm": 7.871717929840088, "learning_rate": 7.399386338586197e-07, "loss": 0.0307, "step": 868400 }, { "epoch": 8.54, "grad_norm": 0.013782002963125706, "learning_rate": 7.398145114043712e-07, "loss": 0.0332, "step": 868425 }, { "epoch": 8.54, "grad_norm": 0.49748143553733826, "learning_rate": 7.396903889501226e-07, "loss": 0.0257, "step": 868450 }, { "epoch": 8.54, "grad_norm": 7.033641338348389, "learning_rate": 7.395662664958743e-07, "loss": 0.0298, "step": 868475 }, { "epoch": 8.54, "grad_norm": 5.771315574645996, "learning_rate": 7.394421440416258e-07, "loss": 0.0309, "step": 868500 }, { "epoch": 8.54, "grad_norm": 0.11893017590045929, "learning_rate": 7.393180215873773e-07, "loss": 0.0308, "step": 868525 }, { "epoch": 8.54, "grad_norm": 8.807169914245605, "learning_rate": 7.391938991331287e-07, "loss": 0.0183, "step": 868550 }, { "epoch": 8.54, "grad_norm": 0.24921733140945435, "learning_rate": 7.390747415770504e-07, "loss": 0.0307, "step": 868575 }, { "epoch": 8.54, "grad_norm": 6.1431074142456055, "learning_rate": 7.389506191228018e-07, "loss": 0.0175, "step": 868600 }, { "epoch": 8.54, "grad_norm": 2.285886764526367, "learning_rate": 7.388264966685534e-07, "loss": 0.0294, "step": 868625 }, { "epoch": 8.54, "grad_norm": 8.659818649291992, "learning_rate": 7.38702374214305e-07, "loss": 0.0239, "step": 868650 }, { "epoch": 8.54, "grad_norm": 3.295623302459717, "learning_rate": 7.385782517600565e-07, "loss": 0.0415, "step": 868675 }, { "epoch": 8.54, "grad_norm": 20.396240234375, "learning_rate": 7.38454129305808e-07, "loss": 0.054, "step": 868700 }, { "epoch": 8.54, "grad_norm": 3.8726532459259033, "learning_rate": 7.383300068515595e-07, "loss": 0.0299, "step": 868725 }, { "epoch": 8.54, "grad_norm": 5.35874605178833, "learning_rate": 7.382058843973111e-07, "loss": 0.0393, "step": 868750 }, { "epoch": 8.54, "grad_norm": 3.300628185272217, "learning_rate": 7.380817619430626e-07, "loss": 0.0281, "step": 868775 }, { "epoch": 8.54, "grad_norm": 18.76861000061035, "learning_rate": 7.379576394888141e-07, "loss": 0.0393, "step": 868800 }, { "epoch": 8.54, "grad_norm": 0.013339562341570854, "learning_rate": 7.378335170345657e-07, "loss": 0.0362, "step": 868825 }, { "epoch": 8.54, "grad_norm": 15.310297012329102, "learning_rate": 7.377093945803172e-07, "loss": 0.0247, "step": 868850 }, { "epoch": 8.54, "grad_norm": 0.17255498468875885, "learning_rate": 7.375852721260688e-07, "loss": 0.0304, "step": 868875 }, { "epoch": 8.54, "grad_norm": 9.29733657836914, "learning_rate": 7.374611496718202e-07, "loss": 0.0276, "step": 868900 }, { "epoch": 8.54, "grad_norm": 0.4958263635635376, "learning_rate": 7.373370272175718e-07, "loss": 0.0371, "step": 868925 }, { "epoch": 8.54, "grad_norm": 13.89753532409668, "learning_rate": 7.372129047633233e-07, "loss": 0.0243, "step": 868950 }, { "epoch": 8.54, "grad_norm": 0.021114632487297058, "learning_rate": 7.370887823090749e-07, "loss": 0.0312, "step": 868975 }, { "epoch": 8.54, "grad_norm": 21.353574752807617, "learning_rate": 7.369646598548265e-07, "loss": 0.0378, "step": 869000 }, { "epoch": 8.54, "grad_norm": 0.02567031793296337, "learning_rate": 7.368405374005779e-07, "loss": 0.0251, "step": 869025 }, { "epoch": 8.54, "grad_norm": 11.766837120056152, "learning_rate": 7.367164149463295e-07, "loss": 0.0245, "step": 869050 }, { "epoch": 8.54, "grad_norm": 0.04234279692173004, "learning_rate": 7.365922924920811e-07, "loss": 0.0324, "step": 869075 }, { "epoch": 8.55, "grad_norm": 0.4450433850288391, "learning_rate": 7.364681700378326e-07, "loss": 0.0307, "step": 869100 }, { "epoch": 8.55, "grad_norm": 0.04747321084141731, "learning_rate": 7.36344047583584e-07, "loss": 0.0282, "step": 869125 }, { "epoch": 8.55, "grad_norm": 11.533638000488281, "learning_rate": 7.362199251293356e-07, "loss": 0.0246, "step": 869150 }, { "epoch": 8.55, "grad_norm": 0.21573667228221893, "learning_rate": 7.360958026750872e-07, "loss": 0.042, "step": 869175 }, { "epoch": 8.55, "grad_norm": 12.294989585876465, "learning_rate": 7.359716802208387e-07, "loss": 0.0311, "step": 869200 }, { "epoch": 8.55, "grad_norm": 5.472894668579102, "learning_rate": 7.358475577665903e-07, "loss": 0.0267, "step": 869225 }, { "epoch": 8.55, "grad_norm": 10.30909252166748, "learning_rate": 7.357234353123419e-07, "loss": 0.0219, "step": 869250 }, { "epoch": 8.55, "grad_norm": 0.4796694219112396, "learning_rate": 7.355993128580933e-07, "loss": 0.0335, "step": 869275 }, { "epoch": 8.55, "grad_norm": 0.5405905842781067, "learning_rate": 7.354751904038449e-07, "loss": 0.0178, "step": 869300 }, { "epoch": 8.55, "grad_norm": 0.0769578367471695, "learning_rate": 7.353510679495964e-07, "loss": 0.0357, "step": 869325 }, { "epoch": 8.55, "grad_norm": 9.251306533813477, "learning_rate": 7.35226945495348e-07, "loss": 0.0558, "step": 869350 }, { "epoch": 8.55, "grad_norm": 0.036056723445653915, "learning_rate": 7.351028230410994e-07, "loss": 0.0404, "step": 869375 }, { "epoch": 8.55, "grad_norm": 7.618102550506592, "learning_rate": 7.34978700586851e-07, "loss": 0.0214, "step": 869400 }, { "epoch": 8.55, "grad_norm": 0.1195763424038887, "learning_rate": 7.348545781326026e-07, "loss": 0.0348, "step": 869425 }, { "epoch": 8.55, "grad_norm": 21.788185119628906, "learning_rate": 7.347304556783541e-07, "loss": 0.0293, "step": 869450 }, { "epoch": 8.55, "grad_norm": 0.018002646043896675, "learning_rate": 7.346063332241056e-07, "loss": 0.0229, "step": 869475 }, { "epoch": 8.55, "grad_norm": 2.027650833129883, "learning_rate": 7.344822107698572e-07, "loss": 0.0168, "step": 869500 }, { "epoch": 8.55, "grad_norm": 1.1331851482391357, "learning_rate": 7.343580883156087e-07, "loss": 0.0414, "step": 869525 }, { "epoch": 8.55, "grad_norm": 17.837963104248047, "learning_rate": 7.342339658613603e-07, "loss": 0.0323, "step": 869550 }, { "epoch": 8.55, "grad_norm": 0.08757351338863373, "learning_rate": 7.341098434071117e-07, "loss": 0.0346, "step": 869575 }, { "epoch": 8.55, "grad_norm": 10.902000427246094, "learning_rate": 7.339857209528633e-07, "loss": 0.0223, "step": 869600 }, { "epoch": 8.55, "grad_norm": 0.3457608222961426, "learning_rate": 7.338615984986148e-07, "loss": 0.0329, "step": 869625 }, { "epoch": 8.55, "grad_norm": 12.406974792480469, "learning_rate": 7.337374760443664e-07, "loss": 0.0379, "step": 869650 }, { "epoch": 8.55, "grad_norm": 0.22561298310756683, "learning_rate": 7.33613353590118e-07, "loss": 0.0444, "step": 869675 }, { "epoch": 8.55, "grad_norm": 3.1214733123779297, "learning_rate": 7.334892311358694e-07, "loss": 0.0227, "step": 869700 }, { "epoch": 8.55, "grad_norm": 2.674884796142578, "learning_rate": 7.33365108681621e-07, "loss": 0.0385, "step": 869725 }, { "epoch": 8.55, "grad_norm": 13.587008476257324, "learning_rate": 7.332409862273725e-07, "loss": 0.0313, "step": 869750 }, { "epoch": 8.55, "grad_norm": 0.030515223741531372, "learning_rate": 7.331168637731241e-07, "loss": 0.0322, "step": 869775 }, { "epoch": 8.55, "grad_norm": 6.321138858795166, "learning_rate": 7.329927413188755e-07, "loss": 0.0248, "step": 869800 }, { "epoch": 8.55, "grad_norm": 0.0204982440918684, "learning_rate": 7.328686188646271e-07, "loss": 0.0195, "step": 869825 }, { "epoch": 8.55, "grad_norm": 14.356658935546875, "learning_rate": 7.327444964103787e-07, "loss": 0.024, "step": 869850 }, { "epoch": 8.55, "grad_norm": 2.5511720180511475, "learning_rate": 7.326203739561302e-07, "loss": 0.0429, "step": 869875 }, { "epoch": 8.55, "grad_norm": 16.589941024780273, "learning_rate": 7.324962515018818e-07, "loss": 0.0236, "step": 869900 }, { "epoch": 8.55, "grad_norm": 0.0036000418476760387, "learning_rate": 7.323721290476334e-07, "loss": 0.0297, "step": 869925 }, { "epoch": 8.55, "grad_norm": 6.155008792877197, "learning_rate": 7.322480065933848e-07, "loss": 0.0163, "step": 869950 }, { "epoch": 8.55, "grad_norm": 0.17504015564918518, "learning_rate": 7.321238841391364e-07, "loss": 0.0355, "step": 869975 }, { "epoch": 8.55, "grad_norm": 5.7555832862854, "learning_rate": 7.319997616848879e-07, "loss": 0.0317, "step": 870000 }, { "epoch": 8.55, "grad_norm": 1.9583572149276733, "learning_rate": 7.318756392306395e-07, "loss": 0.0491, "step": 870025 }, { "epoch": 8.55, "grad_norm": 29.908472061157227, "learning_rate": 7.317515167763909e-07, "loss": 0.0344, "step": 870050 }, { "epoch": 8.55, "grad_norm": 0.021252254024147987, "learning_rate": 7.316273943221425e-07, "loss": 0.0344, "step": 870075 }, { "epoch": 8.55, "grad_norm": 9.818620681762695, "learning_rate": 7.315032718678941e-07, "loss": 0.0288, "step": 870100 }, { "epoch": 8.56, "grad_norm": 5.671024322509766, "learning_rate": 7.313791494136456e-07, "loss": 0.0342, "step": 870125 }, { "epoch": 8.56, "grad_norm": 12.036213874816895, "learning_rate": 7.312550269593971e-07, "loss": 0.032, "step": 870150 }, { "epoch": 8.56, "grad_norm": 0.015111715532839298, "learning_rate": 7.311309045051486e-07, "loss": 0.0323, "step": 870175 }, { "epoch": 8.56, "grad_norm": 26.2373104095459, "learning_rate": 7.310067820509002e-07, "loss": 0.042, "step": 870200 }, { "epoch": 8.56, "grad_norm": 0.022311735898256302, "learning_rate": 7.308826595966518e-07, "loss": 0.0472, "step": 870225 }, { "epoch": 8.56, "grad_norm": 1.2044696807861328, "learning_rate": 7.307585371424032e-07, "loss": 0.0255, "step": 870250 }, { "epoch": 8.56, "grad_norm": 8.377535820007324, "learning_rate": 7.306344146881548e-07, "loss": 0.0246, "step": 870275 }, { "epoch": 8.56, "grad_norm": 12.482341766357422, "learning_rate": 7.305102922339063e-07, "loss": 0.0599, "step": 870300 }, { "epoch": 8.56, "grad_norm": 3.499568462371826, "learning_rate": 7.303861697796579e-07, "loss": 0.0289, "step": 870325 }, { "epoch": 8.56, "grad_norm": 10.12027645111084, "learning_rate": 7.302620473254095e-07, "loss": 0.0223, "step": 870350 }, { "epoch": 8.56, "grad_norm": 0.012666609138250351, "learning_rate": 7.301379248711609e-07, "loss": 0.0327, "step": 870375 }, { "epoch": 8.56, "grad_norm": 15.523747444152832, "learning_rate": 7.300138024169125e-07, "loss": 0.0357, "step": 870400 }, { "epoch": 8.56, "grad_norm": 0.10367809236049652, "learning_rate": 7.29889679962664e-07, "loss": 0.031, "step": 870425 }, { "epoch": 8.56, "grad_norm": 8.715137481689453, "learning_rate": 7.297655575084156e-07, "loss": 0.0272, "step": 870450 }, { "epoch": 8.56, "grad_norm": 0.1031588688492775, "learning_rate": 7.29641435054167e-07, "loss": 0.0395, "step": 870475 }, { "epoch": 8.56, "grad_norm": 2.7207820415496826, "learning_rate": 7.295173125999186e-07, "loss": 0.0132, "step": 870500 }, { "epoch": 8.56, "grad_norm": 11.755016326904297, "learning_rate": 7.293931901456702e-07, "loss": 0.044, "step": 870525 }, { "epoch": 8.56, "grad_norm": 2.2722649574279785, "learning_rate": 7.292690676914217e-07, "loss": 0.0436, "step": 870550 }, { "epoch": 8.56, "grad_norm": 3.485236167907715, "learning_rate": 7.291449452371733e-07, "loss": 0.0368, "step": 870575 }, { "epoch": 8.56, "grad_norm": 6.9400739669799805, "learning_rate": 7.290208227829247e-07, "loss": 0.022, "step": 870600 }, { "epoch": 8.56, "grad_norm": 0.022149110212922096, "learning_rate": 7.288967003286763e-07, "loss": 0.0243, "step": 870625 }, { "epoch": 8.56, "grad_norm": 7.942352294921875, "learning_rate": 7.287725778744279e-07, "loss": 0.0424, "step": 870650 }, { "epoch": 8.56, "grad_norm": 1.7874696254730225, "learning_rate": 7.286484554201794e-07, "loss": 0.0376, "step": 870675 }, { "epoch": 8.56, "grad_norm": 0.7001263499259949, "learning_rate": 7.28524332965931e-07, "loss": 0.0179, "step": 870700 }, { "epoch": 8.56, "grad_norm": 0.019906379282474518, "learning_rate": 7.284002105116824e-07, "loss": 0.0379, "step": 870725 }, { "epoch": 8.56, "grad_norm": 11.500153541564941, "learning_rate": 7.28276088057434e-07, "loss": 0.0296, "step": 870750 }, { "epoch": 8.56, "grad_norm": 0.4102313816547394, "learning_rate": 7.281519656031856e-07, "loss": 0.0311, "step": 870775 }, { "epoch": 8.56, "grad_norm": 8.29351806640625, "learning_rate": 7.280278431489371e-07, "loss": 0.0207, "step": 870800 }, { "epoch": 8.56, "grad_norm": 0.007065135519951582, "learning_rate": 7.279037206946886e-07, "loss": 0.0351, "step": 870825 }, { "epoch": 8.56, "grad_norm": 7.776304244995117, "learning_rate": 7.277795982404401e-07, "loss": 0.0373, "step": 870850 }, { "epoch": 8.56, "grad_norm": 0.008239684626460075, "learning_rate": 7.276554757861917e-07, "loss": 0.0295, "step": 870875 }, { "epoch": 8.56, "grad_norm": 10.700825691223145, "learning_rate": 7.275313533319433e-07, "loss": 0.0214, "step": 870900 }, { "epoch": 8.56, "grad_norm": 0.053503766655921936, "learning_rate": 7.274072308776947e-07, "loss": 0.0259, "step": 870925 }, { "epoch": 8.56, "grad_norm": 1.7492191791534424, "learning_rate": 7.272831084234463e-07, "loss": 0.0119, "step": 870950 }, { "epoch": 8.56, "grad_norm": 5.427031993865967, "learning_rate": 7.271589859691978e-07, "loss": 0.0323, "step": 870975 }, { "epoch": 8.56, "grad_norm": 17.20595359802246, "learning_rate": 7.270348635149494e-07, "loss": 0.0322, "step": 871000 }, { "epoch": 8.56, "grad_norm": 1.8254388570785522, "learning_rate": 7.269107410607008e-07, "loss": 0.0276, "step": 871025 }, { "epoch": 8.56, "grad_norm": 14.983502388000488, "learning_rate": 7.267866186064524e-07, "loss": 0.0255, "step": 871050 }, { "epoch": 8.56, "grad_norm": 0.5625624060630798, "learning_rate": 7.26662496152204e-07, "loss": 0.0324, "step": 871075 }, { "epoch": 8.56, "grad_norm": 7.426483631134033, "learning_rate": 7.265383736979555e-07, "loss": 0.0177, "step": 871100 }, { "epoch": 8.57, "grad_norm": 0.027750903740525246, "learning_rate": 7.264142512437071e-07, "loss": 0.0387, "step": 871125 }, { "epoch": 8.57, "grad_norm": 12.658072471618652, "learning_rate": 7.262901287894585e-07, "loss": 0.0288, "step": 871150 }, { "epoch": 8.57, "grad_norm": 1.549834966659546, "learning_rate": 7.2617097123338e-07, "loss": 0.0257, "step": 871175 }, { "epoch": 8.57, "grad_norm": 3.0423035621643066, "learning_rate": 7.260468487791315e-07, "loss": 0.0303, "step": 871200 }, { "epoch": 8.57, "grad_norm": 0.2296871840953827, "learning_rate": 7.259227263248832e-07, "loss": 0.0282, "step": 871225 }, { "epoch": 8.57, "grad_norm": 8.21951675415039, "learning_rate": 7.257986038706347e-07, "loss": 0.0248, "step": 871250 }, { "epoch": 8.57, "grad_norm": 0.06201046705245972, "learning_rate": 7.256744814163861e-07, "loss": 0.0322, "step": 871275 }, { "epoch": 8.57, "grad_norm": 6.868315696716309, "learning_rate": 7.255503589621377e-07, "loss": 0.0263, "step": 871300 }, { "epoch": 8.57, "grad_norm": 0.007535781245678663, "learning_rate": 7.254262365078893e-07, "loss": 0.0219, "step": 871325 }, { "epoch": 8.57, "grad_norm": 6.610786437988281, "learning_rate": 7.253021140536408e-07, "loss": 0.0449, "step": 871350 }, { "epoch": 8.57, "grad_norm": 0.06636420637369156, "learning_rate": 7.251779915993924e-07, "loss": 0.0383, "step": 871375 }, { "epoch": 8.57, "grad_norm": 2.324064016342163, "learning_rate": 7.25053869145144e-07, "loss": 0.0276, "step": 871400 }, { "epoch": 8.57, "grad_norm": 0.29059019684791565, "learning_rate": 7.249297466908954e-07, "loss": 0.0324, "step": 871425 }, { "epoch": 8.57, "grad_norm": 12.839764595031738, "learning_rate": 7.248056242366469e-07, "loss": 0.0303, "step": 871450 }, { "epoch": 8.57, "grad_norm": 0.15879510343074799, "learning_rate": 7.246815017823986e-07, "loss": 0.0253, "step": 871475 }, { "epoch": 8.57, "grad_norm": 4.699973106384277, "learning_rate": 7.245573793281501e-07, "loss": 0.0367, "step": 871500 }, { "epoch": 8.57, "grad_norm": 0.278874009847641, "learning_rate": 7.244332568739015e-07, "loss": 0.0173, "step": 871525 }, { "epoch": 8.57, "grad_norm": 7.821004867553711, "learning_rate": 7.243091344196531e-07, "loss": 0.0323, "step": 871550 }, { "epoch": 8.57, "grad_norm": 0.07485684007406235, "learning_rate": 7.241850119654047e-07, "loss": 0.0236, "step": 871575 }, { "epoch": 8.57, "grad_norm": 19.1282958984375, "learning_rate": 7.240608895111562e-07, "loss": 0.025, "step": 871600 }, { "epoch": 8.57, "grad_norm": 0.5245554447174072, "learning_rate": 7.239367670569076e-07, "loss": 0.04, "step": 871625 }, { "epoch": 8.57, "grad_norm": 8.072650909423828, "learning_rate": 7.238126446026593e-07, "loss": 0.0268, "step": 871650 }, { "epoch": 8.57, "grad_norm": 0.003817468648776412, "learning_rate": 7.236885221484108e-07, "loss": 0.0341, "step": 871675 }, { "epoch": 8.57, "grad_norm": 17.525718688964844, "learning_rate": 7.235643996941623e-07, "loss": 0.0408, "step": 871700 }, { "epoch": 8.57, "grad_norm": 2.47999906539917, "learning_rate": 7.23440277239914e-07, "loss": 0.0442, "step": 871725 }, { "epoch": 8.57, "grad_norm": 5.678764820098877, "learning_rate": 7.233161547856654e-07, "loss": 0.0213, "step": 871750 }, { "epoch": 8.57, "grad_norm": 0.19747455418109894, "learning_rate": 7.231920323314169e-07, "loss": 0.0289, "step": 871775 }, { "epoch": 8.57, "grad_norm": 1.310678482055664, "learning_rate": 7.230679098771685e-07, "loss": 0.0284, "step": 871800 }, { "epoch": 8.57, "grad_norm": 0.2421237975358963, "learning_rate": 7.229437874229201e-07, "loss": 0.0313, "step": 871825 }, { "epoch": 8.57, "grad_norm": 1.0949211120605469, "learning_rate": 7.228196649686715e-07, "loss": 0.0307, "step": 871850 }, { "epoch": 8.57, "grad_norm": 0.024851474910974503, "learning_rate": 7.22695542514423e-07, "loss": 0.0293, "step": 871875 }, { "epoch": 8.57, "grad_norm": 26.97984504699707, "learning_rate": 7.225714200601747e-07, "loss": 0.0223, "step": 871900 }, { "epoch": 8.57, "grad_norm": 0.01455006841570139, "learning_rate": 7.224472976059262e-07, "loss": 0.0241, "step": 871925 }, { "epoch": 8.57, "grad_norm": 8.828962326049805, "learning_rate": 7.223231751516776e-07, "loss": 0.0283, "step": 871950 }, { "epoch": 8.57, "grad_norm": 0.30811822414398193, "learning_rate": 7.221990526974292e-07, "loss": 0.0404, "step": 871975 }, { "epoch": 8.57, "grad_norm": 19.70794677734375, "learning_rate": 7.220749302431808e-07, "loss": 0.0323, "step": 872000 }, { "epoch": 8.57, "grad_norm": 0.35599905252456665, "learning_rate": 7.219508077889323e-07, "loss": 0.032, "step": 872025 }, { "epoch": 8.57, "grad_norm": 1.836093783378601, "learning_rate": 7.218266853346838e-07, "loss": 0.0295, "step": 872050 }, { "epoch": 8.57, "grad_norm": 0.30982205271720886, "learning_rate": 7.217025628804355e-07, "loss": 0.0296, "step": 872075 }, { "epoch": 8.57, "grad_norm": 3.630293607711792, "learning_rate": 7.215784404261869e-07, "loss": 0.022, "step": 872100 }, { "epoch": 8.57, "grad_norm": 2.497377395629883, "learning_rate": 7.214543179719384e-07, "loss": 0.0358, "step": 872125 }, { "epoch": 8.58, "grad_norm": 0.5675708651542664, "learning_rate": 7.213301955176901e-07, "loss": 0.0353, "step": 872150 }, { "epoch": 8.58, "grad_norm": 0.17984536290168762, "learning_rate": 7.212060730634416e-07, "loss": 0.0516, "step": 872175 }, { "epoch": 8.58, "grad_norm": 12.708142280578613, "learning_rate": 7.21081950609193e-07, "loss": 0.0202, "step": 872200 }, { "epoch": 8.58, "grad_norm": 0.11210139095783234, "learning_rate": 7.209578281549446e-07, "loss": 0.021, "step": 872225 }, { "epoch": 8.58, "grad_norm": 3.8438632488250732, "learning_rate": 7.208337057006962e-07, "loss": 0.0209, "step": 872250 }, { "epoch": 8.58, "grad_norm": 0.3266391158103943, "learning_rate": 7.207095832464477e-07, "loss": 0.0367, "step": 872275 }, { "epoch": 8.58, "grad_norm": 13.433152198791504, "learning_rate": 7.205854607921991e-07, "loss": 0.0221, "step": 872300 }, { "epoch": 8.58, "grad_norm": 0.46287256479263306, "learning_rate": 7.204613383379508e-07, "loss": 0.0219, "step": 872325 }, { "epoch": 8.58, "grad_norm": 12.7125244140625, "learning_rate": 7.203372158837023e-07, "loss": 0.0363, "step": 872350 }, { "epoch": 8.58, "grad_norm": 0.020721564069390297, "learning_rate": 7.202130934294538e-07, "loss": 0.0353, "step": 872375 }, { "epoch": 8.58, "grad_norm": 0.40224966406822205, "learning_rate": 7.200889709752055e-07, "loss": 0.0276, "step": 872400 }, { "epoch": 8.58, "grad_norm": 9.289292335510254, "learning_rate": 7.199648485209569e-07, "loss": 0.0228, "step": 872425 }, { "epoch": 8.58, "grad_norm": 7.845274925231934, "learning_rate": 7.198407260667084e-07, "loss": 0.0395, "step": 872450 }, { "epoch": 8.58, "grad_norm": 0.026588890701532364, "learning_rate": 7.197166036124599e-07, "loss": 0.042, "step": 872475 }, { "epoch": 8.58, "grad_norm": 9.050623893737793, "learning_rate": 7.195924811582116e-07, "loss": 0.025, "step": 872500 }, { "epoch": 8.58, "grad_norm": 0.06278370320796967, "learning_rate": 7.19468358703963e-07, "loss": 0.032, "step": 872525 }, { "epoch": 8.58, "grad_norm": 1.1684117317199707, "learning_rate": 7.193442362497145e-07, "loss": 0.0203, "step": 872550 }, { "epoch": 8.58, "grad_norm": 0.10529080778360367, "learning_rate": 7.192201137954662e-07, "loss": 0.0477, "step": 872575 }, { "epoch": 8.58, "grad_norm": 3.0781404972076416, "learning_rate": 7.190959913412177e-07, "loss": 0.0173, "step": 872600 }, { "epoch": 8.58, "grad_norm": 0.0030321944504976273, "learning_rate": 7.189718688869691e-07, "loss": 0.0286, "step": 872625 }, { "epoch": 8.58, "grad_norm": 12.967002868652344, "learning_rate": 7.188477464327207e-07, "loss": 0.0211, "step": 872650 }, { "epoch": 8.58, "grad_norm": 0.7442033886909485, "learning_rate": 7.187236239784723e-07, "loss": 0.0355, "step": 872675 }, { "epoch": 8.58, "grad_norm": 15.213640213012695, "learning_rate": 7.185995015242238e-07, "loss": 0.0263, "step": 872700 }, { "epoch": 8.58, "grad_norm": 0.6221243739128113, "learning_rate": 7.184753790699753e-07, "loss": 0.0334, "step": 872725 }, { "epoch": 8.58, "grad_norm": 14.656651496887207, "learning_rate": 7.183512566157269e-07, "loss": 0.0409, "step": 872750 }, { "epoch": 8.58, "grad_norm": 0.019916802644729614, "learning_rate": 7.182271341614784e-07, "loss": 0.0478, "step": 872775 }, { "epoch": 8.58, "grad_norm": 4.307934284210205, "learning_rate": 7.181030117072299e-07, "loss": 0.0358, "step": 872800 }, { "epoch": 8.58, "grad_norm": 5.0346174240112305, "learning_rate": 7.179788892529816e-07, "loss": 0.0386, "step": 872825 }, { "epoch": 8.58, "grad_norm": 11.635668754577637, "learning_rate": 7.178547667987331e-07, "loss": 0.0367, "step": 872850 }, { "epoch": 8.58, "grad_norm": 0.05614598095417023, "learning_rate": 7.177306443444845e-07, "loss": 0.0171, "step": 872875 }, { "epoch": 8.58, "grad_norm": 6.861333847045898, "learning_rate": 7.17606521890236e-07, "loss": 0.0212, "step": 872900 }, { "epoch": 8.58, "grad_norm": 1.760459303855896, "learning_rate": 7.174823994359877e-07, "loss": 0.0478, "step": 872925 }, { "epoch": 8.58, "grad_norm": 1.2967418432235718, "learning_rate": 7.173582769817392e-07, "loss": 0.0214, "step": 872950 }, { "epoch": 8.58, "grad_norm": 0.4217764735221863, "learning_rate": 7.172341545274906e-07, "loss": 0.0172, "step": 872975 }, { "epoch": 8.58, "grad_norm": 0.13585031032562256, "learning_rate": 7.171100320732423e-07, "loss": 0.0425, "step": 873000 }, { "epoch": 8.58, "grad_norm": 0.00414988724514842, "learning_rate": 7.169859096189938e-07, "loss": 0.0292, "step": 873025 }, { "epoch": 8.58, "grad_norm": 6.743955612182617, "learning_rate": 7.168617871647453e-07, "loss": 0.0363, "step": 873050 }, { "epoch": 8.58, "grad_norm": 0.13039302825927734, "learning_rate": 7.16737664710497e-07, "loss": 0.0438, "step": 873075 }, { "epoch": 8.58, "grad_norm": 5.915351867675781, "learning_rate": 7.166135422562484e-07, "loss": 0.0257, "step": 873100 }, { "epoch": 8.58, "grad_norm": 0.009920350275933743, "learning_rate": 7.164894198019999e-07, "loss": 0.0342, "step": 873125 }, { "epoch": 8.58, "grad_norm": 10.251635551452637, "learning_rate": 7.163652973477514e-07, "loss": 0.0305, "step": 873150 }, { "epoch": 8.59, "grad_norm": 0.7450110912322998, "learning_rate": 7.162411748935031e-07, "loss": 0.0237, "step": 873175 }, { "epoch": 8.59, "grad_norm": 9.800491333007812, "learning_rate": 7.161170524392545e-07, "loss": 0.0255, "step": 873200 }, { "epoch": 8.59, "grad_norm": 0.0020144290756434202, "learning_rate": 7.15992929985006e-07, "loss": 0.0272, "step": 873225 }, { "epoch": 8.59, "grad_norm": 1.5825061798095703, "learning_rate": 7.158688075307577e-07, "loss": 0.0147, "step": 873250 }, { "epoch": 8.59, "grad_norm": 0.036904677748680115, "learning_rate": 7.157446850765092e-07, "loss": 0.035, "step": 873275 }, { "epoch": 8.59, "grad_norm": 12.16539478302002, "learning_rate": 7.156205626222606e-07, "loss": 0.0327, "step": 873300 }, { "epoch": 8.59, "grad_norm": 0.6567522883415222, "learning_rate": 7.154964401680121e-07, "loss": 0.0308, "step": 873325 }, { "epoch": 8.59, "grad_norm": 7.856033802032471, "learning_rate": 7.153723177137638e-07, "loss": 0.0369, "step": 873350 }, { "epoch": 8.59, "grad_norm": 16.702722549438477, "learning_rate": 7.152481952595153e-07, "loss": 0.0329, "step": 873375 }, { "epoch": 8.59, "grad_norm": 8.40709400177002, "learning_rate": 7.151240728052667e-07, "loss": 0.0365, "step": 873400 }, { "epoch": 8.59, "grad_norm": 0.03495590016245842, "learning_rate": 7.149999503510184e-07, "loss": 0.0345, "step": 873425 }, { "epoch": 8.59, "grad_norm": 2.2397279739379883, "learning_rate": 7.148758278967699e-07, "loss": 0.0237, "step": 873450 }, { "epoch": 8.59, "grad_norm": 0.01468002237379551, "learning_rate": 7.147517054425214e-07, "loss": 0.032, "step": 873475 }, { "epoch": 8.59, "grad_norm": 1.256370186805725, "learning_rate": 7.14627582988273e-07, "loss": 0.0216, "step": 873500 }, { "epoch": 8.59, "grad_norm": 0.042693570256233215, "learning_rate": 7.145034605340246e-07, "loss": 0.0643, "step": 873525 }, { "epoch": 8.59, "grad_norm": 24.04730987548828, "learning_rate": 7.14379338079776e-07, "loss": 0.0302, "step": 873550 }, { "epoch": 8.59, "grad_norm": 1.8013938665390015, "learning_rate": 7.142552156255275e-07, "loss": 0.0421, "step": 873575 }, { "epoch": 8.59, "grad_norm": 11.831767082214355, "learning_rate": 7.141310931712792e-07, "loss": 0.0276, "step": 873600 }, { "epoch": 8.59, "grad_norm": 0.5387730598449707, "learning_rate": 7.140069707170307e-07, "loss": 0.039, "step": 873625 }, { "epoch": 8.59, "grad_norm": 10.89974594116211, "learning_rate": 7.138828482627821e-07, "loss": 0.0234, "step": 873650 }, { "epoch": 8.59, "grad_norm": 4.240230560302734, "learning_rate": 7.137587258085338e-07, "loss": 0.0249, "step": 873675 }, { "epoch": 8.59, "grad_norm": 3.6437575817108154, "learning_rate": 7.136346033542853e-07, "loss": 0.0221, "step": 873700 }, { "epoch": 8.59, "grad_norm": 0.742210865020752, "learning_rate": 7.135154457982067e-07, "loss": 0.05, "step": 873725 }, { "epoch": 8.59, "grad_norm": 1.6040390729904175, "learning_rate": 7.133913233439583e-07, "loss": 0.0245, "step": 873750 }, { "epoch": 8.59, "grad_norm": 0.058839235454797745, "learning_rate": 7.132672008897099e-07, "loss": 0.0326, "step": 873775 }, { "epoch": 8.59, "grad_norm": 11.21317195892334, "learning_rate": 7.131430784354613e-07, "loss": 0.0387, "step": 873800 }, { "epoch": 8.59, "grad_norm": 0.03323158621788025, "learning_rate": 7.130189559812128e-07, "loss": 0.0397, "step": 873825 }, { "epoch": 8.59, "grad_norm": 8.64929485321045, "learning_rate": 7.128948335269645e-07, "loss": 0.0299, "step": 873850 }, { "epoch": 8.59, "grad_norm": 7.358468532562256, "learning_rate": 7.12770711072716e-07, "loss": 0.0323, "step": 873875 }, { "epoch": 8.59, "grad_norm": 11.753159523010254, "learning_rate": 7.126465886184674e-07, "loss": 0.0336, "step": 873900 }, { "epoch": 8.59, "grad_norm": 1.5278156995773315, "learning_rate": 7.12522466164219e-07, "loss": 0.0268, "step": 873925 }, { "epoch": 8.59, "grad_norm": 4.901194095611572, "learning_rate": 7.123983437099706e-07, "loss": 0.0196, "step": 873950 }, { "epoch": 8.59, "grad_norm": 1.1300963163375854, "learning_rate": 7.122742212557221e-07, "loss": 0.0389, "step": 873975 }, { "epoch": 8.59, "grad_norm": 23.38512420654297, "learning_rate": 7.121500988014736e-07, "loss": 0.0185, "step": 874000 }, { "epoch": 8.59, "grad_norm": 1.6856513023376465, "learning_rate": 7.120259763472252e-07, "loss": 0.0258, "step": 874025 }, { "epoch": 8.59, "grad_norm": 0.3405667245388031, "learning_rate": 7.119018538929767e-07, "loss": 0.0315, "step": 874050 }, { "epoch": 8.59, "grad_norm": 0.01774025522172451, "learning_rate": 7.117777314387282e-07, "loss": 0.0259, "step": 874075 }, { "epoch": 8.59, "grad_norm": 5.270956993103027, "learning_rate": 7.116536089844799e-07, "loss": 0.0227, "step": 874100 }, { "epoch": 8.59, "grad_norm": 0.05845702812075615, "learning_rate": 7.115294865302313e-07, "loss": 0.0329, "step": 874125 }, { "epoch": 8.59, "grad_norm": 4.798624038696289, "learning_rate": 7.114053640759828e-07, "loss": 0.0222, "step": 874150 }, { "epoch": 8.6, "grad_norm": 3.1789326667785645, "learning_rate": 7.112812416217344e-07, "loss": 0.0287, "step": 874175 }, { "epoch": 8.6, "grad_norm": 27.620561599731445, "learning_rate": 7.11157119167486e-07, "loss": 0.0279, "step": 874200 }, { "epoch": 8.6, "grad_norm": 0.10237950086593628, "learning_rate": 7.110329967132374e-07, "loss": 0.0262, "step": 874225 }, { "epoch": 8.6, "grad_norm": 17.777231216430664, "learning_rate": 7.10908874258989e-07, "loss": 0.0393, "step": 874250 }, { "epoch": 8.6, "grad_norm": 3.8380353450775146, "learning_rate": 7.107847518047406e-07, "loss": 0.0491, "step": 874275 }, { "epoch": 8.6, "grad_norm": 5.846807956695557, "learning_rate": 7.106606293504921e-07, "loss": 0.0122, "step": 874300 }, { "epoch": 8.6, "grad_norm": 0.03304142877459526, "learning_rate": 7.105365068962436e-07, "loss": 0.0286, "step": 874325 }, { "epoch": 8.6, "grad_norm": 4.236408710479736, "learning_rate": 7.104123844419951e-07, "loss": 0.0259, "step": 874350 }, { "epoch": 8.6, "grad_norm": 6.504848957061768, "learning_rate": 7.102882619877467e-07, "loss": 0.0312, "step": 874375 }, { "epoch": 8.6, "grad_norm": 6.248172283172607, "learning_rate": 7.101641395334982e-07, "loss": 0.021, "step": 874400 }, { "epoch": 8.6, "grad_norm": 4.923890113830566, "learning_rate": 7.100400170792498e-07, "loss": 0.0238, "step": 874425 }, { "epoch": 8.6, "grad_norm": 9.130536079406738, "learning_rate": 7.099158946250014e-07, "loss": 0.0235, "step": 874450 }, { "epoch": 8.6, "grad_norm": 0.11611615866422653, "learning_rate": 7.097917721707528e-07, "loss": 0.029, "step": 874475 }, { "epoch": 8.6, "grad_norm": 11.874526977539062, "learning_rate": 7.096676497165043e-07, "loss": 0.0186, "step": 874500 }, { "epoch": 8.6, "grad_norm": 0.362495094537735, "learning_rate": 7.09543527262256e-07, "loss": 0.0252, "step": 874525 }, { "epoch": 8.6, "grad_norm": 3.470595598220825, "learning_rate": 7.094194048080075e-07, "loss": 0.0219, "step": 874550 }, { "epoch": 8.6, "grad_norm": 16.36283302307129, "learning_rate": 7.092952823537589e-07, "loss": 0.0426, "step": 874575 }, { "epoch": 8.6, "grad_norm": 10.117645263671875, "learning_rate": 7.091711598995105e-07, "loss": 0.0388, "step": 874600 }, { "epoch": 8.6, "grad_norm": 0.6935284733772278, "learning_rate": 7.090470374452621e-07, "loss": 0.0475, "step": 874625 }, { "epoch": 8.6, "grad_norm": 8.889236450195312, "learning_rate": 7.089229149910136e-07, "loss": 0.0213, "step": 874650 }, { "epoch": 8.6, "grad_norm": 0.10072018206119537, "learning_rate": 7.08798792536765e-07, "loss": 0.0211, "step": 874675 }, { "epoch": 8.6, "grad_norm": 18.604402542114258, "learning_rate": 7.086746700825167e-07, "loss": 0.0345, "step": 874700 }, { "epoch": 8.6, "grad_norm": 7.315588474273682, "learning_rate": 7.085505476282682e-07, "loss": 0.0315, "step": 874725 }, { "epoch": 8.6, "grad_norm": 0.8063819408416748, "learning_rate": 7.084264251740197e-07, "loss": 0.0096, "step": 874750 }, { "epoch": 8.6, "grad_norm": 4.20232629776001, "learning_rate": 7.083023027197712e-07, "loss": 0.0221, "step": 874775 }, { "epoch": 8.6, "grad_norm": 8.82461166381836, "learning_rate": 7.081781802655228e-07, "loss": 0.0245, "step": 874800 }, { "epoch": 8.6, "grad_norm": 1.2937343120574951, "learning_rate": 7.080540578112743e-07, "loss": 0.0365, "step": 874825 }, { "epoch": 8.6, "grad_norm": 7.148874759674072, "learning_rate": 7.079299353570259e-07, "loss": 0.0136, "step": 874850 }, { "epoch": 8.6, "grad_norm": 3.387712240219116, "learning_rate": 7.078058129027775e-07, "loss": 0.0196, "step": 874875 }, { "epoch": 8.6, "grad_norm": 15.499173164367676, "learning_rate": 7.076816904485289e-07, "loss": 0.0264, "step": 874900 }, { "epoch": 8.6, "grad_norm": 0.9840319752693176, "learning_rate": 7.075575679942804e-07, "loss": 0.0423, "step": 874925 }, { "epoch": 8.6, "grad_norm": 7.361309051513672, "learning_rate": 7.074334455400321e-07, "loss": 0.0258, "step": 874950 }, { "epoch": 8.6, "grad_norm": 0.6602857708930969, "learning_rate": 7.073093230857836e-07, "loss": 0.029, "step": 874975 }, { "epoch": 8.6, "grad_norm": 3.584879159927368, "learning_rate": 7.071852006315351e-07, "loss": 0.0186, "step": 875000 }, { "epoch": 8.6, "grad_norm": 3.3984029293060303, "learning_rate": 7.070610781772866e-07, "loss": 0.0329, "step": 875025 }, { "epoch": 8.6, "grad_norm": 0.18340452015399933, "learning_rate": 7.069369557230382e-07, "loss": 0.0235, "step": 875050 }, { "epoch": 8.6, "grad_norm": 1.6579657793045044, "learning_rate": 7.068128332687897e-07, "loss": 0.0254, "step": 875075 }, { "epoch": 8.6, "grad_norm": 1.514155626296997, "learning_rate": 7.066887108145413e-07, "loss": 0.0223, "step": 875100 }, { "epoch": 8.6, "grad_norm": 0.018143020570278168, "learning_rate": 7.065645883602929e-07, "loss": 0.0511, "step": 875125 }, { "epoch": 8.6, "grad_norm": 12.135505676269531, "learning_rate": 7.064404659060443e-07, "loss": 0.0226, "step": 875150 }, { "epoch": 8.6, "grad_norm": 0.08384819328784943, "learning_rate": 7.063163434517958e-07, "loss": 0.0732, "step": 875175 }, { "epoch": 8.61, "grad_norm": 1.7709083557128906, "learning_rate": 7.061922209975474e-07, "loss": 0.0336, "step": 875200 }, { "epoch": 8.61, "grad_norm": 0.017994089052081108, "learning_rate": 7.06068098543299e-07, "loss": 0.0415, "step": 875225 }, { "epoch": 8.61, "grad_norm": 10.087215423583984, "learning_rate": 7.059439760890504e-07, "loss": 0.0312, "step": 875250 }, { "epoch": 8.61, "grad_norm": 2.4614343643188477, "learning_rate": 7.05819853634802e-07, "loss": 0.0335, "step": 875275 }, { "epoch": 8.61, "grad_norm": 0.23021510243415833, "learning_rate": 7.056957311805536e-07, "loss": 0.031, "step": 875300 }, { "epoch": 8.61, "grad_norm": 2.891756772994995, "learning_rate": 7.055716087263051e-07, "loss": 0.0335, "step": 875325 }, { "epoch": 8.61, "grad_norm": 5.736605644226074, "learning_rate": 7.054474862720565e-07, "loss": 0.0273, "step": 875350 }, { "epoch": 8.61, "grad_norm": 1.3750077486038208, "learning_rate": 7.053233638178082e-07, "loss": 0.0336, "step": 875375 }, { "epoch": 8.61, "grad_norm": 15.035515785217285, "learning_rate": 7.051992413635597e-07, "loss": 0.0412, "step": 875400 }, { "epoch": 8.61, "grad_norm": 0.024196099489927292, "learning_rate": 7.050751189093112e-07, "loss": 0.0313, "step": 875425 }, { "epoch": 8.61, "grad_norm": 12.013777732849121, "learning_rate": 7.049509964550627e-07, "loss": 0.0195, "step": 875450 }, { "epoch": 8.61, "grad_norm": 0.10413146018981934, "learning_rate": 7.048268740008143e-07, "loss": 0.0257, "step": 875475 }, { "epoch": 8.61, "grad_norm": 6.975729465484619, "learning_rate": 7.047027515465658e-07, "loss": 0.0276, "step": 875500 }, { "epoch": 8.61, "grad_norm": 0.5938860774040222, "learning_rate": 7.045786290923174e-07, "loss": 0.0279, "step": 875525 }, { "epoch": 8.61, "grad_norm": 11.198249816894531, "learning_rate": 7.04454506638069e-07, "loss": 0.0427, "step": 875550 }, { "epoch": 8.61, "grad_norm": 0.03927440941333771, "learning_rate": 7.043303841838204e-07, "loss": 0.0194, "step": 875575 }, { "epoch": 8.61, "grad_norm": 0.7849190831184387, "learning_rate": 7.042062617295719e-07, "loss": 0.0189, "step": 875600 }, { "epoch": 8.61, "grad_norm": 0.03756394237279892, "learning_rate": 7.040821392753235e-07, "loss": 0.0382, "step": 875625 }, { "epoch": 8.61, "grad_norm": 2.7517802715301514, "learning_rate": 7.039580168210751e-07, "loss": 0.0244, "step": 875650 }, { "epoch": 8.61, "grad_norm": 0.03630442172288895, "learning_rate": 7.038338943668266e-07, "loss": 0.0278, "step": 875675 }, { "epoch": 8.61, "grad_norm": 3.061378002166748, "learning_rate": 7.037097719125781e-07, "loss": 0.0431, "step": 875700 }, { "epoch": 8.61, "grad_norm": 0.025691892951726913, "learning_rate": 7.035856494583297e-07, "loss": 0.0242, "step": 875725 }, { "epoch": 8.61, "grad_norm": 16.450336456298828, "learning_rate": 7.034615270040812e-07, "loss": 0.026, "step": 875750 }, { "epoch": 8.61, "grad_norm": 0.05177023634314537, "learning_rate": 7.033374045498328e-07, "loss": 0.0388, "step": 875775 }, { "epoch": 8.61, "grad_norm": 4.066999912261963, "learning_rate": 7.032132820955844e-07, "loss": 0.0404, "step": 875800 }, { "epoch": 8.61, "grad_norm": 0.02860049344599247, "learning_rate": 7.030891596413358e-07, "loss": 0.0397, "step": 875825 }, { "epoch": 8.61, "grad_norm": 8.068706512451172, "learning_rate": 7.029650371870873e-07, "loss": 0.0251, "step": 875850 }, { "epoch": 8.61, "grad_norm": 0.22366181015968323, "learning_rate": 7.028409147328389e-07, "loss": 0.0358, "step": 875875 }, { "epoch": 8.61, "grad_norm": 10.881448745727539, "learning_rate": 7.027167922785905e-07, "loss": 0.0407, "step": 875900 }, { "epoch": 8.61, "grad_norm": 0.02555997669696808, "learning_rate": 7.025926698243419e-07, "loss": 0.0313, "step": 875925 }, { "epoch": 8.61, "grad_norm": 6.520327091217041, "learning_rate": 7.024685473700935e-07, "loss": 0.0334, "step": 875950 }, { "epoch": 8.61, "grad_norm": 0.19204837083816528, "learning_rate": 7.023444249158451e-07, "loss": 0.0381, "step": 875975 }, { "epoch": 8.61, "grad_norm": 1.6382800340652466, "learning_rate": 7.022203024615966e-07, "loss": 0.0113, "step": 876000 }, { "epoch": 8.61, "grad_norm": 0.6112721562385559, "learning_rate": 7.02096180007348e-07, "loss": 0.0267, "step": 876025 }, { "epoch": 8.61, "grad_norm": 4.266909599304199, "learning_rate": 7.019720575530996e-07, "loss": 0.0335, "step": 876050 }, { "epoch": 8.61, "grad_norm": 0.14893664419651031, "learning_rate": 7.018479350988512e-07, "loss": 0.0332, "step": 876075 }, { "epoch": 8.61, "grad_norm": 13.558588981628418, "learning_rate": 7.017238126446027e-07, "loss": 0.024, "step": 876100 }, { "epoch": 8.61, "grad_norm": 1.5317702293395996, "learning_rate": 7.015996901903542e-07, "loss": 0.0264, "step": 876125 }, { "epoch": 8.61, "grad_norm": 9.238582611083984, "learning_rate": 7.014755677361058e-07, "loss": 0.0255, "step": 876150 }, { "epoch": 8.61, "grad_norm": 0.013583643361926079, "learning_rate": 7.013514452818573e-07, "loss": 0.0269, "step": 876175 }, { "epoch": 8.61, "grad_norm": 6.94692325592041, "learning_rate": 7.012273228276089e-07, "loss": 0.0378, "step": 876200 }, { "epoch": 8.62, "grad_norm": 0.04315528646111488, "learning_rate": 7.011032003733605e-07, "loss": 0.0358, "step": 876225 }, { "epoch": 8.62, "grad_norm": 0.6439812779426575, "learning_rate": 7.009790779191119e-07, "loss": 0.0418, "step": 876250 }, { "epoch": 8.62, "grad_norm": 0.14919114112854004, "learning_rate": 7.008599203630334e-07, "loss": 0.0347, "step": 876275 }, { "epoch": 8.62, "grad_norm": 9.590107917785645, "learning_rate": 7.007357979087849e-07, "loss": 0.0185, "step": 876300 }, { "epoch": 8.62, "grad_norm": 2.019386053085327, "learning_rate": 7.006116754545365e-07, "loss": 0.0471, "step": 876325 }, { "epoch": 8.62, "grad_norm": 0.5214092135429382, "learning_rate": 7.004875530002881e-07, "loss": 0.0169, "step": 876350 }, { "epoch": 8.62, "grad_norm": 4.5078654289245605, "learning_rate": 7.003634305460395e-07, "loss": 0.037, "step": 876375 }, { "epoch": 8.62, "grad_norm": 8.197613716125488, "learning_rate": 7.00239308091791e-07, "loss": 0.0284, "step": 876400 }, { "epoch": 8.62, "grad_norm": 0.21569719910621643, "learning_rate": 7.001151856375426e-07, "loss": 0.0593, "step": 876425 }, { "epoch": 8.62, "grad_norm": 18.835693359375, "learning_rate": 6.999910631832942e-07, "loss": 0.0221, "step": 876450 }, { "epoch": 8.62, "grad_norm": 2.0134572982788086, "learning_rate": 6.998669407290457e-07, "loss": 0.0258, "step": 876475 }, { "epoch": 8.62, "grad_norm": 7.340190410614014, "learning_rate": 6.997428182747973e-07, "loss": 0.0189, "step": 876500 }, { "epoch": 8.62, "grad_norm": 0.0389440692961216, "learning_rate": 6.996186958205488e-07, "loss": 0.0281, "step": 876525 }, { "epoch": 8.62, "grad_norm": 15.906100273132324, "learning_rate": 6.994945733663003e-07, "loss": 0.0309, "step": 876550 }, { "epoch": 8.62, "grad_norm": 1.3398181200027466, "learning_rate": 6.993704509120519e-07, "loss": 0.0347, "step": 876575 }, { "epoch": 8.62, "grad_norm": 15.44657039642334, "learning_rate": 6.992463284578035e-07, "loss": 0.0381, "step": 876600 }, { "epoch": 8.62, "grad_norm": 0.28107184171676636, "learning_rate": 6.991222060035549e-07, "loss": 0.0516, "step": 876625 }, { "epoch": 8.62, "grad_norm": 4.083735466003418, "learning_rate": 6.989980835493064e-07, "loss": 0.0153, "step": 876650 }, { "epoch": 8.62, "grad_norm": 1.0063470602035522, "learning_rate": 6.98873961095058e-07, "loss": 0.0255, "step": 876675 }, { "epoch": 8.62, "grad_norm": 3.108977794647217, "learning_rate": 6.987498386408096e-07, "loss": 0.0192, "step": 876700 }, { "epoch": 8.62, "grad_norm": 4.075193405151367, "learning_rate": 6.98625716186561e-07, "loss": 0.0419, "step": 876725 }, { "epoch": 8.62, "grad_norm": 4.308524131774902, "learning_rate": 6.985015937323126e-07, "loss": 0.0214, "step": 876750 }, { "epoch": 8.62, "grad_norm": 0.02067854069173336, "learning_rate": 6.983774712780642e-07, "loss": 0.0284, "step": 876775 }, { "epoch": 8.62, "grad_norm": 10.919048309326172, "learning_rate": 6.982533488238157e-07, "loss": 0.0251, "step": 876800 }, { "epoch": 8.62, "grad_norm": 0.4116986393928528, "learning_rate": 6.981292263695673e-07, "loss": 0.0342, "step": 876825 }, { "epoch": 8.62, "grad_norm": 1.4380853176116943, "learning_rate": 6.980051039153187e-07, "loss": 0.0238, "step": 876850 }, { "epoch": 8.62, "grad_norm": 0.07580700516700745, "learning_rate": 6.978809814610703e-07, "loss": 0.0416, "step": 876875 }, { "epoch": 8.62, "grad_norm": 21.31458282470703, "learning_rate": 6.977568590068218e-07, "loss": 0.0307, "step": 876900 }, { "epoch": 8.62, "grad_norm": 0.16949768364429474, "learning_rate": 6.976327365525734e-07, "loss": 0.0389, "step": 876925 }, { "epoch": 8.62, "grad_norm": 9.871606826782227, "learning_rate": 6.975086140983249e-07, "loss": 0.0353, "step": 876950 }, { "epoch": 8.62, "grad_norm": 0.012254261411726475, "learning_rate": 6.973844916440764e-07, "loss": 0.0366, "step": 876975 }, { "epoch": 8.62, "grad_norm": 7.762451171875, "learning_rate": 6.97260369189828e-07, "loss": 0.0285, "step": 877000 }, { "epoch": 8.62, "grad_norm": 9.180586814880371, "learning_rate": 6.971362467355796e-07, "loss": 0.0367, "step": 877025 }, { "epoch": 8.62, "grad_norm": 2.765277147293091, "learning_rate": 6.97012124281331e-07, "loss": 0.0491, "step": 877050 }, { "epoch": 8.62, "grad_norm": 8.059185981750488, "learning_rate": 6.968880018270825e-07, "loss": 0.0267, "step": 877075 }, { "epoch": 8.62, "grad_norm": 13.756922721862793, "learning_rate": 6.967638793728341e-07, "loss": 0.0363, "step": 877100 }, { "epoch": 8.62, "grad_norm": 0.010728719644248486, "learning_rate": 6.966397569185857e-07, "loss": 0.0395, "step": 877125 }, { "epoch": 8.62, "grad_norm": 3.948863983154297, "learning_rate": 6.965156344643372e-07, "loss": 0.0176, "step": 877150 }, { "epoch": 8.62, "grad_norm": 0.07211426645517349, "learning_rate": 6.963915120100888e-07, "loss": 0.0369, "step": 877175 }, { "epoch": 8.62, "grad_norm": 0.5290741324424744, "learning_rate": 6.962673895558403e-07, "loss": 0.0239, "step": 877200 }, { "epoch": 8.63, "grad_norm": 0.05602540820837021, "learning_rate": 6.961432671015918e-07, "loss": 0.0223, "step": 877225 }, { "epoch": 8.63, "grad_norm": 14.148092269897461, "learning_rate": 6.960191446473434e-07, "loss": 0.0201, "step": 877250 }, { "epoch": 8.63, "grad_norm": 1.9736162424087524, "learning_rate": 6.95895022193095e-07, "loss": 0.0443, "step": 877275 }, { "epoch": 8.63, "grad_norm": 6.360560417175293, "learning_rate": 6.957708997388464e-07, "loss": 0.0274, "step": 877300 }, { "epoch": 8.63, "grad_norm": 2.027128219604492, "learning_rate": 6.956467772845979e-07, "loss": 0.0273, "step": 877325 }, { "epoch": 8.63, "grad_norm": 0.8896231651306152, "learning_rate": 6.955226548303495e-07, "loss": 0.0196, "step": 877350 }, { "epoch": 8.63, "grad_norm": 1.4868775606155396, "learning_rate": 6.953985323761011e-07, "loss": 0.0267, "step": 877375 }, { "epoch": 8.63, "grad_norm": 1.777031421661377, "learning_rate": 6.952744099218525e-07, "loss": 0.0333, "step": 877400 }, { "epoch": 8.63, "grad_norm": 0.005823035258799791, "learning_rate": 6.951502874676041e-07, "loss": 0.029, "step": 877425 }, { "epoch": 8.63, "grad_norm": 10.17708683013916, "learning_rate": 6.950261650133557e-07, "loss": 0.0226, "step": 877450 }, { "epoch": 8.63, "grad_norm": 0.03759384900331497, "learning_rate": 6.949020425591072e-07, "loss": 0.0248, "step": 877475 }, { "epoch": 8.63, "grad_norm": 14.59036922454834, "learning_rate": 6.947779201048586e-07, "loss": 0.0241, "step": 877500 }, { "epoch": 8.63, "grad_norm": 4.956711292266846, "learning_rate": 6.946537976506102e-07, "loss": 0.0298, "step": 877525 }, { "epoch": 8.63, "grad_norm": 34.58108901977539, "learning_rate": 6.945296751963618e-07, "loss": 0.023, "step": 877550 }, { "epoch": 8.63, "grad_norm": 0.02821594476699829, "learning_rate": 6.944055527421133e-07, "loss": 0.0365, "step": 877575 }, { "epoch": 8.63, "grad_norm": 13.737408638000488, "learning_rate": 6.942814302878649e-07, "loss": 0.029, "step": 877600 }, { "epoch": 8.63, "grad_norm": 21.84223747253418, "learning_rate": 6.941573078336164e-07, "loss": 0.0293, "step": 877625 }, { "epoch": 8.63, "grad_norm": 15.352027893066406, "learning_rate": 6.940331853793679e-07, "loss": 0.0222, "step": 877650 }, { "epoch": 8.63, "grad_norm": 0.023167883977293968, "learning_rate": 6.939090629251195e-07, "loss": 0.0248, "step": 877675 }, { "epoch": 8.63, "grad_norm": 6.965211868286133, "learning_rate": 6.93784940470871e-07, "loss": 0.0201, "step": 877700 }, { "epoch": 8.63, "grad_norm": 0.020771946758031845, "learning_rate": 6.936608180166225e-07, "loss": 0.0232, "step": 877725 }, { "epoch": 8.63, "grad_norm": 12.708184242248535, "learning_rate": 6.93536695562374e-07, "loss": 0.0185, "step": 877750 }, { "epoch": 8.63, "grad_norm": 0.006846046540886164, "learning_rate": 6.934125731081256e-07, "loss": 0.0334, "step": 877775 }, { "epoch": 8.63, "grad_norm": 27.603641510009766, "learning_rate": 6.932884506538772e-07, "loss": 0.0235, "step": 877800 }, { "epoch": 8.63, "grad_norm": 0.010131454095244408, "learning_rate": 6.931643281996287e-07, "loss": 0.0286, "step": 877825 }, { "epoch": 8.63, "grad_norm": 13.842782974243164, "learning_rate": 6.930402057453803e-07, "loss": 0.0277, "step": 877850 }, { "epoch": 8.63, "grad_norm": 0.09524575620889664, "learning_rate": 6.929160832911318e-07, "loss": 0.0234, "step": 877875 }, { "epoch": 8.63, "grad_norm": 10.628891944885254, "learning_rate": 6.927919608368833e-07, "loss": 0.0272, "step": 877900 }, { "epoch": 8.63, "grad_norm": 0.26680582761764526, "learning_rate": 6.926678383826348e-07, "loss": 0.0258, "step": 877925 }, { "epoch": 8.63, "grad_norm": 11.139817237854004, "learning_rate": 6.925437159283864e-07, "loss": 0.0429, "step": 877950 }, { "epoch": 8.63, "grad_norm": 0.534537672996521, "learning_rate": 6.924195934741379e-07, "loss": 0.0165, "step": 877975 }, { "epoch": 8.63, "grad_norm": 13.863204002380371, "learning_rate": 6.922954710198894e-07, "loss": 0.0364, "step": 878000 }, { "epoch": 8.63, "grad_norm": 1.5703649520874023, "learning_rate": 6.92171348565641e-07, "loss": 0.0372, "step": 878025 }, { "epoch": 8.63, "grad_norm": 10.628235816955566, "learning_rate": 6.920472261113926e-07, "loss": 0.0208, "step": 878050 }, { "epoch": 8.63, "grad_norm": 0.08839895576238632, "learning_rate": 6.91923103657144e-07, "loss": 0.0357, "step": 878075 }, { "epoch": 8.63, "grad_norm": 7.0539093017578125, "learning_rate": 6.917989812028956e-07, "loss": 0.0259, "step": 878100 }, { "epoch": 8.63, "grad_norm": 0.14030763506889343, "learning_rate": 6.916748587486472e-07, "loss": 0.0242, "step": 878125 }, { "epoch": 8.63, "grad_norm": 14.815411567687988, "learning_rate": 6.915507362943987e-07, "loss": 0.0222, "step": 878150 }, { "epoch": 8.63, "grad_norm": 0.6565508842468262, "learning_rate": 6.914266138401501e-07, "loss": 0.0349, "step": 878175 }, { "epoch": 8.63, "grad_norm": 3.3968958854675293, "learning_rate": 6.913024913859017e-07, "loss": 0.0288, "step": 878200 }, { "epoch": 8.63, "grad_norm": 1.1967543363571167, "learning_rate": 6.911783689316533e-07, "loss": 0.0252, "step": 878225 }, { "epoch": 8.64, "grad_norm": 12.638317108154297, "learning_rate": 6.910542464774048e-07, "loss": 0.0168, "step": 878250 }, { "epoch": 8.64, "grad_norm": 0.22810211777687073, "learning_rate": 6.909301240231564e-07, "loss": 0.0384, "step": 878275 }, { "epoch": 8.64, "grad_norm": 0.8805326223373413, "learning_rate": 6.908060015689079e-07, "loss": 0.0285, "step": 878300 }, { "epoch": 8.64, "grad_norm": 2.6384692192077637, "learning_rate": 6.906818791146594e-07, "loss": 0.0484, "step": 878325 }, { "epoch": 8.64, "grad_norm": 7.6231889724731445, "learning_rate": 6.905577566604109e-07, "loss": 0.0254, "step": 878350 }, { "epoch": 8.64, "grad_norm": 7.421876907348633, "learning_rate": 6.904336342061625e-07, "loss": 0.0628, "step": 878375 }, { "epoch": 8.64, "grad_norm": 6.819211483001709, "learning_rate": 6.90309511751914e-07, "loss": 0.0243, "step": 878400 }, { "epoch": 8.64, "grad_norm": 0.04191851243376732, "learning_rate": 6.901853892976655e-07, "loss": 0.0235, "step": 878425 }, { "epoch": 8.64, "grad_norm": 10.471870422363281, "learning_rate": 6.900612668434171e-07, "loss": 0.0362, "step": 878450 }, { "epoch": 8.64, "grad_norm": 0.12050255388021469, "learning_rate": 6.899371443891687e-07, "loss": 0.0258, "step": 878475 }, { "epoch": 8.64, "grad_norm": 1.3628138303756714, "learning_rate": 6.898130219349202e-07, "loss": 0.0251, "step": 878500 }, { "epoch": 8.64, "grad_norm": 0.008287354372441769, "learning_rate": 6.896888994806717e-07, "loss": 0.0342, "step": 878525 }, { "epoch": 8.64, "grad_norm": 2.6372549533843994, "learning_rate": 6.895647770264233e-07, "loss": 0.0233, "step": 878550 }, { "epoch": 8.64, "grad_norm": 0.040225423872470856, "learning_rate": 6.894456194703447e-07, "loss": 0.0287, "step": 878575 }, { "epoch": 8.64, "grad_norm": 11.88508129119873, "learning_rate": 6.893214970160962e-07, "loss": 0.0478, "step": 878600 }, { "epoch": 8.64, "grad_norm": 3.3340682983398438, "learning_rate": 6.891973745618479e-07, "loss": 0.0231, "step": 878625 }, { "epoch": 8.64, "grad_norm": 0.5749115943908691, "learning_rate": 6.890732521075994e-07, "loss": 0.0257, "step": 878650 }, { "epoch": 8.64, "grad_norm": 1.682191252708435, "learning_rate": 6.889491296533508e-07, "loss": 0.0255, "step": 878675 }, { "epoch": 8.64, "grad_norm": 8.769791603088379, "learning_rate": 6.888250071991023e-07, "loss": 0.021, "step": 878700 }, { "epoch": 8.64, "grad_norm": 0.00708476547151804, "learning_rate": 6.88700884744854e-07, "loss": 0.0375, "step": 878725 }, { "epoch": 8.64, "grad_norm": 10.350752830505371, "learning_rate": 6.885767622906055e-07, "loss": 0.0289, "step": 878750 }, { "epoch": 8.64, "grad_norm": 2.68841814994812, "learning_rate": 6.884526398363569e-07, "loss": 0.0282, "step": 878775 }, { "epoch": 8.64, "grad_norm": 1.297088384628296, "learning_rate": 6.883285173821086e-07, "loss": 0.0236, "step": 878800 }, { "epoch": 8.64, "grad_norm": 0.023174576461315155, "learning_rate": 6.882043949278601e-07, "loss": 0.0344, "step": 878825 }, { "epoch": 8.64, "grad_norm": 8.14665699005127, "learning_rate": 6.880802724736116e-07, "loss": 0.0316, "step": 878850 }, { "epoch": 8.64, "grad_norm": 0.19284148514270782, "learning_rate": 6.879561500193633e-07, "loss": 0.039, "step": 878875 }, { "epoch": 8.64, "grad_norm": 1.1193382740020752, "learning_rate": 6.878320275651147e-07, "loss": 0.025, "step": 878900 }, { "epoch": 8.64, "grad_norm": 4.325319766998291, "learning_rate": 6.877079051108662e-07, "loss": 0.0261, "step": 878925 }, { "epoch": 8.64, "grad_norm": 14.210312843322754, "learning_rate": 6.875837826566177e-07, "loss": 0.03, "step": 878950 }, { "epoch": 8.64, "grad_norm": 1.0664836168289185, "learning_rate": 6.874596602023694e-07, "loss": 0.0401, "step": 878975 }, { "epoch": 8.64, "grad_norm": 12.922880172729492, "learning_rate": 6.873355377481208e-07, "loss": 0.0243, "step": 879000 }, { "epoch": 8.64, "grad_norm": 0.4022707939147949, "learning_rate": 6.872114152938723e-07, "loss": 0.0414, "step": 879025 }, { "epoch": 8.64, "grad_norm": 7.261491298675537, "learning_rate": 6.87087292839624e-07, "loss": 0.0124, "step": 879050 }, { "epoch": 8.64, "grad_norm": 0.012951205484569073, "learning_rate": 6.869631703853755e-07, "loss": 0.0424, "step": 879075 }, { "epoch": 8.64, "grad_norm": 15.396158218383789, "learning_rate": 6.868390479311269e-07, "loss": 0.031, "step": 879100 }, { "epoch": 8.64, "grad_norm": 0.2502777576446533, "learning_rate": 6.867149254768784e-07, "loss": 0.0428, "step": 879125 }, { "epoch": 8.64, "grad_norm": 10.85425853729248, "learning_rate": 6.865908030226301e-07, "loss": 0.0404, "step": 879150 }, { "epoch": 8.64, "grad_norm": 0.9543578028678894, "learning_rate": 6.864666805683816e-07, "loss": 0.035, "step": 879175 }, { "epoch": 8.64, "grad_norm": 10.571099281311035, "learning_rate": 6.86342558114133e-07, "loss": 0.0226, "step": 879200 }, { "epoch": 8.64, "grad_norm": 0.014783586375415325, "learning_rate": 6.862184356598847e-07, "loss": 0.0414, "step": 879225 }, { "epoch": 8.64, "grad_norm": 28.639862060546875, "learning_rate": 6.860943132056362e-07, "loss": 0.0257, "step": 879250 }, { "epoch": 8.65, "grad_norm": 0.08390069752931595, "learning_rate": 6.859701907513877e-07, "loss": 0.0287, "step": 879275 }, { "epoch": 8.65, "grad_norm": 16.534303665161133, "learning_rate": 6.858460682971394e-07, "loss": 0.0179, "step": 879300 }, { "epoch": 8.65, "grad_norm": 9.59675407409668, "learning_rate": 6.857219458428909e-07, "loss": 0.0485, "step": 879325 }, { "epoch": 8.65, "grad_norm": 17.511018753051758, "learning_rate": 6.855978233886423e-07, "loss": 0.0304, "step": 879350 }, { "epoch": 8.65, "grad_norm": 4.053983211517334, "learning_rate": 6.854737009343938e-07, "loss": 0.0358, "step": 879375 }, { "epoch": 8.65, "grad_norm": 1.566074252128601, "learning_rate": 6.853495784801455e-07, "loss": 0.0136, "step": 879400 }, { "epoch": 8.65, "grad_norm": 0.27726414799690247, "learning_rate": 6.85225456025897e-07, "loss": 0.0363, "step": 879425 }, { "epoch": 8.65, "grad_norm": 12.083405494689941, "learning_rate": 6.851013335716484e-07, "loss": 0.0195, "step": 879450 }, { "epoch": 8.65, "grad_norm": 0.2076312154531479, "learning_rate": 6.849772111174001e-07, "loss": 0.0277, "step": 879475 }, { "epoch": 8.65, "grad_norm": 18.131771087646484, "learning_rate": 6.848530886631516e-07, "loss": 0.0203, "step": 879500 }, { "epoch": 8.65, "grad_norm": 0.42097344994544983, "learning_rate": 6.847289662089031e-07, "loss": 0.0337, "step": 879525 }, { "epoch": 8.65, "grad_norm": 10.91218090057373, "learning_rate": 6.846048437546545e-07, "loss": 0.0171, "step": 879550 }, { "epoch": 8.65, "grad_norm": 0.05775200203061104, "learning_rate": 6.844807213004062e-07, "loss": 0.0386, "step": 879575 }, { "epoch": 8.65, "grad_norm": 1.1776970624923706, "learning_rate": 6.843565988461577e-07, "loss": 0.026, "step": 879600 }, { "epoch": 8.65, "grad_norm": 0.03422649949789047, "learning_rate": 6.842324763919092e-07, "loss": 0.0357, "step": 879625 }, { "epoch": 8.65, "grad_norm": 9.328051567077637, "learning_rate": 6.841083539376609e-07, "loss": 0.0335, "step": 879650 }, { "epoch": 8.65, "grad_norm": 0.34622904658317566, "learning_rate": 6.839842314834123e-07, "loss": 0.0599, "step": 879675 }, { "epoch": 8.65, "grad_norm": 10.341567993164062, "learning_rate": 6.838601090291638e-07, "loss": 0.0528, "step": 879700 }, { "epoch": 8.65, "grad_norm": 0.1057986319065094, "learning_rate": 6.837359865749155e-07, "loss": 0.0268, "step": 879725 }, { "epoch": 8.65, "grad_norm": 11.526270866394043, "learning_rate": 6.83611864120667e-07, "loss": 0.0256, "step": 879750 }, { "epoch": 8.65, "grad_norm": 0.005422875750809908, "learning_rate": 6.834877416664184e-07, "loss": 0.0379, "step": 879775 }, { "epoch": 8.65, "grad_norm": 2.167929172515869, "learning_rate": 6.833636192121699e-07, "loss": 0.019, "step": 879800 }, { "epoch": 8.65, "grad_norm": 0.006994600873440504, "learning_rate": 6.832394967579216e-07, "loss": 0.0337, "step": 879825 }, { "epoch": 8.65, "grad_norm": 0.953396737575531, "learning_rate": 6.831153743036731e-07, "loss": 0.0228, "step": 879850 }, { "epoch": 8.65, "grad_norm": 4.24962043762207, "learning_rate": 6.829912518494245e-07, "loss": 0.0331, "step": 879875 }, { "epoch": 8.65, "grad_norm": 5.4795823097229, "learning_rate": 6.828671293951762e-07, "loss": 0.0216, "step": 879900 }, { "epoch": 8.65, "grad_norm": 0.7036945819854736, "learning_rate": 6.827430069409277e-07, "loss": 0.0385, "step": 879925 }, { "epoch": 8.65, "grad_norm": 16.3989315032959, "learning_rate": 6.826188844866792e-07, "loss": 0.0314, "step": 879950 }, { "epoch": 8.65, "grad_norm": 0.09204331040382385, "learning_rate": 6.824947620324306e-07, "loss": 0.0266, "step": 879975 }, { "epoch": 8.65, "grad_norm": 14.049995422363281, "learning_rate": 6.823706395781823e-07, "loss": 0.0204, "step": 880000 }, { "epoch": 8.65, "eval_loss": 0.914923369884491, "eval_runtime": 6059.5466, "eval_samples_per_second": 1.562, "eval_steps_per_second": 0.195, "eval_wer": 0.11078027425481234, "step": 880000 }, { "epoch": 8.65, "grad_norm": 0.20387816429138184, "learning_rate": 6.822465171239338e-07, "loss": 0.0206, "step": 880025 }, { "epoch": 8.65, "grad_norm": 9.197921752929688, "learning_rate": 6.821223946696853e-07, "loss": 0.0324, "step": 880050 }, { "epoch": 8.65, "grad_norm": 18.292829513549805, "learning_rate": 6.81998272215437e-07, "loss": 0.0228, "step": 880075 }, { "epoch": 8.65, "grad_norm": 18.183866500854492, "learning_rate": 6.818741497611885e-07, "loss": 0.0451, "step": 880100 }, { "epoch": 8.65, "grad_norm": 0.02790023572742939, "learning_rate": 6.817500273069399e-07, "loss": 0.043, "step": 880125 }, { "epoch": 8.65, "grad_norm": 2.130099058151245, "learning_rate": 6.816259048526916e-07, "loss": 0.0472, "step": 880150 }, { "epoch": 8.65, "grad_norm": 4.723196029663086, "learning_rate": 6.815017823984431e-07, "loss": 0.0321, "step": 880175 }, { "epoch": 8.65, "grad_norm": 11.059800148010254, "learning_rate": 6.813776599441946e-07, "loss": 0.0293, "step": 880200 }, { "epoch": 8.65, "grad_norm": 0.23982921242713928, "learning_rate": 6.81253537489946e-07, "loss": 0.0344, "step": 880225 }, { "epoch": 8.65, "grad_norm": 1.1676145792007446, "learning_rate": 6.811294150356977e-07, "loss": 0.0344, "step": 880250 }, { "epoch": 8.66, "grad_norm": 0.004178628791123629, "learning_rate": 6.810052925814492e-07, "loss": 0.0489, "step": 880275 }, { "epoch": 8.66, "grad_norm": 16.512462615966797, "learning_rate": 6.808811701272007e-07, "loss": 0.0275, "step": 880300 }, { "epoch": 8.66, "grad_norm": 3.8782665729522705, "learning_rate": 6.807570476729524e-07, "loss": 0.0485, "step": 880325 }, { "epoch": 8.66, "grad_norm": 8.038484573364258, "learning_rate": 6.806329252187038e-07, "loss": 0.0219, "step": 880350 }, { "epoch": 8.66, "grad_norm": 0.4775925576686859, "learning_rate": 6.805088027644553e-07, "loss": 0.0257, "step": 880375 }, { "epoch": 8.66, "grad_norm": 1.043399691581726, "learning_rate": 6.803846803102069e-07, "loss": 0.0178, "step": 880400 }, { "epoch": 8.66, "grad_norm": 0.05146437883377075, "learning_rate": 6.802605578559585e-07, "loss": 0.0266, "step": 880425 }, { "epoch": 8.66, "grad_norm": 35.39128112792969, "learning_rate": 6.801364354017099e-07, "loss": 0.0258, "step": 880450 }, { "epoch": 8.66, "grad_norm": 0.2116457223892212, "learning_rate": 6.800123129474614e-07, "loss": 0.0173, "step": 880475 }, { "epoch": 8.66, "grad_norm": 8.9991455078125, "learning_rate": 6.798881904932131e-07, "loss": 0.0302, "step": 880500 }, { "epoch": 8.66, "grad_norm": 0.49778640270233154, "learning_rate": 6.797640680389646e-07, "loss": 0.025, "step": 880525 }, { "epoch": 8.66, "grad_norm": 0.20645590126514435, "learning_rate": 6.79639945584716e-07, "loss": 0.0284, "step": 880550 }, { "epoch": 8.66, "grad_norm": 0.8209651708602905, "learning_rate": 6.795158231304677e-07, "loss": 0.0344, "step": 880575 }, { "epoch": 8.66, "grad_norm": 2.5386016368865967, "learning_rate": 6.793917006762192e-07, "loss": 0.0252, "step": 880600 }, { "epoch": 8.66, "grad_norm": 0.0034594822209328413, "learning_rate": 6.792675782219707e-07, "loss": 0.0408, "step": 880625 }, { "epoch": 8.66, "grad_norm": 0.5232276320457458, "learning_rate": 6.791434557677221e-07, "loss": 0.0184, "step": 880650 }, { "epoch": 8.66, "grad_norm": 5.805713176727295, "learning_rate": 6.790193333134738e-07, "loss": 0.0391, "step": 880675 }, { "epoch": 8.66, "grad_norm": 3.2050886154174805, "learning_rate": 6.788952108592253e-07, "loss": 0.0393, "step": 880700 }, { "epoch": 8.66, "grad_norm": 1.5961097478866577, "learning_rate": 6.787710884049768e-07, "loss": 0.0328, "step": 880725 }, { "epoch": 8.66, "grad_norm": 6.528176307678223, "learning_rate": 6.786469659507285e-07, "loss": 0.0199, "step": 880750 }, { "epoch": 8.66, "grad_norm": 0.014242413453757763, "learning_rate": 6.7852284349648e-07, "loss": 0.0276, "step": 880775 }, { "epoch": 8.66, "grad_norm": 10.803194999694824, "learning_rate": 6.783987210422314e-07, "loss": 0.0209, "step": 880800 }, { "epoch": 8.66, "grad_norm": 1.474308967590332, "learning_rate": 6.782745985879831e-07, "loss": 0.0175, "step": 880825 }, { "epoch": 8.66, "grad_norm": 8.623021125793457, "learning_rate": 6.781504761337346e-07, "loss": 0.0371, "step": 880850 }, { "epoch": 8.66, "grad_norm": 4.768118858337402, "learning_rate": 6.780263536794861e-07, "loss": 0.0397, "step": 880875 }, { "epoch": 8.66, "grad_norm": 9.226064682006836, "learning_rate": 6.779022312252375e-07, "loss": 0.0368, "step": 880900 }, { "epoch": 8.66, "grad_norm": 0.5335020422935486, "learning_rate": 6.777781087709892e-07, "loss": 0.0449, "step": 880925 }, { "epoch": 8.66, "grad_norm": 0.9721835851669312, "learning_rate": 6.776539863167407e-07, "loss": 0.0184, "step": 880950 }, { "epoch": 8.66, "grad_norm": 1.2750204801559448, "learning_rate": 6.775298638624922e-07, "loss": 0.0336, "step": 880975 }, { "epoch": 8.66, "grad_norm": 9.672979354858398, "learning_rate": 6.774057414082439e-07, "loss": 0.0554, "step": 881000 }, { "epoch": 8.66, "grad_norm": 0.012809542007744312, "learning_rate": 6.772816189539953e-07, "loss": 0.0342, "step": 881025 }, { "epoch": 8.66, "grad_norm": 12.075401306152344, "learning_rate": 6.771574964997468e-07, "loss": 0.0391, "step": 881050 }, { "epoch": 8.66, "grad_norm": 0.014752902090549469, "learning_rate": 6.770383389436683e-07, "loss": 0.0289, "step": 881075 }, { "epoch": 8.66, "grad_norm": 0.23386812210083008, "learning_rate": 6.769142164894199e-07, "loss": 0.0442, "step": 881100 }, { "epoch": 8.66, "grad_norm": 0.011956488713622093, "learning_rate": 6.767900940351714e-07, "loss": 0.0229, "step": 881125 }, { "epoch": 8.66, "grad_norm": 17.887317657470703, "learning_rate": 6.766659715809229e-07, "loss": 0.0225, "step": 881150 }, { "epoch": 8.66, "grad_norm": 0.24746288359165192, "learning_rate": 6.765418491266745e-07, "loss": 0.0293, "step": 881175 }, { "epoch": 8.66, "grad_norm": 8.085055351257324, "learning_rate": 6.76417726672426e-07, "loss": 0.02, "step": 881200 }, { "epoch": 8.66, "grad_norm": 3.027524709701538, "learning_rate": 6.762936042181776e-07, "loss": 0.0367, "step": 881225 }, { "epoch": 8.66, "grad_norm": 17.091819763183594, "learning_rate": 6.76169481763929e-07, "loss": 0.0411, "step": 881250 }, { "epoch": 8.66, "grad_norm": 0.11192447692155838, "learning_rate": 6.760453593096806e-07, "loss": 0.0373, "step": 881275 }, { "epoch": 8.67, "grad_norm": 2.731502056121826, "learning_rate": 6.759212368554321e-07, "loss": 0.0241, "step": 881300 }, { "epoch": 8.67, "grad_norm": 8.244041442871094, "learning_rate": 6.757971144011837e-07, "loss": 0.0334, "step": 881325 }, { "epoch": 8.67, "grad_norm": 8.636460304260254, "learning_rate": 6.756729919469353e-07, "loss": 0.0326, "step": 881350 }, { "epoch": 8.67, "grad_norm": 0.4608680009841919, "learning_rate": 6.755488694926867e-07, "loss": 0.0197, "step": 881375 }, { "epoch": 8.67, "grad_norm": 9.016536712646484, "learning_rate": 6.754247470384383e-07, "loss": 0.0305, "step": 881400 }, { "epoch": 8.67, "grad_norm": 2.3236641883850098, "learning_rate": 6.753006245841898e-07, "loss": 0.0293, "step": 881425 }, { "epoch": 8.67, "grad_norm": 9.07714557647705, "learning_rate": 6.751765021299414e-07, "loss": 0.0372, "step": 881450 }, { "epoch": 8.67, "grad_norm": 0.3819523751735687, "learning_rate": 6.750523796756928e-07, "loss": 0.0515, "step": 881475 }, { "epoch": 8.67, "grad_norm": 23.46017074584961, "learning_rate": 6.749282572214444e-07, "loss": 0.0176, "step": 881500 }, { "epoch": 8.67, "grad_norm": 5.948158264160156, "learning_rate": 6.74804134767196e-07, "loss": 0.0323, "step": 881525 }, { "epoch": 8.67, "grad_norm": 17.95444107055664, "learning_rate": 6.746800123129475e-07, "loss": 0.0285, "step": 881550 }, { "epoch": 8.67, "grad_norm": 6.756485939025879, "learning_rate": 6.745558898586991e-07, "loss": 0.0301, "step": 881575 }, { "epoch": 8.67, "grad_norm": 7.847470283508301, "learning_rate": 6.744317674044507e-07, "loss": 0.0291, "step": 881600 }, { "epoch": 8.67, "grad_norm": 8.34363079071045, "learning_rate": 6.743076449502021e-07, "loss": 0.0349, "step": 881625 }, { "epoch": 8.67, "grad_norm": 10.007233619689941, "learning_rate": 6.741835224959537e-07, "loss": 0.0264, "step": 881650 }, { "epoch": 8.67, "grad_norm": 3.310412645339966, "learning_rate": 6.740594000417052e-07, "loss": 0.0401, "step": 881675 }, { "epoch": 8.67, "grad_norm": 8.572540283203125, "learning_rate": 6.739352775874568e-07, "loss": 0.0496, "step": 881700 }, { "epoch": 8.67, "grad_norm": 0.0862785205245018, "learning_rate": 6.738111551332082e-07, "loss": 0.0431, "step": 881725 }, { "epoch": 8.67, "grad_norm": 7.8809404373168945, "learning_rate": 6.736870326789598e-07, "loss": 0.0233, "step": 881750 }, { "epoch": 8.67, "grad_norm": 1.4842007160186768, "learning_rate": 6.735629102247114e-07, "loss": 0.0486, "step": 881775 }, { "epoch": 8.67, "grad_norm": 14.169160842895508, "learning_rate": 6.734387877704629e-07, "loss": 0.0223, "step": 881800 }, { "epoch": 8.67, "grad_norm": 1.3785862922668457, "learning_rate": 6.733146653162144e-07, "loss": 0.0431, "step": 881825 }, { "epoch": 8.67, "grad_norm": 14.94095230102539, "learning_rate": 6.731905428619659e-07, "loss": 0.0153, "step": 881850 }, { "epoch": 8.67, "grad_norm": 0.0249050110578537, "learning_rate": 6.730664204077175e-07, "loss": 0.0336, "step": 881875 }, { "epoch": 8.67, "grad_norm": 5.245434761047363, "learning_rate": 6.729422979534691e-07, "loss": 0.0333, "step": 881900 }, { "epoch": 8.67, "grad_norm": 0.9976574778556824, "learning_rate": 6.728181754992205e-07, "loss": 0.0316, "step": 881925 }, { "epoch": 8.67, "grad_norm": 12.115701675415039, "learning_rate": 6.726940530449721e-07, "loss": 0.0238, "step": 881950 }, { "epoch": 8.67, "grad_norm": 0.3791409432888031, "learning_rate": 6.725699305907236e-07, "loss": 0.0175, "step": 881975 }, { "epoch": 8.67, "grad_norm": 25.46963119506836, "learning_rate": 6.724458081364752e-07, "loss": 0.0319, "step": 882000 }, { "epoch": 8.67, "grad_norm": 6.506702899932861, "learning_rate": 6.723216856822268e-07, "loss": 0.0353, "step": 882025 }, { "epoch": 8.67, "grad_norm": 11.464913368225098, "learning_rate": 6.721975632279782e-07, "loss": 0.039, "step": 882050 }, { "epoch": 8.67, "grad_norm": 0.4616335332393646, "learning_rate": 6.720734407737298e-07, "loss": 0.0168, "step": 882075 }, { "epoch": 8.67, "grad_norm": 5.928136825561523, "learning_rate": 6.719493183194813e-07, "loss": 0.0221, "step": 882100 }, { "epoch": 8.67, "grad_norm": 4.924691677093506, "learning_rate": 6.718251958652329e-07, "loss": 0.0548, "step": 882125 }, { "epoch": 8.67, "grad_norm": 15.015705108642578, "learning_rate": 6.717010734109843e-07, "loss": 0.0241, "step": 882150 }, { "epoch": 8.67, "grad_norm": 0.35742396116256714, "learning_rate": 6.715769509567359e-07, "loss": 0.0242, "step": 882175 }, { "epoch": 8.67, "grad_norm": 6.100856781005859, "learning_rate": 6.714528285024875e-07, "loss": 0.0202, "step": 882200 }, { "epoch": 8.67, "grad_norm": 0.032168563455343246, "learning_rate": 6.71328706048239e-07, "loss": 0.0636, "step": 882225 }, { "epoch": 8.67, "grad_norm": 6.312473297119141, "learning_rate": 6.712045835939906e-07, "loss": 0.0272, "step": 882250 }, { "epoch": 8.67, "grad_norm": 0.3785896897315979, "learning_rate": 6.71080461139742e-07, "loss": 0.04, "step": 882275 }, { "epoch": 8.67, "grad_norm": 12.95732593536377, "learning_rate": 6.709563386854936e-07, "loss": 0.0139, "step": 882300 }, { "epoch": 8.68, "grad_norm": 3.017047882080078, "learning_rate": 6.708322162312452e-07, "loss": 0.0317, "step": 882325 }, { "epoch": 8.68, "grad_norm": 9.12250804901123, "learning_rate": 6.707080937769967e-07, "loss": 0.0321, "step": 882350 }, { "epoch": 8.68, "grad_norm": 0.02471701242029667, "learning_rate": 6.705839713227483e-07, "loss": 0.0386, "step": 882375 }, { "epoch": 8.68, "grad_norm": 6.507273197174072, "learning_rate": 6.704598488684997e-07, "loss": 0.0266, "step": 882400 }, { "epoch": 8.68, "grad_norm": 0.1305725872516632, "learning_rate": 6.703357264142513e-07, "loss": 0.0237, "step": 882425 }, { "epoch": 8.68, "grad_norm": 46.97914123535156, "learning_rate": 6.702116039600029e-07, "loss": 0.0305, "step": 882450 }, { "epoch": 8.68, "grad_norm": 3.6612131595611572, "learning_rate": 6.700874815057544e-07, "loss": 0.0328, "step": 882475 }, { "epoch": 8.68, "grad_norm": 8.671918869018555, "learning_rate": 6.699633590515059e-07, "loss": 0.0311, "step": 882500 }, { "epoch": 8.68, "grad_norm": 4.625923156738281, "learning_rate": 6.698392365972574e-07, "loss": 0.0198, "step": 882525 }, { "epoch": 8.68, "grad_norm": 8.861759185791016, "learning_rate": 6.69715114143009e-07, "loss": 0.0202, "step": 882550 }, { "epoch": 8.68, "grad_norm": 6.577132225036621, "learning_rate": 6.695909916887606e-07, "loss": 0.0465, "step": 882575 }, { "epoch": 8.68, "grad_norm": 12.134832382202148, "learning_rate": 6.69466869234512e-07, "loss": 0.0322, "step": 882600 }, { "epoch": 8.68, "grad_norm": 4.494679927825928, "learning_rate": 6.693427467802636e-07, "loss": 0.0319, "step": 882625 }, { "epoch": 8.68, "grad_norm": 6.22226619720459, "learning_rate": 6.692186243260151e-07, "loss": 0.0211, "step": 882650 }, { "epoch": 8.68, "grad_norm": 0.006305808201432228, "learning_rate": 6.690945018717667e-07, "loss": 0.0331, "step": 882675 }, { "epoch": 8.68, "grad_norm": 5.466740131378174, "learning_rate": 6.689703794175181e-07, "loss": 0.033, "step": 882700 }, { "epoch": 8.68, "grad_norm": 8.025193214416504, "learning_rate": 6.688462569632697e-07, "loss": 0.0263, "step": 882725 }, { "epoch": 8.68, "grad_norm": 13.692605018615723, "learning_rate": 6.687221345090213e-07, "loss": 0.03, "step": 882750 }, { "epoch": 8.68, "grad_norm": 0.05524321645498276, "learning_rate": 6.685980120547728e-07, "loss": 0.0324, "step": 882775 }, { "epoch": 8.68, "grad_norm": 4.068335056304932, "learning_rate": 6.684738896005244e-07, "loss": 0.0337, "step": 882800 }, { "epoch": 8.68, "grad_norm": 0.029925208538770676, "learning_rate": 6.683497671462758e-07, "loss": 0.031, "step": 882825 }, { "epoch": 8.68, "grad_norm": 5.036116600036621, "learning_rate": 6.682256446920274e-07, "loss": 0.0128, "step": 882850 }, { "epoch": 8.68, "grad_norm": 0.05187398940324783, "learning_rate": 6.68101522237779e-07, "loss": 0.0306, "step": 882875 }, { "epoch": 8.68, "grad_norm": 8.94735336303711, "learning_rate": 6.679773997835305e-07, "loss": 0.018, "step": 882900 }, { "epoch": 8.68, "grad_norm": 6.558248043060303, "learning_rate": 6.678532773292821e-07, "loss": 0.0389, "step": 882925 }, { "epoch": 8.68, "grad_norm": 15.61159896850586, "learning_rate": 6.677291548750335e-07, "loss": 0.0365, "step": 882950 }, { "epoch": 8.68, "grad_norm": 5.344432830810547, "learning_rate": 6.676050324207851e-07, "loss": 0.0335, "step": 882975 }, { "epoch": 8.68, "grad_norm": 4.241203784942627, "learning_rate": 6.674809099665367e-07, "loss": 0.0358, "step": 883000 }, { "epoch": 8.68, "grad_norm": 2.740335702896118, "learning_rate": 6.673567875122882e-07, "loss": 0.0451, "step": 883025 }, { "epoch": 8.68, "grad_norm": 12.445734024047852, "learning_rate": 6.672326650580398e-07, "loss": 0.0242, "step": 883050 }, { "epoch": 8.68, "grad_norm": 0.007470476906746626, "learning_rate": 6.671085426037912e-07, "loss": 0.0212, "step": 883075 }, { "epoch": 8.68, "grad_norm": 13.548267364501953, "learning_rate": 6.669844201495428e-07, "loss": 0.0305, "step": 883100 }, { "epoch": 8.68, "grad_norm": 0.4648594260215759, "learning_rate": 6.668602976952943e-07, "loss": 0.028, "step": 883125 }, { "epoch": 8.68, "grad_norm": 6.258001327514648, "learning_rate": 6.667361752410459e-07, "loss": 0.0211, "step": 883150 }, { "epoch": 8.68, "grad_norm": 0.2287599742412567, "learning_rate": 6.666120527867974e-07, "loss": 0.0367, "step": 883175 }, { "epoch": 8.68, "grad_norm": 6.261077404022217, "learning_rate": 6.664879303325489e-07, "loss": 0.0253, "step": 883200 }, { "epoch": 8.68, "grad_norm": 0.4119837284088135, "learning_rate": 6.663638078783005e-07, "loss": 0.0357, "step": 883225 }, { "epoch": 8.68, "grad_norm": 11.828716278076172, "learning_rate": 6.66239685424052e-07, "loss": 0.0172, "step": 883250 }, { "epoch": 8.68, "grad_norm": 0.03776261955499649, "learning_rate": 6.661155629698035e-07, "loss": 0.0284, "step": 883275 }, { "epoch": 8.68, "grad_norm": 7.953262805938721, "learning_rate": 6.659914405155551e-07, "loss": 0.0261, "step": 883300 }, { "epoch": 8.68, "grad_norm": 2.6723694801330566, "learning_rate": 6.658673180613066e-07, "loss": 0.0476, "step": 883325 }, { "epoch": 8.69, "grad_norm": 11.240653991699219, "learning_rate": 6.657431956070582e-07, "loss": 0.0236, "step": 883350 }, { "epoch": 8.69, "grad_norm": 0.12197145074605942, "learning_rate": 6.656190731528096e-07, "loss": 0.0274, "step": 883375 }, { "epoch": 8.69, "grad_norm": 11.706958770751953, "learning_rate": 6.654949506985612e-07, "loss": 0.0248, "step": 883400 }, { "epoch": 8.69, "grad_norm": 4.473205089569092, "learning_rate": 6.653708282443128e-07, "loss": 0.0246, "step": 883425 }, { "epoch": 8.69, "grad_norm": 5.354820728302002, "learning_rate": 6.652467057900643e-07, "loss": 0.028, "step": 883450 }, { "epoch": 8.69, "grad_norm": 12.540962219238281, "learning_rate": 6.651225833358159e-07, "loss": 0.0272, "step": 883475 }, { "epoch": 8.69, "grad_norm": 11.927538871765137, "learning_rate": 6.649984608815673e-07, "loss": 0.0315, "step": 883500 }, { "epoch": 8.69, "grad_norm": 0.1398540586233139, "learning_rate": 6.648743384273189e-07, "loss": 0.0405, "step": 883525 }, { "epoch": 8.69, "grad_norm": 13.295520782470703, "learning_rate": 6.647502159730704e-07, "loss": 0.0228, "step": 883550 }, { "epoch": 8.69, "grad_norm": 0.10812912881374359, "learning_rate": 6.64626093518822e-07, "loss": 0.0246, "step": 883575 }, { "epoch": 8.69, "grad_norm": 7.446054935455322, "learning_rate": 6.645019710645736e-07, "loss": 0.0279, "step": 883600 }, { "epoch": 8.69, "grad_norm": 4.090823650360107, "learning_rate": 6.64377848610325e-07, "loss": 0.0383, "step": 883625 }, { "epoch": 8.69, "grad_norm": 9.611686706542969, "learning_rate": 6.642537261560766e-07, "loss": 0.0289, "step": 883650 }, { "epoch": 8.69, "grad_norm": 0.0205916166305542, "learning_rate": 6.641296037018281e-07, "loss": 0.0351, "step": 883675 }, { "epoch": 8.69, "grad_norm": 1.1463980674743652, "learning_rate": 6.640054812475797e-07, "loss": 0.0195, "step": 883700 }, { "epoch": 8.69, "grad_norm": 2.456137180328369, "learning_rate": 6.638813587933313e-07, "loss": 0.0278, "step": 883725 }, { "epoch": 8.69, "grad_norm": 0.2651804983615875, "learning_rate": 6.637572363390827e-07, "loss": 0.0323, "step": 883750 }, { "epoch": 8.69, "grad_norm": 0.12981364130973816, "learning_rate": 6.636331138848343e-07, "loss": 0.0248, "step": 883775 }, { "epoch": 8.69, "grad_norm": 10.481244087219238, "learning_rate": 6.635089914305858e-07, "loss": 0.0407, "step": 883800 }, { "epoch": 8.69, "grad_norm": 0.013879283331334591, "learning_rate": 6.633848689763374e-07, "loss": 0.0318, "step": 883825 }, { "epoch": 8.69, "grad_norm": 4.095327377319336, "learning_rate": 6.632607465220889e-07, "loss": 0.029, "step": 883850 }, { "epoch": 8.69, "grad_norm": 0.11877650767564774, "learning_rate": 6.631366240678404e-07, "loss": 0.0189, "step": 883875 }, { "epoch": 8.69, "grad_norm": 1.050262212753296, "learning_rate": 6.63012501613592e-07, "loss": 0.0289, "step": 883900 }, { "epoch": 8.69, "grad_norm": 1.7693923711776733, "learning_rate": 6.628883791593435e-07, "loss": 0.0363, "step": 883925 }, { "epoch": 8.69, "grad_norm": 10.107300758361816, "learning_rate": 6.62764256705095e-07, "loss": 0.0248, "step": 883950 }, { "epoch": 8.69, "grad_norm": 0.011057759635150433, "learning_rate": 6.626401342508465e-07, "loss": 0.0519, "step": 883975 }, { "epoch": 8.69, "grad_norm": 1.3631705045700073, "learning_rate": 6.625160117965981e-07, "loss": 0.0184, "step": 884000 }, { "epoch": 8.69, "grad_norm": 6.877813339233398, "learning_rate": 6.623918893423497e-07, "loss": 0.0293, "step": 884025 }, { "epoch": 8.69, "grad_norm": 8.490752220153809, "learning_rate": 6.622677668881011e-07, "loss": 0.0412, "step": 884050 }, { "epoch": 8.69, "grad_norm": 14.421730041503906, "learning_rate": 6.621436444338527e-07, "loss": 0.0425, "step": 884075 }, { "epoch": 8.69, "grad_norm": 6.366644859313965, "learning_rate": 6.620195219796042e-07, "loss": 0.025, "step": 884100 }, { "epoch": 8.69, "grad_norm": 0.4322299659252167, "learning_rate": 6.618953995253558e-07, "loss": 0.0344, "step": 884125 }, { "epoch": 8.69, "grad_norm": 9.10390853881836, "learning_rate": 6.617712770711074e-07, "loss": 0.0219, "step": 884150 }, { "epoch": 8.69, "grad_norm": 0.015592115931212902, "learning_rate": 6.616471546168588e-07, "loss": 0.0313, "step": 884175 }, { "epoch": 8.69, "grad_norm": 7.522032737731934, "learning_rate": 6.615230321626104e-07, "loss": 0.0243, "step": 884200 }, { "epoch": 8.69, "grad_norm": 1.5094504356384277, "learning_rate": 6.613989097083619e-07, "loss": 0.0433, "step": 884225 }, { "epoch": 8.69, "grad_norm": 8.66146183013916, "learning_rate": 6.612747872541135e-07, "loss": 0.026, "step": 884250 }, { "epoch": 8.69, "grad_norm": 0.012369142845273018, "learning_rate": 6.61150664799865e-07, "loss": 0.019, "step": 884275 }, { "epoch": 8.69, "grad_norm": 12.944040298461914, "learning_rate": 6.610265423456165e-07, "loss": 0.0276, "step": 884300 }, { "epoch": 8.69, "grad_norm": 0.09703049063682556, "learning_rate": 6.609024198913681e-07, "loss": 0.0342, "step": 884325 }, { "epoch": 8.7, "grad_norm": 1.6623845100402832, "learning_rate": 6.607782974371196e-07, "loss": 0.0298, "step": 884350 }, { "epoch": 8.7, "grad_norm": 4.326526641845703, "learning_rate": 6.606541749828712e-07, "loss": 0.0417, "step": 884375 }, { "epoch": 8.7, "grad_norm": 19.239391326904297, "learning_rate": 6.605300525286226e-07, "loss": 0.0554, "step": 884400 }, { "epoch": 8.7, "grad_norm": 0.48261210322380066, "learning_rate": 6.604059300743742e-07, "loss": 0.0354, "step": 884425 }, { "epoch": 8.7, "grad_norm": 0.9153763055801392, "learning_rate": 6.602818076201258e-07, "loss": 0.0177, "step": 884450 }, { "epoch": 8.7, "grad_norm": 5.787147045135498, "learning_rate": 6.601576851658773e-07, "loss": 0.0286, "step": 884475 }, { "epoch": 8.7, "grad_norm": 11.769954681396484, "learning_rate": 6.600335627116289e-07, "loss": 0.0285, "step": 884500 }, { "epoch": 8.7, "grad_norm": 0.01926567032933235, "learning_rate": 6.599094402573803e-07, "loss": 0.0438, "step": 884525 }, { "epoch": 8.7, "grad_norm": 5.193471908569336, "learning_rate": 6.597853178031319e-07, "loss": 0.0273, "step": 884550 }, { "epoch": 8.7, "grad_norm": 1.7829965353012085, "learning_rate": 6.596611953488835e-07, "loss": 0.0321, "step": 884575 }, { "epoch": 8.7, "grad_norm": 18.01055335998535, "learning_rate": 6.59537072894635e-07, "loss": 0.0382, "step": 884600 }, { "epoch": 8.7, "grad_norm": 0.09713604301214218, "learning_rate": 6.594129504403865e-07, "loss": 0.0254, "step": 884625 }, { "epoch": 8.7, "grad_norm": 1.9869004487991333, "learning_rate": 6.59288827986138e-07, "loss": 0.0335, "step": 884650 }, { "epoch": 8.7, "grad_norm": 0.1267010122537613, "learning_rate": 6.591647055318896e-07, "loss": 0.0363, "step": 884675 }, { "epoch": 8.7, "grad_norm": 6.719320774078369, "learning_rate": 6.590405830776412e-07, "loss": 0.02, "step": 884700 }, { "epoch": 8.7, "grad_norm": 3.77376651763916, "learning_rate": 6.589214255215626e-07, "loss": 0.038, "step": 884725 }, { "epoch": 8.7, "grad_norm": 15.43254566192627, "learning_rate": 6.587973030673142e-07, "loss": 0.0235, "step": 884750 }, { "epoch": 8.7, "grad_norm": 0.37673893570899963, "learning_rate": 6.586731806130657e-07, "loss": 0.0227, "step": 884775 }, { "epoch": 8.7, "grad_norm": 13.728745460510254, "learning_rate": 6.585490581588172e-07, "loss": 0.0405, "step": 884800 }, { "epoch": 8.7, "grad_norm": 6.305965423583984, "learning_rate": 6.584249357045687e-07, "loss": 0.0415, "step": 884825 }, { "epoch": 8.7, "grad_norm": 10.349159240722656, "learning_rate": 6.583008132503204e-07, "loss": 0.0364, "step": 884850 }, { "epoch": 8.7, "grad_norm": 0.854983389377594, "learning_rate": 6.581766907960718e-07, "loss": 0.0385, "step": 884875 }, { "epoch": 8.7, "grad_norm": 6.979028701782227, "learning_rate": 6.580525683418233e-07, "loss": 0.0305, "step": 884900 }, { "epoch": 8.7, "grad_norm": 0.21506483852863312, "learning_rate": 6.57928445887575e-07, "loss": 0.028, "step": 884925 }, { "epoch": 8.7, "grad_norm": 17.126121520996094, "learning_rate": 6.578043234333265e-07, "loss": 0.014, "step": 884950 }, { "epoch": 8.7, "grad_norm": 0.02035713568329811, "learning_rate": 6.576802009790779e-07, "loss": 0.0244, "step": 884975 }, { "epoch": 8.7, "grad_norm": 10.182226181030273, "learning_rate": 6.575560785248294e-07, "loss": 0.0336, "step": 885000 }, { "epoch": 8.7, "grad_norm": 0.14175400137901306, "learning_rate": 6.574319560705811e-07, "loss": 0.0447, "step": 885025 }, { "epoch": 8.7, "grad_norm": 5.17396879196167, "learning_rate": 6.573078336163326e-07, "loss": 0.025, "step": 885050 }, { "epoch": 8.7, "grad_norm": 2.4623467922210693, "learning_rate": 6.571837111620841e-07, "loss": 0.0321, "step": 885075 }, { "epoch": 8.7, "grad_norm": 1.56570565700531, "learning_rate": 6.570595887078358e-07, "loss": 0.0197, "step": 885100 }, { "epoch": 8.7, "grad_norm": 9.622950553894043, "learning_rate": 6.569354662535872e-07, "loss": 0.0219, "step": 885125 }, { "epoch": 8.7, "grad_norm": 0.20032595098018646, "learning_rate": 6.568113437993387e-07, "loss": 0.0271, "step": 885150 }, { "epoch": 8.7, "grad_norm": 1.5579400062561035, "learning_rate": 6.566872213450903e-07, "loss": 0.027, "step": 885175 }, { "epoch": 8.7, "grad_norm": 7.668702125549316, "learning_rate": 6.565630988908419e-07, "loss": 0.019, "step": 885200 }, { "epoch": 8.7, "grad_norm": 9.485122680664062, "learning_rate": 6.564389764365933e-07, "loss": 0.0546, "step": 885225 }, { "epoch": 8.7, "grad_norm": 1.5544873476028442, "learning_rate": 6.563148539823448e-07, "loss": 0.0246, "step": 885250 }, { "epoch": 8.7, "grad_norm": 0.5467358231544495, "learning_rate": 6.561907315280965e-07, "loss": 0.0298, "step": 885275 }, { "epoch": 8.7, "grad_norm": 11.926558494567871, "learning_rate": 6.56066609073848e-07, "loss": 0.0425, "step": 885300 }, { "epoch": 8.7, "grad_norm": 0.5681467056274414, "learning_rate": 6.559424866195994e-07, "loss": 0.046, "step": 885325 }, { "epoch": 8.7, "grad_norm": 8.741541862487793, "learning_rate": 6.55818364165351e-07, "loss": 0.0224, "step": 885350 }, { "epoch": 8.71, "grad_norm": 0.03790885955095291, "learning_rate": 6.556942417111026e-07, "loss": 0.0538, "step": 885375 }, { "epoch": 8.71, "grad_norm": 6.591927528381348, "learning_rate": 6.555701192568541e-07, "loss": 0.024, "step": 885400 }, { "epoch": 8.71, "grad_norm": 0.007097022607922554, "learning_rate": 6.554459968026055e-07, "loss": 0.0376, "step": 885425 }, { "epoch": 8.71, "grad_norm": 20.49995231628418, "learning_rate": 6.553218743483572e-07, "loss": 0.0233, "step": 885450 }, { "epoch": 8.71, "grad_norm": 0.18261606991291046, "learning_rate": 6.551977518941087e-07, "loss": 0.0426, "step": 885475 }, { "epoch": 8.71, "grad_norm": 1.1025418043136597, "learning_rate": 6.550736294398602e-07, "loss": 0.0183, "step": 885500 }, { "epoch": 8.71, "grad_norm": 0.029716448858380318, "learning_rate": 6.549495069856119e-07, "loss": 0.0457, "step": 885525 }, { "epoch": 8.71, "grad_norm": 5.83576774597168, "learning_rate": 6.548253845313633e-07, "loss": 0.0345, "step": 885550 }, { "epoch": 8.71, "grad_norm": 2.761120080947876, "learning_rate": 6.547012620771148e-07, "loss": 0.0359, "step": 885575 }, { "epoch": 8.71, "grad_norm": 3.2211203575134277, "learning_rate": 6.545771396228664e-07, "loss": 0.0301, "step": 885600 }, { "epoch": 8.71, "grad_norm": 0.26847708225250244, "learning_rate": 6.54453017168618e-07, "loss": 0.0331, "step": 885625 }, { "epoch": 8.71, "grad_norm": 2.4531562328338623, "learning_rate": 6.543288947143694e-07, "loss": 0.0306, "step": 885650 }, { "epoch": 8.71, "grad_norm": 1.175236701965332, "learning_rate": 6.542047722601209e-07, "loss": 0.0215, "step": 885675 }, { "epoch": 8.71, "grad_norm": 4.909669399261475, "learning_rate": 6.540806498058726e-07, "loss": 0.034, "step": 885700 }, { "epoch": 8.71, "grad_norm": 1.1628611087799072, "learning_rate": 6.539565273516241e-07, "loss": 0.0353, "step": 885725 }, { "epoch": 8.71, "grad_norm": 12.540539741516113, "learning_rate": 6.538324048973755e-07, "loss": 0.0209, "step": 885750 }, { "epoch": 8.71, "grad_norm": 1.6741948127746582, "learning_rate": 6.537082824431271e-07, "loss": 0.0282, "step": 885775 }, { "epoch": 8.71, "grad_norm": 11.791090965270996, "learning_rate": 6.535841599888787e-07, "loss": 0.0268, "step": 885800 }, { "epoch": 8.71, "grad_norm": 3.7686052322387695, "learning_rate": 6.534600375346302e-07, "loss": 0.028, "step": 885825 }, { "epoch": 8.71, "grad_norm": 9.970108032226562, "learning_rate": 6.533359150803817e-07, "loss": 0.0128, "step": 885850 }, { "epoch": 8.71, "grad_norm": 9.406604766845703, "learning_rate": 6.532117926261334e-07, "loss": 0.0476, "step": 885875 }, { "epoch": 8.71, "grad_norm": 14.416447639465332, "learning_rate": 6.530876701718848e-07, "loss": 0.029, "step": 885900 }, { "epoch": 8.71, "grad_norm": 0.01248971652239561, "learning_rate": 6.529635477176363e-07, "loss": 0.0302, "step": 885925 }, { "epoch": 8.71, "grad_norm": 1.0041218996047974, "learning_rate": 6.52839425263388e-07, "loss": 0.0283, "step": 885950 }, { "epoch": 8.71, "grad_norm": 15.281054496765137, "learning_rate": 6.527153028091395e-07, "loss": 0.0635, "step": 885975 }, { "epoch": 8.71, "grad_norm": 11.860417366027832, "learning_rate": 6.525911803548909e-07, "loss": 0.0354, "step": 886000 }, { "epoch": 8.71, "grad_norm": 3.5337839126586914, "learning_rate": 6.524670579006425e-07, "loss": 0.0382, "step": 886025 }, { "epoch": 8.71, "grad_norm": 13.920936584472656, "learning_rate": 6.523429354463941e-07, "loss": 0.0286, "step": 886050 }, { "epoch": 8.71, "grad_norm": 5.0776753425598145, "learning_rate": 6.522188129921456e-07, "loss": 0.0406, "step": 886075 }, { "epoch": 8.71, "grad_norm": 15.310379981994629, "learning_rate": 6.52094690537897e-07, "loss": 0.0398, "step": 886100 }, { "epoch": 8.71, "grad_norm": 0.09695801138877869, "learning_rate": 6.519705680836487e-07, "loss": 0.0372, "step": 886125 }, { "epoch": 8.71, "grad_norm": 3.160851001739502, "learning_rate": 6.518464456294002e-07, "loss": 0.0291, "step": 886150 }, { "epoch": 8.71, "grad_norm": 1.5927517414093018, "learning_rate": 6.517223231751517e-07, "loss": 0.0479, "step": 886175 }, { "epoch": 8.71, "grad_norm": 2.168811798095703, "learning_rate": 6.515982007209034e-07, "loss": 0.0226, "step": 886200 }, { "epoch": 8.71, "grad_norm": 9.85565185546875, "learning_rate": 6.514740782666548e-07, "loss": 0.0335, "step": 886225 }, { "epoch": 8.71, "grad_norm": 0.15525081753730774, "learning_rate": 6.513499558124063e-07, "loss": 0.019, "step": 886250 }, { "epoch": 8.71, "grad_norm": 0.11261086165904999, "learning_rate": 6.512258333581578e-07, "loss": 0.0548, "step": 886275 }, { "epoch": 8.71, "grad_norm": 29.124385833740234, "learning_rate": 6.511017109039095e-07, "loss": 0.0468, "step": 886300 }, { "epoch": 8.71, "grad_norm": 0.12252121418714523, "learning_rate": 6.509775884496609e-07, "loss": 0.0336, "step": 886325 }, { "epoch": 8.71, "grad_norm": 7.8786396980285645, "learning_rate": 6.508534659954124e-07, "loss": 0.0278, "step": 886350 }, { "epoch": 8.71, "grad_norm": 3.9719088077545166, "learning_rate": 6.507293435411641e-07, "loss": 0.0291, "step": 886375 }, { "epoch": 8.72, "grad_norm": 11.347172737121582, "learning_rate": 6.506052210869156e-07, "loss": 0.0349, "step": 886400 }, { "epoch": 8.72, "grad_norm": 0.023556459695100784, "learning_rate": 6.50481098632667e-07, "loss": 0.0237, "step": 886425 }, { "epoch": 8.72, "grad_norm": 8.112723350524902, "learning_rate": 6.503569761784186e-07, "loss": 0.0429, "step": 886450 }, { "epoch": 8.72, "grad_norm": 0.4061431288719177, "learning_rate": 6.502328537241702e-07, "loss": 0.0474, "step": 886475 }, { "epoch": 8.72, "grad_norm": 0.17357680201530457, "learning_rate": 6.501087312699217e-07, "loss": 0.0235, "step": 886500 }, { "epoch": 8.72, "grad_norm": 0.021354282274842262, "learning_rate": 6.499846088156732e-07, "loss": 0.0299, "step": 886525 }, { "epoch": 8.72, "grad_norm": 1.0392160415649414, "learning_rate": 6.498604863614249e-07, "loss": 0.0286, "step": 886550 }, { "epoch": 8.72, "grad_norm": 0.04638206213712692, "learning_rate": 6.497363639071763e-07, "loss": 0.0199, "step": 886575 }, { "epoch": 8.72, "grad_norm": 5.942901134490967, "learning_rate": 6.496122414529278e-07, "loss": 0.0187, "step": 886600 }, { "epoch": 8.72, "grad_norm": 0.030197907239198685, "learning_rate": 6.494881189986795e-07, "loss": 0.0284, "step": 886625 }, { "epoch": 8.72, "grad_norm": 7.682000637054443, "learning_rate": 6.49363996544431e-07, "loss": 0.0386, "step": 886650 }, { "epoch": 8.72, "grad_norm": 3.4699759483337402, "learning_rate": 6.492398740901824e-07, "loss": 0.0286, "step": 886675 }, { "epoch": 8.72, "grad_norm": 0.3935282528400421, "learning_rate": 6.491157516359339e-07, "loss": 0.0189, "step": 886700 }, { "epoch": 8.72, "grad_norm": 0.004622448701411486, "learning_rate": 6.489916291816856e-07, "loss": 0.0496, "step": 886725 }, { "epoch": 8.72, "grad_norm": 10.427197456359863, "learning_rate": 6.488675067274371e-07, "loss": 0.0304, "step": 886750 }, { "epoch": 8.72, "grad_norm": 0.20844578742980957, "learning_rate": 6.487433842731885e-07, "loss": 0.0336, "step": 886775 }, { "epoch": 8.72, "grad_norm": 9.314875602722168, "learning_rate": 6.486192618189402e-07, "loss": 0.0252, "step": 886800 }, { "epoch": 8.72, "grad_norm": 0.2500979006290436, "learning_rate": 6.484951393646917e-07, "loss": 0.0232, "step": 886825 }, { "epoch": 8.72, "grad_norm": 22.928131103515625, "learning_rate": 6.483710169104432e-07, "loss": 0.0219, "step": 886850 }, { "epoch": 8.72, "grad_norm": 0.050550974905490875, "learning_rate": 6.482468944561949e-07, "loss": 0.0322, "step": 886875 }, { "epoch": 8.72, "grad_norm": 4.360803127288818, "learning_rate": 6.481227720019463e-07, "loss": 0.0186, "step": 886900 }, { "epoch": 8.72, "grad_norm": 0.0032710882369428873, "learning_rate": 6.479986495476978e-07, "loss": 0.0532, "step": 886925 }, { "epoch": 8.72, "grad_norm": 19.21868324279785, "learning_rate": 6.478745270934493e-07, "loss": 0.0199, "step": 886950 }, { "epoch": 8.72, "grad_norm": 0.1602429747581482, "learning_rate": 6.47750404639201e-07, "loss": 0.0424, "step": 886975 }, { "epoch": 8.72, "grad_norm": 12.978617668151855, "learning_rate": 6.476262821849524e-07, "loss": 0.0306, "step": 887000 }, { "epoch": 8.72, "grad_norm": 0.08417920023202896, "learning_rate": 6.475021597307039e-07, "loss": 0.0392, "step": 887025 }, { "epoch": 8.72, "grad_norm": 14.504257202148438, "learning_rate": 6.473780372764556e-07, "loss": 0.0331, "step": 887050 }, { "epoch": 8.72, "grad_norm": 0.009042096324265003, "learning_rate": 6.472539148222071e-07, "loss": 0.0269, "step": 887075 }, { "epoch": 8.72, "grad_norm": 17.184425354003906, "learning_rate": 6.471297923679585e-07, "loss": 0.0162, "step": 887100 }, { "epoch": 8.72, "grad_norm": 0.2529764771461487, "learning_rate": 6.4700566991371e-07, "loss": 0.0255, "step": 887125 }, { "epoch": 8.72, "grad_norm": 21.403905868530273, "learning_rate": 6.468815474594617e-07, "loss": 0.0285, "step": 887150 }, { "epoch": 8.72, "grad_norm": 0.20467934012413025, "learning_rate": 6.467574250052132e-07, "loss": 0.0423, "step": 887175 }, { "epoch": 8.72, "grad_norm": 12.002900123596191, "learning_rate": 6.466333025509647e-07, "loss": 0.0284, "step": 887200 }, { "epoch": 8.72, "grad_norm": 0.03971010074019432, "learning_rate": 6.465091800967164e-07, "loss": 0.0342, "step": 887225 }, { "epoch": 8.72, "grad_norm": 11.541147232055664, "learning_rate": 6.463850576424678e-07, "loss": 0.0325, "step": 887250 }, { "epoch": 8.72, "grad_norm": 1.9899961948394775, "learning_rate": 6.462609351882193e-07, "loss": 0.0508, "step": 887275 }, { "epoch": 8.72, "grad_norm": 4.245713710784912, "learning_rate": 6.46136812733971e-07, "loss": 0.0145, "step": 887300 }, { "epoch": 8.72, "grad_norm": 0.617810070514679, "learning_rate": 6.460126902797225e-07, "loss": 0.0343, "step": 887325 }, { "epoch": 8.72, "grad_norm": 11.374377250671387, "learning_rate": 6.458885678254739e-07, "loss": 0.019, "step": 887350 }, { "epoch": 8.72, "grad_norm": 2.9120006561279297, "learning_rate": 6.457644453712254e-07, "loss": 0.0328, "step": 887375 }, { "epoch": 8.73, "grad_norm": 1.9472718238830566, "learning_rate": 6.456403229169771e-07, "loss": 0.0225, "step": 887400 }, { "epoch": 8.73, "grad_norm": 0.08871162682771683, "learning_rate": 6.455162004627286e-07, "loss": 0.025, "step": 887425 }, { "epoch": 8.73, "grad_norm": 1.3753628730773926, "learning_rate": 6.4539207800848e-07, "loss": 0.027, "step": 887450 }, { "epoch": 8.73, "grad_norm": 0.734239399433136, "learning_rate": 6.452679555542317e-07, "loss": 0.0345, "step": 887475 }, { "epoch": 8.73, "grad_norm": 21.061418533325195, "learning_rate": 6.451438330999832e-07, "loss": 0.0431, "step": 887500 }, { "epoch": 8.73, "grad_norm": 0.0859111100435257, "learning_rate": 6.450197106457347e-07, "loss": 0.0225, "step": 887525 }, { "epoch": 8.73, "grad_norm": 15.542677879333496, "learning_rate": 6.448955881914861e-07, "loss": 0.0203, "step": 887550 }, { "epoch": 8.73, "grad_norm": 0.4812546372413635, "learning_rate": 6.447714657372378e-07, "loss": 0.0406, "step": 887575 }, { "epoch": 8.73, "grad_norm": 10.402704238891602, "learning_rate": 6.446473432829893e-07, "loss": 0.0374, "step": 887600 }, { "epoch": 8.73, "grad_norm": 0.17532110214233398, "learning_rate": 6.445232208287408e-07, "loss": 0.0293, "step": 887625 }, { "epoch": 8.73, "grad_norm": 14.917469024658203, "learning_rate": 6.443990983744925e-07, "loss": 0.04, "step": 887650 }, { "epoch": 8.73, "grad_norm": 1.3292431831359863, "learning_rate": 6.442749759202439e-07, "loss": 0.0313, "step": 887675 }, { "epoch": 8.73, "grad_norm": 5.660325527191162, "learning_rate": 6.441508534659954e-07, "loss": 0.022, "step": 887700 }, { "epoch": 8.73, "grad_norm": 0.19121162593364716, "learning_rate": 6.440267310117471e-07, "loss": 0.0251, "step": 887725 }, { "epoch": 8.73, "grad_norm": 3.6191062927246094, "learning_rate": 6.439026085574986e-07, "loss": 0.0221, "step": 887750 }, { "epoch": 8.73, "grad_norm": 0.63160640001297, "learning_rate": 6.4377848610325e-07, "loss": 0.0215, "step": 887775 }, { "epoch": 8.73, "grad_norm": 5.880456924438477, "learning_rate": 6.436543636490015e-07, "loss": 0.0213, "step": 887800 }, { "epoch": 8.73, "grad_norm": 3.8695616722106934, "learning_rate": 6.435302411947532e-07, "loss": 0.0389, "step": 887825 }, { "epoch": 8.73, "grad_norm": 16.55977439880371, "learning_rate": 6.434061187405047e-07, "loss": 0.0461, "step": 887850 }, { "epoch": 8.73, "grad_norm": 1.0209029912948608, "learning_rate": 6.432819962862562e-07, "loss": 0.0405, "step": 887875 }, { "epoch": 8.73, "grad_norm": 7.7076287269592285, "learning_rate": 6.431578738320079e-07, "loss": 0.0377, "step": 887900 }, { "epoch": 8.73, "grad_norm": 0.029316894710063934, "learning_rate": 6.430337513777593e-07, "loss": 0.0316, "step": 887925 }, { "epoch": 8.73, "grad_norm": 0.611571192741394, "learning_rate": 6.429096289235108e-07, "loss": 0.0296, "step": 887950 }, { "epoch": 8.73, "grad_norm": 0.5275713801383972, "learning_rate": 6.427855064692623e-07, "loss": 0.0215, "step": 887975 }, { "epoch": 8.73, "grad_norm": 7.4722795486450195, "learning_rate": 6.42661384015014e-07, "loss": 0.0301, "step": 888000 }, { "epoch": 8.73, "grad_norm": 0.7137999534606934, "learning_rate": 6.425372615607654e-07, "loss": 0.0325, "step": 888025 }, { "epoch": 8.73, "grad_norm": 17.303800582885742, "learning_rate": 6.424131391065169e-07, "loss": 0.0256, "step": 888050 }, { "epoch": 8.73, "grad_norm": 0.007941230200231075, "learning_rate": 6.422890166522686e-07, "loss": 0.0328, "step": 888075 }, { "epoch": 8.73, "grad_norm": 11.494147300720215, "learning_rate": 6.421648941980201e-07, "loss": 0.0188, "step": 888100 }, { "epoch": 8.73, "grad_norm": 0.05203322693705559, "learning_rate": 6.420457366419415e-07, "loss": 0.0297, "step": 888125 }, { "epoch": 8.73, "grad_norm": 9.210586547851562, "learning_rate": 6.41921614187693e-07, "loss": 0.0216, "step": 888150 }, { "epoch": 8.73, "grad_norm": 0.04298119619488716, "learning_rate": 6.417974917334446e-07, "loss": 0.0291, "step": 888175 }, { "epoch": 8.73, "grad_norm": 2.909602165222168, "learning_rate": 6.416733692791961e-07, "loss": 0.0211, "step": 888200 }, { "epoch": 8.73, "grad_norm": 1.938244342803955, "learning_rate": 6.415492468249477e-07, "loss": 0.0523, "step": 888225 }, { "epoch": 8.73, "grad_norm": 4.242202281951904, "learning_rate": 6.414251243706993e-07, "loss": 0.0398, "step": 888250 }, { "epoch": 8.73, "grad_norm": 0.10760561376810074, "learning_rate": 6.413010019164507e-07, "loss": 0.0197, "step": 888275 }, { "epoch": 8.73, "grad_norm": 4.684234619140625, "learning_rate": 6.411768794622023e-07, "loss": 0.016, "step": 888300 }, { "epoch": 8.73, "grad_norm": 0.1141415387392044, "learning_rate": 6.410527570079539e-07, "loss": 0.0227, "step": 888325 }, { "epoch": 8.73, "grad_norm": 2.571119546890259, "learning_rate": 6.409286345537054e-07, "loss": 0.0115, "step": 888350 }, { "epoch": 8.73, "grad_norm": 2.5817999839782715, "learning_rate": 6.408045120994568e-07, "loss": 0.0286, "step": 888375 }, { "epoch": 8.73, "grad_norm": 34.20283889770508, "learning_rate": 6.406803896452084e-07, "loss": 0.0312, "step": 888400 }, { "epoch": 8.74, "grad_norm": 0.05124345421791077, "learning_rate": 6.4055626719096e-07, "loss": 0.0579, "step": 888425 }, { "epoch": 8.74, "grad_norm": 20.583744049072266, "learning_rate": 6.404321447367115e-07, "loss": 0.032, "step": 888450 }, { "epoch": 8.74, "grad_norm": 1.1605957746505737, "learning_rate": 6.40308022282463e-07, "loss": 0.0339, "step": 888475 }, { "epoch": 8.74, "grad_norm": 7.325923919677734, "learning_rate": 6.401838998282146e-07, "loss": 0.0135, "step": 888500 }, { "epoch": 8.74, "grad_norm": 0.01689232885837555, "learning_rate": 6.400597773739661e-07, "loss": 0.0362, "step": 888525 }, { "epoch": 8.74, "grad_norm": 20.82209587097168, "learning_rate": 6.399356549197176e-07, "loss": 0.0284, "step": 888550 }, { "epoch": 8.74, "grad_norm": 0.11815954744815826, "learning_rate": 6.398115324654691e-07, "loss": 0.0271, "step": 888575 }, { "epoch": 8.74, "grad_norm": 37.760047912597656, "learning_rate": 6.396874100112207e-07, "loss": 0.0257, "step": 888600 }, { "epoch": 8.74, "grad_norm": 0.33648645877838135, "learning_rate": 6.395632875569722e-07, "loss": 0.0189, "step": 888625 }, { "epoch": 8.74, "grad_norm": 4.9350457191467285, "learning_rate": 6.394391651027238e-07, "loss": 0.0317, "step": 888650 }, { "epoch": 8.74, "grad_norm": 0.09106544405221939, "learning_rate": 6.393150426484754e-07, "loss": 0.0448, "step": 888675 }, { "epoch": 8.74, "grad_norm": 0.4082053303718567, "learning_rate": 6.391909201942269e-07, "loss": 0.0232, "step": 888700 }, { "epoch": 8.74, "grad_norm": 10.906819343566895, "learning_rate": 6.390667977399784e-07, "loss": 0.0482, "step": 888725 }, { "epoch": 8.74, "grad_norm": 5.416766166687012, "learning_rate": 6.3894267528573e-07, "loss": 0.04, "step": 888750 }, { "epoch": 8.74, "grad_norm": 2.2397658824920654, "learning_rate": 6.388185528314815e-07, "loss": 0.0342, "step": 888775 }, { "epoch": 8.74, "grad_norm": 3.7069804668426514, "learning_rate": 6.38694430377233e-07, "loss": 0.0211, "step": 888800 }, { "epoch": 8.74, "grad_norm": 1.50736403465271, "learning_rate": 6.385703079229845e-07, "loss": 0.0312, "step": 888825 }, { "epoch": 8.74, "grad_norm": 1.667902946472168, "learning_rate": 6.384461854687361e-07, "loss": 0.0219, "step": 888850 }, { "epoch": 8.74, "grad_norm": 0.013101457618176937, "learning_rate": 6.383220630144876e-07, "loss": 0.0423, "step": 888875 }, { "epoch": 8.74, "grad_norm": 7.961453437805176, "learning_rate": 6.381979405602392e-07, "loss": 0.023, "step": 888900 }, { "epoch": 8.74, "grad_norm": 0.47103115916252136, "learning_rate": 6.380738181059908e-07, "loss": 0.0376, "step": 888925 }, { "epoch": 8.74, "grad_norm": 7.900140762329102, "learning_rate": 6.379496956517422e-07, "loss": 0.0161, "step": 888950 }, { "epoch": 8.74, "grad_norm": 1.622387170791626, "learning_rate": 6.378255731974937e-07, "loss": 0.0285, "step": 888975 }, { "epoch": 8.74, "grad_norm": 13.88281536102295, "learning_rate": 6.377014507432453e-07, "loss": 0.0275, "step": 889000 }, { "epoch": 8.74, "grad_norm": 3.2596988677978516, "learning_rate": 6.375773282889969e-07, "loss": 0.0368, "step": 889025 }, { "epoch": 8.74, "grad_norm": 11.391465187072754, "learning_rate": 6.374532058347483e-07, "loss": 0.03, "step": 889050 }, { "epoch": 8.74, "grad_norm": 12.119500160217285, "learning_rate": 6.373290833804999e-07, "loss": 0.0327, "step": 889075 }, { "epoch": 8.74, "grad_norm": 8.583532333374023, "learning_rate": 6.372049609262515e-07, "loss": 0.0249, "step": 889100 }, { "epoch": 8.74, "grad_norm": 0.010531281121075153, "learning_rate": 6.37080838472003e-07, "loss": 0.0301, "step": 889125 }, { "epoch": 8.74, "grad_norm": 19.08660316467285, "learning_rate": 6.369567160177545e-07, "loss": 0.0332, "step": 889150 }, { "epoch": 8.74, "grad_norm": 0.013258468359708786, "learning_rate": 6.368325935635061e-07, "loss": 0.0323, "step": 889175 }, { "epoch": 8.74, "grad_norm": 3.467581033706665, "learning_rate": 6.367084711092576e-07, "loss": 0.0125, "step": 889200 }, { "epoch": 8.74, "grad_norm": 7.697360992431641, "learning_rate": 6.365843486550091e-07, "loss": 0.0297, "step": 889225 }, { "epoch": 8.74, "grad_norm": 3.0105690956115723, "learning_rate": 6.364602262007606e-07, "loss": 0.025, "step": 889250 }, { "epoch": 8.74, "grad_norm": 4.904828071594238, "learning_rate": 6.363361037465122e-07, "loss": 0.0242, "step": 889275 }, { "epoch": 8.74, "grad_norm": 10.041143417358398, "learning_rate": 6.362119812922637e-07, "loss": 0.024, "step": 889300 }, { "epoch": 8.74, "grad_norm": 0.0889475867152214, "learning_rate": 6.360878588380153e-07, "loss": 0.0347, "step": 889325 }, { "epoch": 8.74, "grad_norm": 7.12265682220459, "learning_rate": 6.359637363837669e-07, "loss": 0.0349, "step": 889350 }, { "epoch": 8.74, "grad_norm": 0.026680808514356613, "learning_rate": 6.358396139295184e-07, "loss": 0.0281, "step": 889375 }, { "epoch": 8.74, "grad_norm": 13.545197486877441, "learning_rate": 6.357154914752698e-07, "loss": 0.0244, "step": 889400 }, { "epoch": 8.74, "grad_norm": 0.21024654805660248, "learning_rate": 6.355913690210214e-07, "loss": 0.0387, "step": 889425 }, { "epoch": 8.75, "grad_norm": 1.4565273523330688, "learning_rate": 6.35467246566773e-07, "loss": 0.0221, "step": 889450 }, { "epoch": 8.75, "grad_norm": 0.16465049982070923, "learning_rate": 6.353431241125245e-07, "loss": 0.0377, "step": 889475 }, { "epoch": 8.75, "grad_norm": 3.1580753326416016, "learning_rate": 6.35219001658276e-07, "loss": 0.0227, "step": 889500 }, { "epoch": 8.75, "grad_norm": 0.001136035076342523, "learning_rate": 6.350948792040276e-07, "loss": 0.0426, "step": 889525 }, { "epoch": 8.75, "grad_norm": 0.8516182899475098, "learning_rate": 6.349707567497791e-07, "loss": 0.0202, "step": 889550 }, { "epoch": 8.75, "grad_norm": 0.1109653189778328, "learning_rate": 6.348466342955307e-07, "loss": 0.0182, "step": 889575 }, { "epoch": 8.75, "grad_norm": 9.980886459350586, "learning_rate": 6.347225118412823e-07, "loss": 0.0247, "step": 889600 }, { "epoch": 8.75, "grad_norm": 10.988327026367188, "learning_rate": 6.345983893870337e-07, "loss": 0.0299, "step": 889625 }, { "epoch": 8.75, "grad_norm": 0.3919816017150879, "learning_rate": 6.344742669327852e-07, "loss": 0.0148, "step": 889650 }, { "epoch": 8.75, "grad_norm": 0.0046307239681482315, "learning_rate": 6.343501444785368e-07, "loss": 0.0363, "step": 889675 }, { "epoch": 8.75, "grad_norm": 9.947466850280762, "learning_rate": 6.342260220242884e-07, "loss": 0.0255, "step": 889700 }, { "epoch": 8.75, "grad_norm": 7.389728546142578, "learning_rate": 6.341018995700398e-07, "loss": 0.0397, "step": 889725 }, { "epoch": 8.75, "grad_norm": 9.634664535522461, "learning_rate": 6.339777771157914e-07, "loss": 0.0393, "step": 889750 }, { "epoch": 8.75, "grad_norm": 2.560098886489868, "learning_rate": 6.33853654661543e-07, "loss": 0.0224, "step": 889775 }, { "epoch": 8.75, "grad_norm": 8.666510581970215, "learning_rate": 6.337295322072945e-07, "loss": 0.0291, "step": 889800 }, { "epoch": 8.75, "grad_norm": 0.12948466837406158, "learning_rate": 6.33605409753046e-07, "loss": 0.0261, "step": 889825 }, { "epoch": 8.75, "grad_norm": 3.180086135864258, "learning_rate": 6.334812872987975e-07, "loss": 0.0267, "step": 889850 }, { "epoch": 8.75, "grad_norm": 0.05379962921142578, "learning_rate": 6.333571648445491e-07, "loss": 0.0196, "step": 889875 }, { "epoch": 8.75, "grad_norm": 11.256199836730957, "learning_rate": 6.332330423903006e-07, "loss": 0.0276, "step": 889900 }, { "epoch": 8.75, "grad_norm": 0.018564248457551003, "learning_rate": 6.331089199360521e-07, "loss": 0.0255, "step": 889925 }, { "epoch": 8.75, "grad_norm": 2.1733200550079346, "learning_rate": 6.329847974818037e-07, "loss": 0.0263, "step": 889950 }, { "epoch": 8.75, "grad_norm": 0.07253623008728027, "learning_rate": 6.328606750275552e-07, "loss": 0.0269, "step": 889975 }, { "epoch": 8.75, "grad_norm": 3.272989511489868, "learning_rate": 6.327365525733068e-07, "loss": 0.0351, "step": 890000 }, { "epoch": 8.75, "grad_norm": 3.007455348968506, "learning_rate": 6.326124301190584e-07, "loss": 0.0282, "step": 890025 }, { "epoch": 8.75, "grad_norm": 2.5905532836914062, "learning_rate": 6.324883076648098e-07, "loss": 0.0212, "step": 890050 }, { "epoch": 8.75, "grad_norm": 1.2689077854156494, "learning_rate": 6.323641852105613e-07, "loss": 0.033, "step": 890075 }, { "epoch": 8.75, "grad_norm": 12.311470985412598, "learning_rate": 6.322400627563129e-07, "loss": 0.0262, "step": 890100 }, { "epoch": 8.75, "grad_norm": 0.41842737793922424, "learning_rate": 6.321159403020645e-07, "loss": 0.026, "step": 890125 }, { "epoch": 8.75, "grad_norm": 17.69625473022461, "learning_rate": 6.31991817847816e-07, "loss": 0.024, "step": 890150 }, { "epoch": 8.75, "grad_norm": 0.3397890031337738, "learning_rate": 6.318676953935675e-07, "loss": 0.0387, "step": 890175 }, { "epoch": 8.75, "grad_norm": 11.973246574401855, "learning_rate": 6.317435729393191e-07, "loss": 0.0285, "step": 890200 }, { "epoch": 8.75, "grad_norm": 0.048452384769916534, "learning_rate": 6.316194504850706e-07, "loss": 0.016, "step": 890225 }, { "epoch": 8.75, "grad_norm": 5.236021995544434, "learning_rate": 6.314953280308222e-07, "loss": 0.0398, "step": 890250 }, { "epoch": 8.75, "grad_norm": 0.1288965344429016, "learning_rate": 6.313712055765736e-07, "loss": 0.0234, "step": 890275 }, { "epoch": 8.75, "grad_norm": 12.013548851013184, "learning_rate": 6.312470831223252e-07, "loss": 0.0317, "step": 890300 }, { "epoch": 8.75, "grad_norm": 0.050534866750240326, "learning_rate": 6.311229606680767e-07, "loss": 0.0402, "step": 890325 }, { "epoch": 8.75, "grad_norm": 11.610612869262695, "learning_rate": 6.309988382138283e-07, "loss": 0.0298, "step": 890350 }, { "epoch": 8.75, "grad_norm": 0.024285001680254936, "learning_rate": 6.308747157595799e-07, "loss": 0.0261, "step": 890375 }, { "epoch": 8.75, "grad_norm": 0.6169878244400024, "learning_rate": 6.307505933053313e-07, "loss": 0.0443, "step": 890400 }, { "epoch": 8.75, "grad_norm": 0.08407992124557495, "learning_rate": 6.306264708510829e-07, "loss": 0.0209, "step": 890425 }, { "epoch": 8.76, "grad_norm": 15.556965827941895, "learning_rate": 6.305023483968345e-07, "loss": 0.0306, "step": 890450 }, { "epoch": 8.76, "grad_norm": 6.403468608856201, "learning_rate": 6.30378225942586e-07, "loss": 0.0306, "step": 890475 }, { "epoch": 8.76, "grad_norm": 23.592668533325195, "learning_rate": 6.302541034883374e-07, "loss": 0.0354, "step": 890500 }, { "epoch": 8.76, "grad_norm": 12.316316604614258, "learning_rate": 6.30129981034089e-07, "loss": 0.0352, "step": 890525 }, { "epoch": 8.76, "grad_norm": 14.047731399536133, "learning_rate": 6.300058585798406e-07, "loss": 0.0345, "step": 890550 }, { "epoch": 8.76, "grad_norm": 1.653367519378662, "learning_rate": 6.298817361255921e-07, "loss": 0.0448, "step": 890575 }, { "epoch": 8.76, "grad_norm": 15.571053504943848, "learning_rate": 6.297576136713436e-07, "loss": 0.0271, "step": 890600 }, { "epoch": 8.76, "grad_norm": 1.623115062713623, "learning_rate": 6.296334912170952e-07, "loss": 0.0352, "step": 890625 }, { "epoch": 8.76, "grad_norm": 14.056831359863281, "learning_rate": 6.295093687628467e-07, "loss": 0.0364, "step": 890650 }, { "epoch": 8.76, "grad_norm": 0.1466762125492096, "learning_rate": 6.293852463085983e-07, "loss": 0.0399, "step": 890675 }, { "epoch": 8.76, "grad_norm": 11.015434265136719, "learning_rate": 6.292611238543497e-07, "loss": 0.0275, "step": 890700 }, { "epoch": 8.76, "grad_norm": 0.025516584515571594, "learning_rate": 6.291370014001013e-07, "loss": 0.0197, "step": 890725 }, { "epoch": 8.76, "grad_norm": 12.200185775756836, "learning_rate": 6.290128789458528e-07, "loss": 0.0262, "step": 890750 }, { "epoch": 8.76, "grad_norm": 0.10123153775930405, "learning_rate": 6.288887564916044e-07, "loss": 0.0395, "step": 890775 }, { "epoch": 8.76, "grad_norm": 17.25421142578125, "learning_rate": 6.28764634037356e-07, "loss": 0.0342, "step": 890800 }, { "epoch": 8.76, "grad_norm": 7.114356517791748, "learning_rate": 6.286405115831075e-07, "loss": 0.0364, "step": 890825 }, { "epoch": 8.76, "grad_norm": 13.212257385253906, "learning_rate": 6.28516389128859e-07, "loss": 0.017, "step": 890850 }, { "epoch": 8.76, "grad_norm": 3.421361207962036, "learning_rate": 6.283922666746106e-07, "loss": 0.0376, "step": 890875 }, { "epoch": 8.76, "grad_norm": 15.283768653869629, "learning_rate": 6.282681442203621e-07, "loss": 0.0354, "step": 890900 }, { "epoch": 8.76, "grad_norm": 0.3145736753940582, "learning_rate": 6.281440217661137e-07, "loss": 0.0393, "step": 890925 }, { "epoch": 8.76, "grad_norm": 4.724555492401123, "learning_rate": 6.280198993118651e-07, "loss": 0.0209, "step": 890950 }, { "epoch": 8.76, "grad_norm": 0.17551273107528687, "learning_rate": 6.278957768576167e-07, "loss": 0.0504, "step": 890975 }, { "epoch": 8.76, "grad_norm": 2.5192341804504395, "learning_rate": 6.277716544033682e-07, "loss": 0.0176, "step": 891000 }, { "epoch": 8.76, "grad_norm": 0.01290261372923851, "learning_rate": 6.276475319491198e-07, "loss": 0.0478, "step": 891025 }, { "epoch": 8.76, "grad_norm": 4.001796722412109, "learning_rate": 6.275234094948714e-07, "loss": 0.0254, "step": 891050 }, { "epoch": 8.76, "grad_norm": 0.9345253705978394, "learning_rate": 6.273992870406228e-07, "loss": 0.0204, "step": 891075 }, { "epoch": 8.76, "grad_norm": 10.19247817993164, "learning_rate": 6.272751645863744e-07, "loss": 0.0247, "step": 891100 }, { "epoch": 8.76, "grad_norm": 0.07539329677820206, "learning_rate": 6.271560070302958e-07, "loss": 0.0368, "step": 891125 }, { "epoch": 8.76, "grad_norm": 14.393712997436523, "learning_rate": 6.270318845760474e-07, "loss": 0.0285, "step": 891150 }, { "epoch": 8.76, "grad_norm": 4.605910301208496, "learning_rate": 6.26907762121799e-07, "loss": 0.0393, "step": 891175 }, { "epoch": 8.76, "grad_norm": 4.845719337463379, "learning_rate": 6.267836396675504e-07, "loss": 0.0367, "step": 891200 }, { "epoch": 8.76, "grad_norm": 0.13927917182445526, "learning_rate": 6.26659517213302e-07, "loss": 0.0447, "step": 891225 }, { "epoch": 8.76, "grad_norm": 0.564176082611084, "learning_rate": 6.265353947590536e-07, "loss": 0.0224, "step": 891250 }, { "epoch": 8.76, "grad_norm": 0.8857828378677368, "learning_rate": 6.264112723048051e-07, "loss": 0.026, "step": 891275 }, { "epoch": 8.76, "grad_norm": 7.058804512023926, "learning_rate": 6.262871498505565e-07, "loss": 0.0139, "step": 891300 }, { "epoch": 8.76, "grad_norm": 0.02321840077638626, "learning_rate": 6.261630273963081e-07, "loss": 0.0244, "step": 891325 }, { "epoch": 8.76, "grad_norm": 15.314383506774902, "learning_rate": 6.260389049420597e-07, "loss": 0.0487, "step": 891350 }, { "epoch": 8.76, "grad_norm": 34.8225212097168, "learning_rate": 6.259147824878112e-07, "loss": 0.0468, "step": 891375 }, { "epoch": 8.76, "grad_norm": 8.414366722106934, "learning_rate": 6.257906600335628e-07, "loss": 0.0273, "step": 891400 }, { "epoch": 8.76, "grad_norm": 0.05851899832487106, "learning_rate": 6.256665375793143e-07, "loss": 0.0238, "step": 891425 }, { "epoch": 8.76, "grad_norm": 11.469046592712402, "learning_rate": 6.255424151250658e-07, "loss": 0.0397, "step": 891450 }, { "epoch": 8.77, "grad_norm": 0.053534332662820816, "learning_rate": 6.254182926708174e-07, "loss": 0.0382, "step": 891475 }, { "epoch": 8.77, "grad_norm": 5.453963756561279, "learning_rate": 6.25294170216569e-07, "loss": 0.0309, "step": 891500 }, { "epoch": 8.77, "grad_norm": 0.0639263242483139, "learning_rate": 6.251700477623204e-07, "loss": 0.0381, "step": 891525 }, { "epoch": 8.77, "grad_norm": 9.895431518554688, "learning_rate": 6.250459253080719e-07, "loss": 0.021, "step": 891550 }, { "epoch": 8.77, "grad_norm": 3.6760005950927734, "learning_rate": 6.249218028538235e-07, "loss": 0.0201, "step": 891575 }, { "epoch": 8.77, "grad_norm": 11.660737991333008, "learning_rate": 6.247976803995751e-07, "loss": 0.046, "step": 891600 }, { "epoch": 8.77, "grad_norm": 0.4018914997577667, "learning_rate": 6.246735579453266e-07, "loss": 0.0228, "step": 891625 }, { "epoch": 8.77, "grad_norm": 11.497135162353516, "learning_rate": 6.245494354910781e-07, "loss": 0.0289, "step": 891650 }, { "epoch": 8.77, "grad_norm": 0.18234883248806, "learning_rate": 6.244253130368297e-07, "loss": 0.038, "step": 891675 }, { "epoch": 8.77, "grad_norm": 7.929600715637207, "learning_rate": 6.243011905825812e-07, "loss": 0.0426, "step": 891700 }, { "epoch": 8.77, "grad_norm": 1.595997929573059, "learning_rate": 6.241770681283327e-07, "loss": 0.0429, "step": 891725 }, { "epoch": 8.77, "grad_norm": 6.223890781402588, "learning_rate": 6.240529456740844e-07, "loss": 0.0266, "step": 891750 }, { "epoch": 8.77, "grad_norm": 0.02895720861852169, "learning_rate": 6.239288232198358e-07, "loss": 0.0401, "step": 891775 }, { "epoch": 8.77, "grad_norm": 0.4907456040382385, "learning_rate": 6.238047007655874e-07, "loss": 0.0417, "step": 891800 }, { "epoch": 8.77, "grad_norm": 0.02810024842619896, "learning_rate": 6.236805783113388e-07, "loss": 0.0474, "step": 891825 }, { "epoch": 8.77, "grad_norm": 1.3798750638961792, "learning_rate": 6.235564558570905e-07, "loss": 0.0413, "step": 891850 }, { "epoch": 8.77, "grad_norm": 5.311153411865234, "learning_rate": 6.234323334028419e-07, "loss": 0.033, "step": 891875 }, { "epoch": 8.77, "grad_norm": 14.15562629699707, "learning_rate": 6.233082109485935e-07, "loss": 0.0261, "step": 891900 }, { "epoch": 8.77, "grad_norm": 0.1985960155725479, "learning_rate": 6.231840884943451e-07, "loss": 0.0397, "step": 891925 }, { "epoch": 8.77, "grad_norm": 1.2392230033874512, "learning_rate": 6.230599660400966e-07, "loss": 0.0239, "step": 891950 }, { "epoch": 8.77, "grad_norm": 0.7156880497932434, "learning_rate": 6.229358435858481e-07, "loss": 0.0268, "step": 891975 }, { "epoch": 8.77, "grad_norm": 4.9607462882995605, "learning_rate": 6.228117211315996e-07, "loss": 0.0357, "step": 892000 }, { "epoch": 8.77, "grad_norm": 0.16057899594306946, "learning_rate": 6.226875986773512e-07, "loss": 0.0317, "step": 892025 }, { "epoch": 8.77, "grad_norm": 1.8070259094238281, "learning_rate": 6.225634762231027e-07, "loss": 0.0267, "step": 892050 }, { "epoch": 8.77, "grad_norm": 0.036039214581251144, "learning_rate": 6.224393537688542e-07, "loss": 0.0468, "step": 892075 }, { "epoch": 8.77, "grad_norm": 4.954885482788086, "learning_rate": 6.223152313146058e-07, "loss": 0.0259, "step": 892100 }, { "epoch": 8.77, "grad_norm": 0.012876372784376144, "learning_rate": 6.221911088603573e-07, "loss": 0.0269, "step": 892125 }, { "epoch": 8.77, "grad_norm": 6.989842891693115, "learning_rate": 6.220669864061088e-07, "loss": 0.0364, "step": 892150 }, { "epoch": 8.77, "grad_norm": 1.2368730306625366, "learning_rate": 6.219428639518605e-07, "loss": 0.0423, "step": 892175 }, { "epoch": 8.77, "grad_norm": 11.542620658874512, "learning_rate": 6.218187414976119e-07, "loss": 0.0292, "step": 892200 }, { "epoch": 8.77, "grad_norm": 0.0031442176550626755, "learning_rate": 6.216946190433635e-07, "loss": 0.0256, "step": 892225 }, { "epoch": 8.77, "grad_norm": 3.343736410140991, "learning_rate": 6.21570496589115e-07, "loss": 0.0213, "step": 892250 }, { "epoch": 8.77, "grad_norm": 0.011590932495892048, "learning_rate": 6.214463741348666e-07, "loss": 0.0414, "step": 892275 }, { "epoch": 8.77, "grad_norm": 4.568888187408447, "learning_rate": 6.213222516806181e-07, "loss": 0.0118, "step": 892300 }, { "epoch": 8.77, "grad_norm": 0.23343758285045624, "learning_rate": 6.211981292263696e-07, "loss": 0.0378, "step": 892325 }, { "epoch": 8.77, "grad_norm": 29.807649612426758, "learning_rate": 6.210740067721212e-07, "loss": 0.0347, "step": 892350 }, { "epoch": 8.77, "grad_norm": 3.3568131923675537, "learning_rate": 6.209498843178727e-07, "loss": 0.0279, "step": 892375 }, { "epoch": 8.77, "grad_norm": 7.510730743408203, "learning_rate": 6.208257618636242e-07, "loss": 0.0174, "step": 892400 }, { "epoch": 8.77, "grad_norm": 0.15105530619621277, "learning_rate": 6.207016394093757e-07, "loss": 0.0463, "step": 892425 }, { "epoch": 8.77, "grad_norm": 2.6536052227020264, "learning_rate": 6.205775169551273e-07, "loss": 0.0327, "step": 892450 }, { "epoch": 8.77, "grad_norm": 0.1795230358839035, "learning_rate": 6.204533945008789e-07, "loss": 0.0364, "step": 892475 }, { "epoch": 8.78, "grad_norm": 9.53732967376709, "learning_rate": 6.203292720466303e-07, "loss": 0.0274, "step": 892500 }, { "epoch": 8.78, "grad_norm": 0.03851306810975075, "learning_rate": 6.20205149592382e-07, "loss": 0.0367, "step": 892525 }, { "epoch": 8.78, "grad_norm": 11.87699031829834, "learning_rate": 6.200810271381334e-07, "loss": 0.0149, "step": 892550 }, { "epoch": 8.78, "grad_norm": 1.8297008275985718, "learning_rate": 6.19956904683885e-07, "loss": 0.0536, "step": 892575 }, { "epoch": 8.78, "grad_norm": 14.05364990234375, "learning_rate": 6.198327822296366e-07, "loss": 0.0331, "step": 892600 }, { "epoch": 8.78, "grad_norm": 0.041975077241659164, "learning_rate": 6.197086597753881e-07, "loss": 0.0271, "step": 892625 }, { "epoch": 8.78, "grad_norm": 1.9103889465332031, "learning_rate": 6.195845373211396e-07, "loss": 0.0256, "step": 892650 }, { "epoch": 8.78, "grad_norm": 0.02846892550587654, "learning_rate": 6.194604148668911e-07, "loss": 0.0277, "step": 892675 }, { "epoch": 8.78, "grad_norm": 9.882482528686523, "learning_rate": 6.193362924126427e-07, "loss": 0.0274, "step": 892700 }, { "epoch": 8.78, "grad_norm": 1.178804636001587, "learning_rate": 6.192121699583942e-07, "loss": 0.0303, "step": 892725 }, { "epoch": 8.78, "grad_norm": 8.999496459960938, "learning_rate": 6.190880475041457e-07, "loss": 0.0272, "step": 892750 }, { "epoch": 8.78, "grad_norm": 0.05452491343021393, "learning_rate": 6.189639250498973e-07, "loss": 0.042, "step": 892775 }, { "epoch": 8.78, "grad_norm": 3.9134552478790283, "learning_rate": 6.188398025956488e-07, "loss": 0.0183, "step": 892800 }, { "epoch": 8.78, "grad_norm": 0.09854976832866669, "learning_rate": 6.187156801414003e-07, "loss": 0.0166, "step": 892825 }, { "epoch": 8.78, "grad_norm": 1.7558480501174927, "learning_rate": 6.185915576871518e-07, "loss": 0.0262, "step": 892850 }, { "epoch": 8.78, "grad_norm": 2.967374563217163, "learning_rate": 6.184674352329034e-07, "loss": 0.0486, "step": 892875 }, { "epoch": 8.78, "grad_norm": 1.6318159103393555, "learning_rate": 6.18343312778655e-07, "loss": 0.0326, "step": 892900 }, { "epoch": 8.78, "grad_norm": 0.07501034438610077, "learning_rate": 6.182191903244064e-07, "loss": 0.0333, "step": 892925 }, { "epoch": 8.78, "grad_norm": 15.263277053833008, "learning_rate": 6.180950678701581e-07, "loss": 0.025, "step": 892950 }, { "epoch": 8.78, "grad_norm": 0.5450663566589355, "learning_rate": 6.179709454159096e-07, "loss": 0.0282, "step": 892975 }, { "epoch": 8.78, "grad_norm": 3.1670331954956055, "learning_rate": 6.178468229616611e-07, "loss": 0.0257, "step": 893000 }, { "epoch": 8.78, "grad_norm": 0.031458206474781036, "learning_rate": 6.177227005074127e-07, "loss": 0.0122, "step": 893025 }, { "epoch": 8.78, "grad_norm": 7.669879913330078, "learning_rate": 6.175985780531642e-07, "loss": 0.0271, "step": 893050 }, { "epoch": 8.78, "grad_norm": 0.14346936345100403, "learning_rate": 6.174744555989157e-07, "loss": 0.0448, "step": 893075 }, { "epoch": 8.78, "grad_norm": 7.745973110198975, "learning_rate": 6.173503331446672e-07, "loss": 0.0146, "step": 893100 }, { "epoch": 8.78, "grad_norm": 8.9412202835083, "learning_rate": 6.172262106904188e-07, "loss": 0.0342, "step": 893125 }, { "epoch": 8.78, "grad_norm": 14.951078414916992, "learning_rate": 6.171020882361703e-07, "loss": 0.0329, "step": 893150 }, { "epoch": 8.78, "grad_norm": 2.105999708175659, "learning_rate": 6.169779657819218e-07, "loss": 0.0376, "step": 893175 }, { "epoch": 8.78, "grad_norm": 13.891400337219238, "learning_rate": 6.168538433276735e-07, "loss": 0.0374, "step": 893200 }, { "epoch": 8.78, "grad_norm": 1.5671707391738892, "learning_rate": 6.167297208734249e-07, "loss": 0.0269, "step": 893225 }, { "epoch": 8.78, "grad_norm": 6.834671974182129, "learning_rate": 6.166055984191765e-07, "loss": 0.0168, "step": 893250 }, { "epoch": 8.78, "grad_norm": 1.2182978391647339, "learning_rate": 6.164814759649279e-07, "loss": 0.03, "step": 893275 }, { "epoch": 8.78, "grad_norm": 0.5281778573989868, "learning_rate": 6.163573535106796e-07, "loss": 0.0301, "step": 893300 }, { "epoch": 8.78, "grad_norm": 1.6225502490997314, "learning_rate": 6.162332310564311e-07, "loss": 0.0206, "step": 893325 }, { "epoch": 8.78, "grad_norm": 8.126023292541504, "learning_rate": 6.161091086021826e-07, "loss": 0.0374, "step": 893350 }, { "epoch": 8.78, "grad_norm": 0.3084581196308136, "learning_rate": 6.159849861479342e-07, "loss": 0.0326, "step": 893375 }, { "epoch": 8.78, "grad_norm": 14.776534080505371, "learning_rate": 6.158608636936857e-07, "loss": 0.0311, "step": 893400 }, { "epoch": 8.78, "grad_norm": 0.03778167814016342, "learning_rate": 6.157367412394372e-07, "loss": 0.0338, "step": 893425 }, { "epoch": 8.78, "grad_norm": 8.358037948608398, "learning_rate": 6.156126187851888e-07, "loss": 0.0186, "step": 893450 }, { "epoch": 8.78, "grad_norm": 0.0034187932033091784, "learning_rate": 6.154884963309403e-07, "loss": 0.0243, "step": 893475 }, { "epoch": 8.79, "grad_norm": 2.22550106048584, "learning_rate": 6.153643738766918e-07, "loss": 0.0166, "step": 893500 }, { "epoch": 8.79, "grad_norm": 0.5606633424758911, "learning_rate": 6.152402514224433e-07, "loss": 0.0361, "step": 893525 }, { "epoch": 8.79, "grad_norm": 15.57383918762207, "learning_rate": 6.151161289681949e-07, "loss": 0.031, "step": 893550 }, { "epoch": 8.79, "grad_norm": 0.14956332743167877, "learning_rate": 6.149969714121163e-07, "loss": 0.0354, "step": 893575 }, { "epoch": 8.79, "grad_norm": 9.091508865356445, "learning_rate": 6.14872848957868e-07, "loss": 0.0346, "step": 893600 }, { "epoch": 8.79, "grad_norm": 1.4727391004562378, "learning_rate": 6.147487265036194e-07, "loss": 0.0201, "step": 893625 }, { "epoch": 8.79, "grad_norm": 19.59851837158203, "learning_rate": 6.14624604049371e-07, "loss": 0.0248, "step": 893650 }, { "epoch": 8.79, "grad_norm": 7.073828220367432, "learning_rate": 6.145004815951225e-07, "loss": 0.0528, "step": 893675 }, { "epoch": 8.79, "grad_norm": 5.3934454917907715, "learning_rate": 6.143763591408741e-07, "loss": 0.0312, "step": 893700 }, { "epoch": 8.79, "grad_norm": 6.417356967926025, "learning_rate": 6.142522366866256e-07, "loss": 0.0314, "step": 893725 }, { "epoch": 8.79, "grad_norm": 15.628540992736816, "learning_rate": 6.141281142323771e-07, "loss": 0.0248, "step": 893750 }, { "epoch": 8.79, "grad_norm": 0.22303690016269684, "learning_rate": 6.140039917781287e-07, "loss": 0.0288, "step": 893775 }, { "epoch": 8.79, "grad_norm": 6.060579776763916, "learning_rate": 6.138798693238803e-07, "loss": 0.0291, "step": 893800 }, { "epoch": 8.79, "grad_norm": 0.10657896101474762, "learning_rate": 6.137557468696317e-07, "loss": 0.0253, "step": 893825 }, { "epoch": 8.79, "grad_norm": 2.9888579845428467, "learning_rate": 6.136316244153834e-07, "loss": 0.0195, "step": 893850 }, { "epoch": 8.79, "grad_norm": 0.5495550632476807, "learning_rate": 6.135075019611348e-07, "loss": 0.0289, "step": 893875 }, { "epoch": 8.79, "grad_norm": 4.046561241149902, "learning_rate": 6.133833795068864e-07, "loss": 0.031, "step": 893900 }, { "epoch": 8.79, "grad_norm": 3.2187440395355225, "learning_rate": 6.132592570526379e-07, "loss": 0.0296, "step": 893925 }, { "epoch": 8.79, "grad_norm": 6.762036323547363, "learning_rate": 6.131351345983895e-07, "loss": 0.0257, "step": 893950 }, { "epoch": 8.79, "grad_norm": 0.14896757900714874, "learning_rate": 6.13011012144141e-07, "loss": 0.0277, "step": 893975 }, { "epoch": 8.79, "grad_norm": 0.2942734360694885, "learning_rate": 6.128868896898925e-07, "loss": 0.0207, "step": 894000 }, { "epoch": 8.79, "grad_norm": 0.014397288672626019, "learning_rate": 6.127627672356441e-07, "loss": 0.0325, "step": 894025 }, { "epoch": 8.79, "grad_norm": 11.541435241699219, "learning_rate": 6.126386447813956e-07, "loss": 0.0397, "step": 894050 }, { "epoch": 8.79, "grad_norm": 0.03201603516936302, "learning_rate": 6.125145223271471e-07, "loss": 0.0457, "step": 894075 }, { "epoch": 8.79, "grad_norm": 11.411417961120605, "learning_rate": 6.123903998728986e-07, "loss": 0.0189, "step": 894100 }, { "epoch": 8.79, "grad_norm": 0.012370252050459385, "learning_rate": 6.122662774186502e-07, "loss": 0.0387, "step": 894125 }, { "epoch": 8.79, "grad_norm": 4.632658004760742, "learning_rate": 6.121421549644017e-07, "loss": 0.0241, "step": 894150 }, { "epoch": 8.79, "grad_norm": 2.476001262664795, "learning_rate": 6.120180325101532e-07, "loss": 0.0372, "step": 894175 }, { "epoch": 8.79, "grad_norm": 6.588235378265381, "learning_rate": 6.118939100559048e-07, "loss": 0.0253, "step": 894200 }, { "epoch": 8.79, "grad_norm": 0.2666773200035095, "learning_rate": 6.117697876016564e-07, "loss": 0.0384, "step": 894225 }, { "epoch": 8.79, "grad_norm": 11.156538009643555, "learning_rate": 6.116456651474078e-07, "loss": 0.0323, "step": 894250 }, { "epoch": 8.79, "grad_norm": 0.17848049104213715, "learning_rate": 6.115215426931595e-07, "loss": 0.0389, "step": 894275 }, { "epoch": 8.79, "grad_norm": 4.340307712554932, "learning_rate": 6.113974202389109e-07, "loss": 0.0227, "step": 894300 }, { "epoch": 8.79, "grad_norm": 0.2879691421985626, "learning_rate": 6.112732977846625e-07, "loss": 0.0181, "step": 894325 }, { "epoch": 8.79, "grad_norm": 10.51986312866211, "learning_rate": 6.11149175330414e-07, "loss": 0.0318, "step": 894350 }, { "epoch": 8.79, "grad_norm": 0.5723971128463745, "learning_rate": 6.110250528761656e-07, "loss": 0.0187, "step": 894375 }, { "epoch": 8.79, "grad_norm": 18.663755416870117, "learning_rate": 6.109009304219171e-07, "loss": 0.023, "step": 894400 }, { "epoch": 8.79, "grad_norm": 6.584501266479492, "learning_rate": 6.107768079676686e-07, "loss": 0.0451, "step": 894425 }, { "epoch": 8.79, "grad_norm": 9.572281837463379, "learning_rate": 6.106526855134202e-07, "loss": 0.0293, "step": 894450 }, { "epoch": 8.79, "grad_norm": 0.34033167362213135, "learning_rate": 6.105285630591717e-07, "loss": 0.0348, "step": 894475 }, { "epoch": 8.79, "grad_norm": 6.715098857879639, "learning_rate": 6.104044406049232e-07, "loss": 0.022, "step": 894500 }, { "epoch": 8.8, "grad_norm": 0.13748373091220856, "learning_rate": 6.102803181506749e-07, "loss": 0.0234, "step": 894525 }, { "epoch": 8.8, "grad_norm": 5.501849174499512, "learning_rate": 6.101561956964263e-07, "loss": 0.0215, "step": 894550 }, { "epoch": 8.8, "grad_norm": 0.054911598563194275, "learning_rate": 6.100320732421779e-07, "loss": 0.031, "step": 894575 }, { "epoch": 8.8, "grad_norm": 8.750287055969238, "learning_rate": 6.099079507879293e-07, "loss": 0.0167, "step": 894600 }, { "epoch": 8.8, "grad_norm": 0.14117613434791565, "learning_rate": 6.09783828333681e-07, "loss": 0.0297, "step": 894625 }, { "epoch": 8.8, "grad_norm": 11.376325607299805, "learning_rate": 6.096597058794325e-07, "loss": 0.0417, "step": 894650 }, { "epoch": 8.8, "grad_norm": 1.9547237157821655, "learning_rate": 6.09535583425184e-07, "loss": 0.0362, "step": 894675 }, { "epoch": 8.8, "grad_norm": 15.53499698638916, "learning_rate": 6.094114609709356e-07, "loss": 0.0219, "step": 894700 }, { "epoch": 8.8, "grad_norm": 0.05148385837674141, "learning_rate": 6.092873385166871e-07, "loss": 0.0327, "step": 894725 }, { "epoch": 8.8, "grad_norm": 10.422113418579102, "learning_rate": 6.091632160624386e-07, "loss": 0.0184, "step": 894750 }, { "epoch": 8.8, "grad_norm": 1.8584638833999634, "learning_rate": 6.090390936081901e-07, "loss": 0.0384, "step": 894775 }, { "epoch": 8.8, "grad_norm": 11.60620403289795, "learning_rate": 6.089149711539417e-07, "loss": 0.0267, "step": 894800 }, { "epoch": 8.8, "grad_norm": 1.8986132144927979, "learning_rate": 6.087908486996932e-07, "loss": 0.0399, "step": 894825 }, { "epoch": 8.8, "grad_norm": 2.987785816192627, "learning_rate": 6.086667262454447e-07, "loss": 0.0302, "step": 894850 }, { "epoch": 8.8, "grad_norm": 0.3496997654438019, "learning_rate": 6.085426037911963e-07, "loss": 0.0422, "step": 894875 }, { "epoch": 8.8, "grad_norm": 8.94513988494873, "learning_rate": 6.084184813369478e-07, "loss": 0.0369, "step": 894900 }, { "epoch": 8.8, "grad_norm": 0.03880154713988304, "learning_rate": 6.082943588826993e-07, "loss": 0.0327, "step": 894925 }, { "epoch": 8.8, "grad_norm": 2.8421554565429688, "learning_rate": 6.08170236428451e-07, "loss": 0.0294, "step": 894950 }, { "epoch": 8.8, "grad_norm": 1.4422409534454346, "learning_rate": 6.080461139742024e-07, "loss": 0.0477, "step": 894975 }, { "epoch": 8.8, "grad_norm": 10.56800651550293, "learning_rate": 6.07921991519954e-07, "loss": 0.0304, "step": 895000 }, { "epoch": 8.8, "grad_norm": 0.041205182671546936, "learning_rate": 6.077978690657054e-07, "loss": 0.022, "step": 895025 }, { "epoch": 8.8, "grad_norm": 10.67298412322998, "learning_rate": 6.076737466114571e-07, "loss": 0.0226, "step": 895050 }, { "epoch": 8.8, "grad_norm": 0.11585497111082077, "learning_rate": 6.075496241572086e-07, "loss": 0.0441, "step": 895075 }, { "epoch": 8.8, "grad_norm": 0.6781600713729858, "learning_rate": 6.074255017029601e-07, "loss": 0.0149, "step": 895100 }, { "epoch": 8.8, "grad_norm": 0.016540950164198875, "learning_rate": 6.073013792487117e-07, "loss": 0.0208, "step": 895125 }, { "epoch": 8.8, "grad_norm": 10.559366226196289, "learning_rate": 6.071772567944632e-07, "loss": 0.0259, "step": 895150 }, { "epoch": 8.8, "grad_norm": 3.7580454349517822, "learning_rate": 6.070531343402147e-07, "loss": 0.0467, "step": 895175 }, { "epoch": 8.8, "grad_norm": 10.48797607421875, "learning_rate": 6.069290118859663e-07, "loss": 0.0199, "step": 895200 }, { "epoch": 8.8, "grad_norm": 0.22805650532245636, "learning_rate": 6.068048894317178e-07, "loss": 0.0228, "step": 895225 }, { "epoch": 8.8, "grad_norm": 7.88247013092041, "learning_rate": 6.066807669774694e-07, "loss": 0.034, "step": 895250 }, { "epoch": 8.8, "grad_norm": 0.05633668974041939, "learning_rate": 6.065566445232208e-07, "loss": 0.0259, "step": 895275 }, { "epoch": 8.8, "grad_norm": 5.974642753601074, "learning_rate": 6.064325220689725e-07, "loss": 0.0398, "step": 895300 }, { "epoch": 8.8, "grad_norm": 1.6150319576263428, "learning_rate": 6.06308399614724e-07, "loss": 0.0236, "step": 895325 }, { "epoch": 8.8, "grad_norm": 2.267714262008667, "learning_rate": 6.061842771604755e-07, "loss": 0.0139, "step": 895350 }, { "epoch": 8.8, "grad_norm": 0.11724712699651718, "learning_rate": 6.06060154706227e-07, "loss": 0.0516, "step": 895375 }, { "epoch": 8.8, "grad_norm": 7.521982192993164, "learning_rate": 6.059360322519786e-07, "loss": 0.0213, "step": 895400 }, { "epoch": 8.8, "grad_norm": 0.015858426690101624, "learning_rate": 6.058119097977301e-07, "loss": 0.0392, "step": 895425 }, { "epoch": 8.8, "grad_norm": 3.254615545272827, "learning_rate": 6.056877873434816e-07, "loss": 0.0332, "step": 895450 }, { "epoch": 8.8, "grad_norm": 1.5768510103225708, "learning_rate": 6.055636648892332e-07, "loss": 0.0317, "step": 895475 }, { "epoch": 8.8, "grad_norm": 11.62501049041748, "learning_rate": 6.054395424349847e-07, "loss": 0.0163, "step": 895500 }, { "epoch": 8.8, "grad_norm": 0.09634136408567429, "learning_rate": 6.053154199807362e-07, "loss": 0.0282, "step": 895525 }, { "epoch": 8.81, "grad_norm": 0.855615496635437, "learning_rate": 6.051912975264878e-07, "loss": 0.0363, "step": 895550 }, { "epoch": 8.81, "grad_norm": 0.01760016940534115, "learning_rate": 6.050671750722393e-07, "loss": 0.0387, "step": 895575 }, { "epoch": 8.81, "grad_norm": 14.040287971496582, "learning_rate": 6.049430526179908e-07, "loss": 0.0238, "step": 895600 }, { "epoch": 8.81, "grad_norm": 1.079015851020813, "learning_rate": 6.048238950619123e-07, "loss": 0.0261, "step": 895625 }, { "epoch": 8.81, "grad_norm": 1.93276846408844, "learning_rate": 6.046997726076639e-07, "loss": 0.0332, "step": 895650 }, { "epoch": 8.81, "grad_norm": 0.914376437664032, "learning_rate": 6.045756501534154e-07, "loss": 0.0258, "step": 895675 }, { "epoch": 8.81, "grad_norm": 8.507271766662598, "learning_rate": 6.04451527699167e-07, "loss": 0.0285, "step": 895700 }, { "epoch": 8.81, "grad_norm": 0.036887865513563156, "learning_rate": 6.043274052449185e-07, "loss": 0.0257, "step": 895725 }, { "epoch": 8.81, "grad_norm": 7.650245666503906, "learning_rate": 6.0420328279067e-07, "loss": 0.0206, "step": 895750 }, { "epoch": 8.81, "grad_norm": 0.02383435145020485, "learning_rate": 6.040791603364215e-07, "loss": 0.0416, "step": 895775 }, { "epoch": 8.81, "grad_norm": 8.88055419921875, "learning_rate": 6.039550378821731e-07, "loss": 0.0222, "step": 895800 }, { "epoch": 8.81, "grad_norm": 1.0870721340179443, "learning_rate": 6.038309154279246e-07, "loss": 0.0271, "step": 895825 }, { "epoch": 8.81, "grad_norm": 1.4440486431121826, "learning_rate": 6.037067929736761e-07, "loss": 0.018, "step": 895850 }, { "epoch": 8.81, "grad_norm": 0.3755880296230316, "learning_rate": 6.035826705194277e-07, "loss": 0.041, "step": 895875 }, { "epoch": 8.81, "grad_norm": 18.7598819732666, "learning_rate": 6.034585480651792e-07, "loss": 0.0424, "step": 895900 }, { "epoch": 8.81, "grad_norm": 0.2765401601791382, "learning_rate": 6.033344256109307e-07, "loss": 0.0278, "step": 895925 }, { "epoch": 8.81, "grad_norm": 1.4705525636672974, "learning_rate": 6.032103031566824e-07, "loss": 0.021, "step": 895950 }, { "epoch": 8.81, "grad_norm": 0.015512630343437195, "learning_rate": 6.030861807024339e-07, "loss": 0.0379, "step": 895975 }, { "epoch": 8.81, "grad_norm": 0.3136013448238373, "learning_rate": 6.029620582481854e-07, "loss": 0.0286, "step": 896000 }, { "epoch": 8.81, "grad_norm": 35.53017807006836, "learning_rate": 6.028379357939369e-07, "loss": 0.0333, "step": 896025 }, { "epoch": 8.81, "grad_norm": 13.279983520507812, "learning_rate": 6.027138133396885e-07, "loss": 0.0373, "step": 896050 }, { "epoch": 8.81, "grad_norm": 0.3073752224445343, "learning_rate": 6.0258969088544e-07, "loss": 0.0381, "step": 896075 }, { "epoch": 8.81, "grad_norm": 6.445417881011963, "learning_rate": 6.024655684311915e-07, "loss": 0.0379, "step": 896100 }, { "epoch": 8.81, "grad_norm": 1.757728099822998, "learning_rate": 6.023414459769431e-07, "loss": 0.0393, "step": 896125 }, { "epoch": 8.81, "grad_norm": 7.447018146514893, "learning_rate": 6.022173235226946e-07, "loss": 0.011, "step": 896150 }, { "epoch": 8.81, "grad_norm": 0.35190072655677795, "learning_rate": 6.020932010684461e-07, "loss": 0.0433, "step": 896175 }, { "epoch": 8.81, "grad_norm": 0.4346681535243988, "learning_rate": 6.019690786141976e-07, "loss": 0.0286, "step": 896200 }, { "epoch": 8.81, "grad_norm": 0.12103263288736343, "learning_rate": 6.018449561599493e-07, "loss": 0.0512, "step": 896225 }, { "epoch": 8.81, "grad_norm": 10.590993881225586, "learning_rate": 6.017208337057007e-07, "loss": 0.0368, "step": 896250 }, { "epoch": 8.81, "grad_norm": 1.3546196222305298, "learning_rate": 6.015967112514523e-07, "loss": 0.0336, "step": 896275 }, { "epoch": 8.81, "grad_norm": 2.1090738773345947, "learning_rate": 6.014725887972038e-07, "loss": 0.0263, "step": 896300 }, { "epoch": 8.81, "grad_norm": 0.07385177910327911, "learning_rate": 6.013484663429554e-07, "loss": 0.024, "step": 896325 }, { "epoch": 8.81, "grad_norm": 6.864266395568848, "learning_rate": 6.012243438887068e-07, "loss": 0.0314, "step": 896350 }, { "epoch": 8.81, "grad_norm": 0.1553407907485962, "learning_rate": 6.011002214344585e-07, "loss": 0.0302, "step": 896375 }, { "epoch": 8.81, "grad_norm": 13.657344818115234, "learning_rate": 6.0097609898021e-07, "loss": 0.0247, "step": 896400 }, { "epoch": 8.81, "grad_norm": 0.005885659251362085, "learning_rate": 6.008519765259615e-07, "loss": 0.0441, "step": 896425 }, { "epoch": 8.81, "grad_norm": 4.377810001373291, "learning_rate": 6.00727854071713e-07, "loss": 0.0335, "step": 896450 }, { "epoch": 8.81, "grad_norm": 0.06929565221071243, "learning_rate": 6.006037316174646e-07, "loss": 0.0337, "step": 896475 }, { "epoch": 8.81, "grad_norm": 18.752216339111328, "learning_rate": 6.004796091632161e-07, "loss": 0.0289, "step": 896500 }, { "epoch": 8.81, "grad_norm": 12.918031692504883, "learning_rate": 6.003554867089676e-07, "loss": 0.0363, "step": 896525 }, { "epoch": 8.82, "grad_norm": 15.156122207641602, "learning_rate": 6.002313642547192e-07, "loss": 0.0242, "step": 896550 }, { "epoch": 8.82, "grad_norm": 0.047259457409381866, "learning_rate": 6.001072418004707e-07, "loss": 0.0467, "step": 896575 }, { "epoch": 8.82, "grad_norm": 9.84130859375, "learning_rate": 5.999831193462222e-07, "loss": 0.0148, "step": 896600 }, { "epoch": 8.82, "grad_norm": 0.2706244885921478, "learning_rate": 5.998589968919739e-07, "loss": 0.0241, "step": 896625 }, { "epoch": 8.82, "grad_norm": 13.505280494689941, "learning_rate": 5.997348744377254e-07, "loss": 0.0287, "step": 896650 }, { "epoch": 8.82, "grad_norm": 8.100080490112305, "learning_rate": 5.996107519834769e-07, "loss": 0.0314, "step": 896675 }, { "epoch": 8.82, "grad_norm": 14.464370727539062, "learning_rate": 5.994866295292284e-07, "loss": 0.0432, "step": 896700 }, { "epoch": 8.82, "grad_norm": 0.027384497225284576, "learning_rate": 5.9936250707498e-07, "loss": 0.0201, "step": 896725 }, { "epoch": 8.82, "grad_norm": 14.586440086364746, "learning_rate": 5.992383846207315e-07, "loss": 0.0281, "step": 896750 }, { "epoch": 8.82, "grad_norm": 0.8307069540023804, "learning_rate": 5.99114262166483e-07, "loss": 0.0278, "step": 896775 }, { "epoch": 8.82, "grad_norm": 15.865921020507812, "learning_rate": 5.989901397122346e-07, "loss": 0.0398, "step": 896800 }, { "epoch": 8.82, "grad_norm": 0.3631603717803955, "learning_rate": 5.988660172579861e-07, "loss": 0.027, "step": 896825 }, { "epoch": 8.82, "grad_norm": 8.336530685424805, "learning_rate": 5.987418948037376e-07, "loss": 0.0322, "step": 896850 }, { "epoch": 8.82, "grad_norm": 2.759833812713623, "learning_rate": 5.986177723494891e-07, "loss": 0.0528, "step": 896875 }, { "epoch": 8.82, "grad_norm": 0.7256322503089905, "learning_rate": 5.984936498952407e-07, "loss": 0.0355, "step": 896900 }, { "epoch": 8.82, "grad_norm": 2.0153043270111084, "learning_rate": 5.983695274409922e-07, "loss": 0.0426, "step": 896925 }, { "epoch": 8.82, "grad_norm": 2.8819150924682617, "learning_rate": 5.982454049867438e-07, "loss": 0.0502, "step": 896950 }, { "epoch": 8.82, "grad_norm": 0.014809804037213326, "learning_rate": 5.981212825324953e-07, "loss": 0.0444, "step": 896975 }, { "epoch": 8.82, "grad_norm": 6.7399516105651855, "learning_rate": 5.979971600782469e-07, "loss": 0.03, "step": 897000 }, { "epoch": 8.82, "grad_norm": 8.171599388122559, "learning_rate": 5.978730376239983e-07, "loss": 0.0549, "step": 897025 }, { "epoch": 8.82, "grad_norm": 10.672667503356934, "learning_rate": 5.9774891516975e-07, "loss": 0.0111, "step": 897050 }, { "epoch": 8.82, "grad_norm": 0.016828840598464012, "learning_rate": 5.976247927155015e-07, "loss": 0.0283, "step": 897075 }, { "epoch": 8.82, "grad_norm": 0.28046125173568726, "learning_rate": 5.97500670261253e-07, "loss": 0.0213, "step": 897100 }, { "epoch": 8.82, "grad_norm": 0.010989023372530937, "learning_rate": 5.973765478070045e-07, "loss": 0.04, "step": 897125 }, { "epoch": 8.82, "grad_norm": 0.4369089901447296, "learning_rate": 5.972524253527561e-07, "loss": 0.0247, "step": 897150 }, { "epoch": 8.82, "grad_norm": 3.4908664226531982, "learning_rate": 5.971283028985076e-07, "loss": 0.0318, "step": 897175 }, { "epoch": 8.82, "grad_norm": 2.895874500274658, "learning_rate": 5.970041804442591e-07, "loss": 0.0182, "step": 897200 }, { "epoch": 8.82, "grad_norm": 0.0015663292724639177, "learning_rate": 5.968800579900107e-07, "loss": 0.0366, "step": 897225 }, { "epoch": 8.82, "grad_norm": 8.032487869262695, "learning_rate": 5.967559355357622e-07, "loss": 0.0287, "step": 897250 }, { "epoch": 8.82, "grad_norm": 0.14247135818004608, "learning_rate": 5.966318130815137e-07, "loss": 0.0561, "step": 897275 }, { "epoch": 8.82, "grad_norm": 8.277743339538574, "learning_rate": 5.965076906272652e-07, "loss": 0.0413, "step": 897300 }, { "epoch": 8.82, "grad_norm": 0.007132869679480791, "learning_rate": 5.963835681730168e-07, "loss": 0.0465, "step": 897325 }, { "epoch": 8.82, "grad_norm": 6.533717155456543, "learning_rate": 5.962594457187684e-07, "loss": 0.0299, "step": 897350 }, { "epoch": 8.82, "grad_norm": 0.10266193747520447, "learning_rate": 5.961353232645199e-07, "loss": 0.0427, "step": 897375 }, { "epoch": 8.82, "grad_norm": 0.16242757439613342, "learning_rate": 5.960112008102715e-07, "loss": 0.0185, "step": 897400 }, { "epoch": 8.82, "grad_norm": 0.014039133675396442, "learning_rate": 5.95887078356023e-07, "loss": 0.0292, "step": 897425 }, { "epoch": 8.82, "grad_norm": 12.811853408813477, "learning_rate": 5.957629559017745e-07, "loss": 0.0244, "step": 897450 }, { "epoch": 8.82, "grad_norm": 0.08592046052217484, "learning_rate": 5.956388334475261e-07, "loss": 0.0302, "step": 897475 }, { "epoch": 8.82, "grad_norm": 11.96944808959961, "learning_rate": 5.955147109932776e-07, "loss": 0.0177, "step": 897500 }, { "epoch": 8.82, "grad_norm": 0.5470450520515442, "learning_rate": 5.953905885390291e-07, "loss": 0.0267, "step": 897525 }, { "epoch": 8.82, "grad_norm": 19.070905685424805, "learning_rate": 5.952664660847806e-07, "loss": 0.0327, "step": 897550 }, { "epoch": 8.83, "grad_norm": 1.8621768951416016, "learning_rate": 5.951423436305322e-07, "loss": 0.0314, "step": 897575 }, { "epoch": 8.83, "grad_norm": 1.8662633895874023, "learning_rate": 5.950182211762837e-07, "loss": 0.0424, "step": 897600 }, { "epoch": 8.83, "grad_norm": 1.8577971458435059, "learning_rate": 5.948940987220352e-07, "loss": 0.0346, "step": 897625 }, { "epoch": 8.83, "grad_norm": 14.976700782775879, "learning_rate": 5.947699762677868e-07, "loss": 0.0348, "step": 897650 }, { "epoch": 8.83, "grad_norm": 0.8245441913604736, "learning_rate": 5.946458538135384e-07, "loss": 0.0503, "step": 897675 }, { "epoch": 8.83, "grad_norm": 7.7378926277160645, "learning_rate": 5.945217313592898e-07, "loss": 0.0188, "step": 897700 }, { "epoch": 8.83, "grad_norm": 0.5999670028686523, "learning_rate": 5.943976089050415e-07, "loss": 0.0208, "step": 897725 }, { "epoch": 8.83, "grad_norm": 3.040727376937866, "learning_rate": 5.942734864507929e-07, "loss": 0.0206, "step": 897750 }, { "epoch": 8.83, "grad_norm": 0.1144857332110405, "learning_rate": 5.941493639965445e-07, "loss": 0.0487, "step": 897775 }, { "epoch": 8.83, "grad_norm": 6.801270008087158, "learning_rate": 5.94025241542296e-07, "loss": 0.0233, "step": 897800 }, { "epoch": 8.83, "grad_norm": 3.3731777667999268, "learning_rate": 5.939060839862175e-07, "loss": 0.0403, "step": 897825 }, { "epoch": 8.83, "grad_norm": 9.620047569274902, "learning_rate": 5.93781961531969e-07, "loss": 0.0249, "step": 897850 }, { "epoch": 8.83, "grad_norm": 0.24242527782917023, "learning_rate": 5.936578390777205e-07, "loss": 0.0317, "step": 897875 }, { "epoch": 8.83, "grad_norm": 26.359691619873047, "learning_rate": 5.935337166234721e-07, "loss": 0.0304, "step": 897900 }, { "epoch": 8.83, "grad_norm": 0.010518897324800491, "learning_rate": 5.934095941692236e-07, "loss": 0.0438, "step": 897925 }, { "epoch": 8.83, "grad_norm": 17.027740478515625, "learning_rate": 5.932854717149751e-07, "loss": 0.03, "step": 897950 }, { "epoch": 8.83, "grad_norm": 5.289814472198486, "learning_rate": 5.931613492607268e-07, "loss": 0.0346, "step": 897975 }, { "epoch": 8.83, "grad_norm": 10.422730445861816, "learning_rate": 5.930372268064782e-07, "loss": 0.0352, "step": 898000 }, { "epoch": 8.83, "grad_norm": 0.9077843427658081, "learning_rate": 5.929131043522298e-07, "loss": 0.0363, "step": 898025 }, { "epoch": 8.83, "grad_norm": 10.8787202835083, "learning_rate": 5.927889818979813e-07, "loss": 0.0349, "step": 898050 }, { "epoch": 8.83, "grad_norm": 0.264833927154541, "learning_rate": 5.926648594437329e-07, "loss": 0.0246, "step": 898075 }, { "epoch": 8.83, "grad_norm": 7.507013320922852, "learning_rate": 5.925407369894843e-07, "loss": 0.0347, "step": 898100 }, { "epoch": 8.83, "grad_norm": 0.19150298833847046, "learning_rate": 5.924166145352359e-07, "loss": 0.0436, "step": 898125 }, { "epoch": 8.83, "grad_norm": 0.9120723009109497, "learning_rate": 5.922924920809875e-07, "loss": 0.0348, "step": 898150 }, { "epoch": 8.83, "grad_norm": 0.027738410979509354, "learning_rate": 5.92168369626739e-07, "loss": 0.0278, "step": 898175 }, { "epoch": 8.83, "grad_norm": 9.587312698364258, "learning_rate": 5.920442471724905e-07, "loss": 0.0129, "step": 898200 }, { "epoch": 8.83, "grad_norm": 5.914114952087402, "learning_rate": 5.919201247182421e-07, "loss": 0.0305, "step": 898225 }, { "epoch": 8.83, "grad_norm": 0.3880583941936493, "learning_rate": 5.917960022639936e-07, "loss": 0.0241, "step": 898250 }, { "epoch": 8.83, "grad_norm": 2.092238664627075, "learning_rate": 5.916718798097452e-07, "loss": 0.023, "step": 898275 }, { "epoch": 8.83, "grad_norm": 2.8338942527770996, "learning_rate": 5.915477573554966e-07, "loss": 0.0357, "step": 898300 }, { "epoch": 8.83, "grad_norm": 0.03267739713191986, "learning_rate": 5.914236349012483e-07, "loss": 0.0229, "step": 898325 }, { "epoch": 8.83, "grad_norm": 11.19780445098877, "learning_rate": 5.912995124469997e-07, "loss": 0.0315, "step": 898350 }, { "epoch": 8.83, "grad_norm": 0.04415350407361984, "learning_rate": 5.911753899927513e-07, "loss": 0.0336, "step": 898375 }, { "epoch": 8.83, "grad_norm": 5.724123954772949, "learning_rate": 5.910512675385029e-07, "loss": 0.0239, "step": 898400 }, { "epoch": 8.83, "grad_norm": 0.10696624219417572, "learning_rate": 5.909271450842544e-07, "loss": 0.0267, "step": 898425 }, { "epoch": 8.83, "grad_norm": 1.8752503395080566, "learning_rate": 5.908030226300059e-07, "loss": 0.0196, "step": 898450 }, { "epoch": 8.83, "grad_norm": 4.694226264953613, "learning_rate": 5.906789001757575e-07, "loss": 0.0295, "step": 898475 }, { "epoch": 8.83, "grad_norm": 12.819084167480469, "learning_rate": 5.90554777721509e-07, "loss": 0.0224, "step": 898500 }, { "epoch": 8.83, "grad_norm": 0.02139127627015114, "learning_rate": 5.904306552672605e-07, "loss": 0.0311, "step": 898525 }, { "epoch": 8.83, "grad_norm": 12.093297958374023, "learning_rate": 5.90306532813012e-07, "loss": 0.0342, "step": 898550 }, { "epoch": 8.83, "grad_norm": 0.00891677662730217, "learning_rate": 5.901824103587636e-07, "loss": 0.0341, "step": 898575 }, { "epoch": 8.84, "grad_norm": 5.385546684265137, "learning_rate": 5.900582879045151e-07, "loss": 0.0347, "step": 898600 }, { "epoch": 8.84, "grad_norm": 0.025718295946717262, "learning_rate": 5.899341654502666e-07, "loss": 0.0404, "step": 898625 }, { "epoch": 8.84, "grad_norm": 13.492883682250977, "learning_rate": 5.898100429960182e-07, "loss": 0.0332, "step": 898650 }, { "epoch": 8.84, "grad_norm": 3.252310037612915, "learning_rate": 5.896859205417697e-07, "loss": 0.0243, "step": 898675 }, { "epoch": 8.84, "grad_norm": 14.729256629943848, "learning_rate": 5.895617980875213e-07, "loss": 0.0216, "step": 898700 }, { "epoch": 8.84, "grad_norm": 0.043953582644462585, "learning_rate": 5.894376756332727e-07, "loss": 0.0287, "step": 898725 }, { "epoch": 8.84, "grad_norm": 12.309272766113281, "learning_rate": 5.893135531790244e-07, "loss": 0.0186, "step": 898750 }, { "epoch": 8.84, "grad_norm": 0.11235411465167999, "learning_rate": 5.891894307247758e-07, "loss": 0.0259, "step": 898775 }, { "epoch": 8.84, "grad_norm": 22.131885528564453, "learning_rate": 5.890653082705274e-07, "loss": 0.0203, "step": 898800 }, { "epoch": 8.84, "grad_norm": 0.0787416324019432, "learning_rate": 5.88941185816279e-07, "loss": 0.0212, "step": 898825 }, { "epoch": 8.84, "grad_norm": 1.3107390403747559, "learning_rate": 5.888170633620305e-07, "loss": 0.0232, "step": 898850 }, { "epoch": 8.84, "grad_norm": 0.6878010034561157, "learning_rate": 5.88692940907782e-07, "loss": 0.0336, "step": 898875 }, { "epoch": 8.84, "grad_norm": 4.281646251678467, "learning_rate": 5.885688184535336e-07, "loss": 0.0235, "step": 898900 }, { "epoch": 8.84, "grad_norm": 0.04068557918071747, "learning_rate": 5.884446959992851e-07, "loss": 0.0325, "step": 898925 }, { "epoch": 8.84, "grad_norm": 2.6191890239715576, "learning_rate": 5.883205735450366e-07, "loss": 0.029, "step": 898950 }, { "epoch": 8.84, "grad_norm": 2.8976821899414062, "learning_rate": 5.881964510907881e-07, "loss": 0.0308, "step": 898975 }, { "epoch": 8.84, "grad_norm": 10.976140022277832, "learning_rate": 5.880723286365398e-07, "loss": 0.0191, "step": 899000 }, { "epoch": 8.84, "grad_norm": 1.5758978128433228, "learning_rate": 5.879482061822912e-07, "loss": 0.039, "step": 899025 }, { "epoch": 8.84, "grad_norm": 0.06821168214082718, "learning_rate": 5.878240837280428e-07, "loss": 0.0221, "step": 899050 }, { "epoch": 8.84, "grad_norm": 0.3094070255756378, "learning_rate": 5.876999612737943e-07, "loss": 0.0312, "step": 899075 }, { "epoch": 8.84, "grad_norm": 3.3384931087493896, "learning_rate": 5.875758388195459e-07, "loss": 0.015, "step": 899100 }, { "epoch": 8.84, "grad_norm": 0.023602429777383804, "learning_rate": 5.874517163652974e-07, "loss": 0.0349, "step": 899125 }, { "epoch": 8.84, "grad_norm": 14.368895530700684, "learning_rate": 5.87327593911049e-07, "loss": 0.0223, "step": 899150 }, { "epoch": 8.84, "grad_norm": 0.06075672060251236, "learning_rate": 5.872034714568005e-07, "loss": 0.0284, "step": 899175 }, { "epoch": 8.84, "grad_norm": 1.7099220752716064, "learning_rate": 5.87079349002552e-07, "loss": 0.0245, "step": 899200 }, { "epoch": 8.84, "grad_norm": 1.3686540126800537, "learning_rate": 5.869552265483035e-07, "loss": 0.0278, "step": 899225 }, { "epoch": 8.84, "grad_norm": 19.030778884887695, "learning_rate": 5.868311040940551e-07, "loss": 0.0378, "step": 899250 }, { "epoch": 8.84, "grad_norm": 0.003067944897338748, "learning_rate": 5.867069816398066e-07, "loss": 0.0495, "step": 899275 }, { "epoch": 8.84, "grad_norm": 21.611080169677734, "learning_rate": 5.865828591855581e-07, "loss": 0.0449, "step": 899300 }, { "epoch": 8.84, "grad_norm": 0.016437450423836708, "learning_rate": 5.864587367313097e-07, "loss": 0.0437, "step": 899325 }, { "epoch": 8.84, "grad_norm": 6.182276248931885, "learning_rate": 5.863346142770612e-07, "loss": 0.016, "step": 899350 }, { "epoch": 8.84, "grad_norm": 2.8899667263031006, "learning_rate": 5.862104918228128e-07, "loss": 0.0295, "step": 899375 }, { "epoch": 8.84, "grad_norm": 14.566065788269043, "learning_rate": 5.860863693685642e-07, "loss": 0.0289, "step": 899400 }, { "epoch": 8.84, "grad_norm": 0.09680730104446411, "learning_rate": 5.859622469143159e-07, "loss": 0.0342, "step": 899425 }, { "epoch": 8.84, "grad_norm": 0.9706325531005859, "learning_rate": 5.858381244600673e-07, "loss": 0.0312, "step": 899450 }, { "epoch": 8.84, "grad_norm": 0.19471421837806702, "learning_rate": 5.857140020058189e-07, "loss": 0.0405, "step": 899475 }, { "epoch": 8.84, "grad_norm": 4.879146099090576, "learning_rate": 5.855898795515705e-07, "loss": 0.0363, "step": 899500 }, { "epoch": 8.84, "grad_norm": 0.07805135101079941, "learning_rate": 5.85465757097322e-07, "loss": 0.0325, "step": 899525 }, { "epoch": 8.84, "grad_norm": 0.3538115918636322, "learning_rate": 5.853416346430735e-07, "loss": 0.0162, "step": 899550 }, { "epoch": 8.84, "grad_norm": 0.34092241525650024, "learning_rate": 5.852175121888251e-07, "loss": 0.033, "step": 899575 }, { "epoch": 8.85, "grad_norm": 18.1068115234375, "learning_rate": 5.850933897345766e-07, "loss": 0.0374, "step": 899600 }, { "epoch": 8.85, "grad_norm": 5.363978385925293, "learning_rate": 5.849692672803281e-07, "loss": 0.0299, "step": 899625 }, { "epoch": 8.85, "grad_norm": 11.174973487854004, "learning_rate": 5.848451448260796e-07, "loss": 0.0343, "step": 899650 }, { "epoch": 8.85, "grad_norm": 0.6136824488639832, "learning_rate": 5.847210223718313e-07, "loss": 0.04, "step": 899675 }, { "epoch": 8.85, "grad_norm": 1.609055757522583, "learning_rate": 5.845968999175827e-07, "loss": 0.0426, "step": 899700 }, { "epoch": 8.85, "grad_norm": 1.8808341026306152, "learning_rate": 5.844727774633343e-07, "loss": 0.022, "step": 899725 }, { "epoch": 8.85, "grad_norm": 4.619924068450928, "learning_rate": 5.843486550090858e-07, "loss": 0.02, "step": 899750 }, { "epoch": 8.85, "grad_norm": 0.011862190440297127, "learning_rate": 5.842245325548374e-07, "loss": 0.0615, "step": 899775 }, { "epoch": 8.85, "grad_norm": 14.957171440124512, "learning_rate": 5.841004101005889e-07, "loss": 0.0229, "step": 899800 }, { "epoch": 8.85, "grad_norm": 8.374263763427734, "learning_rate": 5.839762876463405e-07, "loss": 0.0462, "step": 899825 }, { "epoch": 8.85, "grad_norm": 5.762059211730957, "learning_rate": 5.83852165192092e-07, "loss": 0.0258, "step": 899850 }, { "epoch": 8.85, "grad_norm": 0.09855471551418304, "learning_rate": 5.837280427378435e-07, "loss": 0.0278, "step": 899875 }, { "epoch": 8.85, "grad_norm": 7.619317531585693, "learning_rate": 5.83603920283595e-07, "loss": 0.023, "step": 899900 }, { "epoch": 8.85, "grad_norm": 0.09520618617534637, "learning_rate": 5.834797978293466e-07, "loss": 0.0179, "step": 899925 }, { "epoch": 8.85, "grad_norm": 9.029765129089355, "learning_rate": 5.833556753750981e-07, "loss": 0.025, "step": 899950 }, { "epoch": 8.85, "grad_norm": 0.034615591168403625, "learning_rate": 5.832315529208496e-07, "loss": 0.0344, "step": 899975 }, { "epoch": 8.85, "grad_norm": 3.177389621734619, "learning_rate": 5.831074304666012e-07, "loss": 0.0145, "step": 900000 }, { "epoch": 8.85, "eval_loss": 0.9097108840942383, "eval_runtime": 6073.1112, "eval_samples_per_second": 1.559, "eval_steps_per_second": 0.195, "eval_wer": 0.11034571557329556, "step": 900000 }, { "epoch": 8.85, "grad_norm": 5.8918890953063965, "learning_rate": 5.829833080123527e-07, "loss": 0.0315, "step": 900025 }, { "epoch": 8.85, "grad_norm": 13.845802307128906, "learning_rate": 5.828641504562741e-07, "loss": 0.0267, "step": 900050 }, { "epoch": 8.85, "grad_norm": 0.11801115423440933, "learning_rate": 5.827400280020258e-07, "loss": 0.0257, "step": 900075 }, { "epoch": 8.85, "grad_norm": 0.5757126808166504, "learning_rate": 5.826159055477772e-07, "loss": 0.0351, "step": 900100 }, { "epoch": 8.85, "grad_norm": 1.2734342813491821, "learning_rate": 5.824917830935288e-07, "loss": 0.0261, "step": 900125 }, { "epoch": 8.85, "grad_norm": 8.846025466918945, "learning_rate": 5.823676606392804e-07, "loss": 0.0208, "step": 900150 }, { "epoch": 8.85, "grad_norm": 0.23554202914237976, "learning_rate": 5.822435381850319e-07, "loss": 0.0251, "step": 900175 }, { "epoch": 8.85, "grad_norm": 0.6563794612884521, "learning_rate": 5.821194157307834e-07, "loss": 0.0343, "step": 900200 }, { "epoch": 8.85, "grad_norm": 0.3831743896007538, "learning_rate": 5.819952932765349e-07, "loss": 0.0244, "step": 900225 }, { "epoch": 8.85, "grad_norm": 11.79438304901123, "learning_rate": 5.818711708222865e-07, "loss": 0.0217, "step": 900250 }, { "epoch": 8.85, "grad_norm": 0.045573197305202484, "learning_rate": 5.81747048368038e-07, "loss": 0.0474, "step": 900275 }, { "epoch": 8.85, "grad_norm": 2.7891294956207275, "learning_rate": 5.816229259137895e-07, "loss": 0.0141, "step": 900300 }, { "epoch": 8.85, "grad_norm": 1.6712864637374878, "learning_rate": 5.814988034595412e-07, "loss": 0.0354, "step": 900325 }, { "epoch": 8.85, "grad_norm": 4.702117443084717, "learning_rate": 5.813746810052926e-07, "loss": 0.0403, "step": 900350 }, { "epoch": 8.85, "grad_norm": 1.2753688097000122, "learning_rate": 5.812505585510442e-07, "loss": 0.0291, "step": 900375 }, { "epoch": 8.85, "grad_norm": 0.3238235414028168, "learning_rate": 5.811264360967956e-07, "loss": 0.0246, "step": 900400 }, { "epoch": 8.85, "grad_norm": 0.022436005994677544, "learning_rate": 5.810023136425473e-07, "loss": 0.0274, "step": 900425 }, { "epoch": 8.85, "grad_norm": 15.319578170776367, "learning_rate": 5.808781911882988e-07, "loss": 0.0323, "step": 900450 }, { "epoch": 8.85, "grad_norm": 0.36343666911125183, "learning_rate": 5.807540687340503e-07, "loss": 0.0281, "step": 900475 }, { "epoch": 8.85, "grad_norm": 9.802200317382812, "learning_rate": 5.806299462798019e-07, "loss": 0.0248, "step": 900500 }, { "epoch": 8.85, "grad_norm": 0.9659993648529053, "learning_rate": 5.805058238255534e-07, "loss": 0.0463, "step": 900525 }, { "epoch": 8.85, "grad_norm": 9.953889846801758, "learning_rate": 5.803817013713049e-07, "loss": 0.0191, "step": 900550 }, { "epoch": 8.85, "grad_norm": 7.483116626739502, "learning_rate": 5.802575789170565e-07, "loss": 0.033, "step": 900575 }, { "epoch": 8.85, "grad_norm": 0.8375031352043152, "learning_rate": 5.80133456462808e-07, "loss": 0.0262, "step": 900600 }, { "epoch": 8.86, "grad_norm": 0.014469210058450699, "learning_rate": 5.800093340085595e-07, "loss": 0.0342, "step": 900625 }, { "epoch": 8.86, "grad_norm": 9.370222091674805, "learning_rate": 5.79885211554311e-07, "loss": 0.0174, "step": 900650 }, { "epoch": 8.86, "grad_norm": 0.38559383153915405, "learning_rate": 5.797610891000626e-07, "loss": 0.0265, "step": 900675 }, { "epoch": 8.86, "grad_norm": 16.388458251953125, "learning_rate": 5.796369666458142e-07, "loss": 0.0296, "step": 900700 }, { "epoch": 8.86, "grad_norm": 0.3039552569389343, "learning_rate": 5.795128441915656e-07, "loss": 0.0135, "step": 900725 }, { "epoch": 8.86, "grad_norm": 10.897216796875, "learning_rate": 5.793887217373173e-07, "loss": 0.0225, "step": 900750 }, { "epoch": 8.86, "grad_norm": 0.026856515556573868, "learning_rate": 5.792645992830687e-07, "loss": 0.0457, "step": 900775 }, { "epoch": 8.86, "grad_norm": 7.900813102722168, "learning_rate": 5.791404768288203e-07, "loss": 0.0336, "step": 900800 }, { "epoch": 8.86, "grad_norm": 0.4944436252117157, "learning_rate": 5.790163543745717e-07, "loss": 0.0344, "step": 900825 }, { "epoch": 8.86, "grad_norm": 1.4110256433486938, "learning_rate": 5.788922319203234e-07, "loss": 0.0234, "step": 900850 }, { "epoch": 8.86, "grad_norm": 0.7546735405921936, "learning_rate": 5.787681094660749e-07, "loss": 0.027, "step": 900875 }, { "epoch": 8.86, "grad_norm": 8.618221282958984, "learning_rate": 5.786439870118264e-07, "loss": 0.024, "step": 900900 }, { "epoch": 8.86, "grad_norm": 0.019575301557779312, "learning_rate": 5.78519864557578e-07, "loss": 0.0374, "step": 900925 }, { "epoch": 8.86, "grad_norm": 5.005484580993652, "learning_rate": 5.783957421033295e-07, "loss": 0.0246, "step": 900950 }, { "epoch": 8.86, "grad_norm": 2.9907283782958984, "learning_rate": 5.78271619649081e-07, "loss": 0.0345, "step": 900975 }, { "epoch": 8.86, "grad_norm": 6.363424301147461, "learning_rate": 5.781474971948326e-07, "loss": 0.0223, "step": 901000 }, { "epoch": 8.86, "grad_norm": 1.8494752645492554, "learning_rate": 5.780233747405841e-07, "loss": 0.0309, "step": 901025 }, { "epoch": 8.86, "grad_norm": 3.2345099449157715, "learning_rate": 5.778992522863357e-07, "loss": 0.0264, "step": 901050 }, { "epoch": 8.86, "grad_norm": 0.2399645298719406, "learning_rate": 5.777751298320871e-07, "loss": 0.0402, "step": 901075 }, { "epoch": 8.86, "grad_norm": 2.6822099685668945, "learning_rate": 5.776510073778388e-07, "loss": 0.0295, "step": 901100 }, { "epoch": 8.86, "grad_norm": 0.1567625254392624, "learning_rate": 5.775268849235903e-07, "loss": 0.0392, "step": 901125 }, { "epoch": 8.86, "grad_norm": 8.338006019592285, "learning_rate": 5.774027624693418e-07, "loss": 0.0363, "step": 901150 }, { "epoch": 8.86, "grad_norm": 0.19732804596424103, "learning_rate": 5.772786400150934e-07, "loss": 0.0657, "step": 901175 }, { "epoch": 8.86, "grad_norm": 16.99410057067871, "learning_rate": 5.771545175608449e-07, "loss": 0.0193, "step": 901200 }, { "epoch": 8.86, "grad_norm": 1.4924978017807007, "learning_rate": 5.770303951065964e-07, "loss": 0.0475, "step": 901225 }, { "epoch": 8.86, "grad_norm": 12.218024253845215, "learning_rate": 5.76906272652348e-07, "loss": 0.0204, "step": 901250 }, { "epoch": 8.86, "grad_norm": 0.03149544820189476, "learning_rate": 5.767821501980995e-07, "loss": 0.0329, "step": 901275 }, { "epoch": 8.86, "grad_norm": 7.94860315322876, "learning_rate": 5.76658027743851e-07, "loss": 0.0425, "step": 901300 }, { "epoch": 8.86, "grad_norm": 5.372663497924805, "learning_rate": 5.765339052896025e-07, "loss": 0.0387, "step": 901325 }, { "epoch": 8.86, "grad_norm": 13.491004943847656, "learning_rate": 5.764097828353541e-07, "loss": 0.0251, "step": 901350 }, { "epoch": 8.86, "grad_norm": 0.354149729013443, "learning_rate": 5.762856603811056e-07, "loss": 0.0468, "step": 901375 }, { "epoch": 8.86, "grad_norm": 10.423861503601074, "learning_rate": 5.761615379268571e-07, "loss": 0.0291, "step": 901400 }, { "epoch": 8.86, "grad_norm": 0.11053924262523651, "learning_rate": 5.760374154726088e-07, "loss": 0.0564, "step": 901425 }, { "epoch": 8.86, "grad_norm": 9.674795150756836, "learning_rate": 5.759132930183602e-07, "loss": 0.0291, "step": 901450 }, { "epoch": 8.86, "grad_norm": 0.05886337533593178, "learning_rate": 5.757891705641118e-07, "loss": 0.0259, "step": 901475 }, { "epoch": 8.86, "grad_norm": 3.1303229331970215, "learning_rate": 5.756650481098632e-07, "loss": 0.0335, "step": 901500 }, { "epoch": 8.86, "grad_norm": 2.3530850410461426, "learning_rate": 5.755409256556149e-07, "loss": 0.0242, "step": 901525 }, { "epoch": 8.86, "grad_norm": 5.9543137550354, "learning_rate": 5.754168032013664e-07, "loss": 0.0222, "step": 901550 }, { "epoch": 8.86, "grad_norm": 0.006637560203671455, "learning_rate": 5.752926807471179e-07, "loss": 0.029, "step": 901575 }, { "epoch": 8.86, "grad_norm": 0.4462318420410156, "learning_rate": 5.751685582928695e-07, "loss": 0.0184, "step": 901600 }, { "epoch": 8.86, "grad_norm": 0.04845678433775902, "learning_rate": 5.75044435838621e-07, "loss": 0.0294, "step": 901625 }, { "epoch": 8.87, "grad_norm": 15.947803497314453, "learning_rate": 5.749203133843725e-07, "loss": 0.0297, "step": 901650 }, { "epoch": 8.87, "grad_norm": 0.08842544257640839, "learning_rate": 5.747961909301241e-07, "loss": 0.0488, "step": 901675 }, { "epoch": 8.87, "grad_norm": 15.572508811950684, "learning_rate": 5.746720684758756e-07, "loss": 0.0317, "step": 901700 }, { "epoch": 8.87, "grad_norm": 16.08411979675293, "learning_rate": 5.745479460216272e-07, "loss": 0.0253, "step": 901725 }, { "epoch": 8.87, "grad_norm": 15.762537956237793, "learning_rate": 5.744238235673786e-07, "loss": 0.031, "step": 901750 }, { "epoch": 8.87, "grad_norm": 0.05862777307629585, "learning_rate": 5.742997011131303e-07, "loss": 0.0266, "step": 901775 }, { "epoch": 8.87, "grad_norm": 7.259613990783691, "learning_rate": 5.741755786588817e-07, "loss": 0.0279, "step": 901800 }, { "epoch": 8.87, "grad_norm": 0.020265920087695122, "learning_rate": 5.740514562046333e-07, "loss": 0.0162, "step": 901825 }, { "epoch": 8.87, "grad_norm": 0.9106729030609131, "learning_rate": 5.739273337503849e-07, "loss": 0.0242, "step": 901850 }, { "epoch": 8.87, "grad_norm": 2.338721513748169, "learning_rate": 5.738032112961364e-07, "loss": 0.0302, "step": 901875 }, { "epoch": 8.87, "grad_norm": 24.993656158447266, "learning_rate": 5.736790888418879e-07, "loss": 0.0401, "step": 901900 }, { "epoch": 8.87, "grad_norm": 0.052967093884944916, "learning_rate": 5.735549663876395e-07, "loss": 0.0367, "step": 901925 }, { "epoch": 8.87, "grad_norm": 11.636042594909668, "learning_rate": 5.73430843933391e-07, "loss": 0.0173, "step": 901950 }, { "epoch": 8.87, "grad_norm": 0.0398925319314003, "learning_rate": 5.733067214791425e-07, "loss": 0.0302, "step": 901975 }, { "epoch": 8.87, "grad_norm": 9.935285568237305, "learning_rate": 5.73182599024894e-07, "loss": 0.0283, "step": 902000 }, { "epoch": 8.87, "grad_norm": 0.057114992290735245, "learning_rate": 5.730584765706456e-07, "loss": 0.0375, "step": 902025 }, { "epoch": 8.87, "grad_norm": 8.988236427307129, "learning_rate": 5.729343541163971e-07, "loss": 0.0325, "step": 902050 }, { "epoch": 8.87, "grad_norm": 0.06738840043544769, "learning_rate": 5.728102316621486e-07, "loss": 0.0356, "step": 902075 }, { "epoch": 8.87, "grad_norm": 1.7262238264083862, "learning_rate": 5.726861092079002e-07, "loss": 0.0252, "step": 902100 }, { "epoch": 8.87, "grad_norm": 0.04540485888719559, "learning_rate": 5.725619867536517e-07, "loss": 0.0285, "step": 902125 }, { "epoch": 8.87, "grad_norm": 0.36112746596336365, "learning_rate": 5.724378642994033e-07, "loss": 0.0364, "step": 902150 }, { "epoch": 8.87, "grad_norm": 0.4846430718898773, "learning_rate": 5.723137418451547e-07, "loss": 0.032, "step": 902175 }, { "epoch": 8.87, "grad_norm": 18.38263511657715, "learning_rate": 5.721896193909064e-07, "loss": 0.0417, "step": 902200 }, { "epoch": 8.87, "grad_norm": 0.05388839542865753, "learning_rate": 5.720654969366578e-07, "loss": 0.0233, "step": 902225 }, { "epoch": 8.87, "grad_norm": 3.146648645401001, "learning_rate": 5.719413744824094e-07, "loss": 0.0252, "step": 902250 }, { "epoch": 8.87, "grad_norm": 0.11441440135240555, "learning_rate": 5.71817252028161e-07, "loss": 0.0289, "step": 902275 }, { "epoch": 8.87, "grad_norm": 17.77977180480957, "learning_rate": 5.716931295739125e-07, "loss": 0.0382, "step": 902300 }, { "epoch": 8.87, "grad_norm": 0.1503467708826065, "learning_rate": 5.715739720178339e-07, "loss": 0.0431, "step": 902325 }, { "epoch": 8.87, "grad_norm": 4.69342565536499, "learning_rate": 5.714498495635855e-07, "loss": 0.0394, "step": 902350 }, { "epoch": 8.87, "grad_norm": 0.4028363525867462, "learning_rate": 5.71325727109337e-07, "loss": 0.0319, "step": 902375 }, { "epoch": 8.87, "grad_norm": 9.440885543823242, "learning_rate": 5.712016046550885e-07, "loss": 0.0268, "step": 902400 }, { "epoch": 8.87, "grad_norm": 0.15313108265399933, "learning_rate": 5.710774822008401e-07, "loss": 0.0222, "step": 902425 }, { "epoch": 8.87, "grad_norm": 13.491217613220215, "learning_rate": 5.709533597465917e-07, "loss": 0.0178, "step": 902450 }, { "epoch": 8.87, "grad_norm": 0.022864332422614098, "learning_rate": 5.708292372923431e-07, "loss": 0.0276, "step": 902475 }, { "epoch": 8.87, "grad_norm": 0.7963042855262756, "learning_rate": 5.707051148380948e-07, "loss": 0.0192, "step": 902500 }, { "epoch": 8.87, "grad_norm": 6.281802654266357, "learning_rate": 5.705809923838463e-07, "loss": 0.0426, "step": 902525 }, { "epoch": 8.87, "grad_norm": 8.233269691467285, "learning_rate": 5.704568699295978e-07, "loss": 0.0261, "step": 902550 }, { "epoch": 8.87, "grad_norm": 1.8206689357757568, "learning_rate": 5.703327474753493e-07, "loss": 0.048, "step": 902575 }, { "epoch": 8.87, "grad_norm": 7.490492343902588, "learning_rate": 5.702086250211009e-07, "loss": 0.0281, "step": 902600 }, { "epoch": 8.87, "grad_norm": 0.7365522980690002, "learning_rate": 5.700845025668524e-07, "loss": 0.038, "step": 902625 }, { "epoch": 8.88, "grad_norm": 2.8480658531188965, "learning_rate": 5.699603801126039e-07, "loss": 0.0137, "step": 902650 }, { "epoch": 8.88, "grad_norm": 0.32351943850517273, "learning_rate": 5.698362576583555e-07, "loss": 0.0326, "step": 902675 }, { "epoch": 8.88, "grad_norm": 4.9615478515625, "learning_rate": 5.69712135204107e-07, "loss": 0.012, "step": 902700 }, { "epoch": 8.88, "grad_norm": 1.5275505781173706, "learning_rate": 5.695880127498585e-07, "loss": 0.031, "step": 902725 }, { "epoch": 8.88, "grad_norm": 2.7220349311828613, "learning_rate": 5.694638902956102e-07, "loss": 0.0216, "step": 902750 }, { "epoch": 8.88, "grad_norm": 0.1144578605890274, "learning_rate": 5.693397678413616e-07, "loss": 0.0395, "step": 902775 }, { "epoch": 8.88, "grad_norm": 6.788030624389648, "learning_rate": 5.692156453871132e-07, "loss": 0.0333, "step": 902800 }, { "epoch": 8.88, "grad_norm": 0.009860620833933353, "learning_rate": 5.690915229328646e-07, "loss": 0.039, "step": 902825 }, { "epoch": 8.88, "grad_norm": 10.046339988708496, "learning_rate": 5.689674004786163e-07, "loss": 0.0219, "step": 902850 }, { "epoch": 8.88, "grad_norm": 1.0211176872253418, "learning_rate": 5.688432780243678e-07, "loss": 0.0181, "step": 902875 }, { "epoch": 8.88, "grad_norm": 1.3578670024871826, "learning_rate": 5.687191555701193e-07, "loss": 0.0178, "step": 902900 }, { "epoch": 8.88, "grad_norm": 0.013352654874324799, "learning_rate": 5.685950331158709e-07, "loss": 0.0287, "step": 902925 }, { "epoch": 8.88, "grad_norm": 27.355051040649414, "learning_rate": 5.684709106616224e-07, "loss": 0.0184, "step": 902950 }, { "epoch": 8.88, "grad_norm": 3.655437469482422, "learning_rate": 5.683467882073739e-07, "loss": 0.0343, "step": 902975 }, { "epoch": 8.88, "grad_norm": 24.26475715637207, "learning_rate": 5.682226657531254e-07, "loss": 0.0426, "step": 903000 }, { "epoch": 8.88, "grad_norm": 0.019114140421152115, "learning_rate": 5.68098543298877e-07, "loss": 0.0258, "step": 903025 }, { "epoch": 8.88, "grad_norm": 0.7663190364837646, "learning_rate": 5.679744208446285e-07, "loss": 0.021, "step": 903050 }, { "epoch": 8.88, "grad_norm": 0.05853060260415077, "learning_rate": 5.6785029839038e-07, "loss": 0.0454, "step": 903075 }, { "epoch": 8.88, "grad_norm": 0.12578818202018738, "learning_rate": 5.677261759361316e-07, "loss": 0.0235, "step": 903100 }, { "epoch": 8.88, "grad_norm": 0.015168831683695316, "learning_rate": 5.676020534818831e-07, "loss": 0.0385, "step": 903125 }, { "epoch": 8.88, "grad_norm": 1.0499058961868286, "learning_rate": 5.674779310276346e-07, "loss": 0.0282, "step": 903150 }, { "epoch": 8.88, "grad_norm": 0.06464964896440506, "learning_rate": 5.673538085733863e-07, "loss": 0.0199, "step": 903175 }, { "epoch": 8.88, "grad_norm": 6.440171718597412, "learning_rate": 5.672296861191378e-07, "loss": 0.0243, "step": 903200 }, { "epoch": 8.88, "grad_norm": 0.8032906651496887, "learning_rate": 5.671055636648893e-07, "loss": 0.0345, "step": 903225 }, { "epoch": 8.88, "grad_norm": 0.3559263348579407, "learning_rate": 5.669814412106408e-07, "loss": 0.0242, "step": 903250 }, { "epoch": 8.88, "grad_norm": 0.14479027688503265, "learning_rate": 5.668573187563924e-07, "loss": 0.022, "step": 903275 }, { "epoch": 8.88, "grad_norm": 7.192817687988281, "learning_rate": 5.667331963021439e-07, "loss": 0.0251, "step": 903300 }, { "epoch": 8.88, "grad_norm": 0.12616178393363953, "learning_rate": 5.666090738478954e-07, "loss": 0.0334, "step": 903325 }, { "epoch": 8.88, "grad_norm": 11.99393367767334, "learning_rate": 5.66484951393647e-07, "loss": 0.0455, "step": 903350 }, { "epoch": 8.88, "grad_norm": 6.124199867248535, "learning_rate": 5.663608289393985e-07, "loss": 0.0356, "step": 903375 }, { "epoch": 8.88, "grad_norm": 12.40310001373291, "learning_rate": 5.6623670648515e-07, "loss": 0.0195, "step": 903400 }, { "epoch": 8.88, "grad_norm": 0.025256577879190445, "learning_rate": 5.661125840309015e-07, "loss": 0.044, "step": 903425 }, { "epoch": 8.88, "grad_norm": 5.6533589363098145, "learning_rate": 5.659884615766531e-07, "loss": 0.019, "step": 903450 }, { "epoch": 8.88, "grad_norm": 0.45380133390426636, "learning_rate": 5.658643391224047e-07, "loss": 0.0327, "step": 903475 }, { "epoch": 8.88, "grad_norm": 31.66545295715332, "learning_rate": 5.657402166681561e-07, "loss": 0.0421, "step": 903500 }, { "epoch": 8.88, "grad_norm": 0.08270201832056046, "learning_rate": 5.656160942139078e-07, "loss": 0.0331, "step": 903525 }, { "epoch": 8.88, "grad_norm": 15.235872268676758, "learning_rate": 5.654919717596592e-07, "loss": 0.0307, "step": 903550 }, { "epoch": 8.88, "grad_norm": 0.05024892836809158, "learning_rate": 5.653678493054108e-07, "loss": 0.025, "step": 903575 }, { "epoch": 8.88, "grad_norm": 10.517634391784668, "learning_rate": 5.652437268511624e-07, "loss": 0.0346, "step": 903600 }, { "epoch": 8.88, "grad_norm": 0.19138380885124207, "learning_rate": 5.651196043969139e-07, "loss": 0.0202, "step": 903625 }, { "epoch": 8.88, "grad_norm": 6.246630668640137, "learning_rate": 5.649954819426654e-07, "loss": 0.0311, "step": 903650 }, { "epoch": 8.89, "grad_norm": 0.044793058186769485, "learning_rate": 5.648713594884169e-07, "loss": 0.0371, "step": 903675 }, { "epoch": 8.89, "grad_norm": 15.11386489868164, "learning_rate": 5.647472370341685e-07, "loss": 0.0383, "step": 903700 }, { "epoch": 8.89, "grad_norm": 0.14612814784049988, "learning_rate": 5.6462311457992e-07, "loss": 0.03, "step": 903725 }, { "epoch": 8.89, "grad_norm": 9.340723037719727, "learning_rate": 5.644989921256715e-07, "loss": 0.021, "step": 903750 }, { "epoch": 8.89, "grad_norm": 0.24733294546604156, "learning_rate": 5.643748696714231e-07, "loss": 0.0251, "step": 903775 }, { "epoch": 8.89, "grad_norm": 15.065670013427734, "learning_rate": 5.642507472171746e-07, "loss": 0.0226, "step": 903800 }, { "epoch": 8.89, "grad_norm": 0.01007872261106968, "learning_rate": 5.641266247629261e-07, "loss": 0.0311, "step": 903825 }, { "epoch": 8.89, "grad_norm": 15.851527214050293, "learning_rate": 5.640025023086777e-07, "loss": 0.0383, "step": 903850 }, { "epoch": 8.89, "grad_norm": 0.19022205471992493, "learning_rate": 5.638783798544292e-07, "loss": 0.0438, "step": 903875 }, { "epoch": 8.89, "grad_norm": 9.801679611206055, "learning_rate": 5.637542574001808e-07, "loss": 0.0339, "step": 903900 }, { "epoch": 8.89, "grad_norm": 1.4656568765640259, "learning_rate": 5.636301349459323e-07, "loss": 0.0247, "step": 903925 }, { "epoch": 8.89, "grad_norm": 11.833551406860352, "learning_rate": 5.635060124916839e-07, "loss": 0.0384, "step": 903950 }, { "epoch": 8.89, "grad_norm": 29.919403076171875, "learning_rate": 5.633818900374354e-07, "loss": 0.0599, "step": 903975 }, { "epoch": 8.89, "grad_norm": 15.662704467773438, "learning_rate": 5.632577675831869e-07, "loss": 0.0261, "step": 904000 }, { "epoch": 8.89, "grad_norm": 1.5350966453552246, "learning_rate": 5.631336451289385e-07, "loss": 0.0443, "step": 904025 }, { "epoch": 8.89, "grad_norm": 14.51917552947998, "learning_rate": 5.6300952267469e-07, "loss": 0.0275, "step": 904050 }, { "epoch": 8.89, "grad_norm": 0.1042964830994606, "learning_rate": 5.628854002204415e-07, "loss": 0.0274, "step": 904075 }, { "epoch": 8.89, "grad_norm": 7.143418312072754, "learning_rate": 5.62761277766193e-07, "loss": 0.0247, "step": 904100 }, { "epoch": 8.89, "grad_norm": 1.3344560861587524, "learning_rate": 5.626371553119446e-07, "loss": 0.043, "step": 904125 }, { "epoch": 8.89, "grad_norm": 15.447481155395508, "learning_rate": 5.625130328576962e-07, "loss": 0.0288, "step": 904150 }, { "epoch": 8.89, "grad_norm": 0.04544548690319061, "learning_rate": 5.623889104034476e-07, "loss": 0.0406, "step": 904175 }, { "epoch": 8.89, "grad_norm": 0.3828478753566742, "learning_rate": 5.622647879491993e-07, "loss": 0.0399, "step": 904200 }, { "epoch": 8.89, "grad_norm": 0.9163019061088562, "learning_rate": 5.621406654949507e-07, "loss": 0.0315, "step": 904225 }, { "epoch": 8.89, "grad_norm": 13.369686126708984, "learning_rate": 5.620165430407023e-07, "loss": 0.0279, "step": 904250 }, { "epoch": 8.89, "grad_norm": 3.771613359451294, "learning_rate": 5.618924205864539e-07, "loss": 0.0302, "step": 904275 }, { "epoch": 8.89, "grad_norm": 11.855890274047852, "learning_rate": 5.617682981322054e-07, "loss": 0.0295, "step": 904300 }, { "epoch": 8.89, "grad_norm": 9.029797554016113, "learning_rate": 5.616441756779569e-07, "loss": 0.0473, "step": 904325 }, { "epoch": 8.89, "grad_norm": 10.91071891784668, "learning_rate": 5.615200532237084e-07, "loss": 0.0393, "step": 904350 }, { "epoch": 8.89, "grad_norm": 0.05706925317645073, "learning_rate": 5.614008956676299e-07, "loss": 0.0255, "step": 904375 }, { "epoch": 8.89, "grad_norm": 0.31998223066329956, "learning_rate": 5.612767732133814e-07, "loss": 0.0395, "step": 904400 }, { "epoch": 8.89, "grad_norm": 0.06381469964981079, "learning_rate": 5.611526507591329e-07, "loss": 0.0576, "step": 904425 }, { "epoch": 8.89, "grad_norm": 2.333566188812256, "learning_rate": 5.610285283048845e-07, "loss": 0.0297, "step": 904450 }, { "epoch": 8.89, "grad_norm": 0.2874661982059479, "learning_rate": 5.60904405850636e-07, "loss": 0.0239, "step": 904475 }, { "epoch": 8.89, "grad_norm": 1.024239420890808, "learning_rate": 5.607802833963876e-07, "loss": 0.0363, "step": 904500 }, { "epoch": 8.89, "grad_norm": 1.920070767402649, "learning_rate": 5.606561609421391e-07, "loss": 0.0381, "step": 904525 }, { "epoch": 8.89, "grad_norm": 2.4931116104125977, "learning_rate": 5.605320384878907e-07, "loss": 0.0374, "step": 904550 }, { "epoch": 8.89, "grad_norm": 4.282278060913086, "learning_rate": 5.604079160336421e-07, "loss": 0.0308, "step": 904575 }, { "epoch": 8.89, "grad_norm": 23.327577590942383, "learning_rate": 5.602837935793938e-07, "loss": 0.0252, "step": 904600 }, { "epoch": 8.89, "grad_norm": 0.5410047173500061, "learning_rate": 5.601596711251453e-07, "loss": 0.0416, "step": 904625 }, { "epoch": 8.89, "grad_norm": 7.957381725311279, "learning_rate": 5.600355486708968e-07, "loss": 0.0438, "step": 904650 }, { "epoch": 8.89, "grad_norm": 0.826742947101593, "learning_rate": 5.599114262166483e-07, "loss": 0.0369, "step": 904675 }, { "epoch": 8.9, "grad_norm": 6.294032573699951, "learning_rate": 5.597873037623999e-07, "loss": 0.0215, "step": 904700 }, { "epoch": 8.9, "grad_norm": 1.454184651374817, "learning_rate": 5.596631813081514e-07, "loss": 0.0283, "step": 904725 }, { "epoch": 8.9, "grad_norm": 1.5625429153442383, "learning_rate": 5.59539058853903e-07, "loss": 0.0255, "step": 904750 }, { "epoch": 8.9, "grad_norm": 0.9105526208877563, "learning_rate": 5.594149363996545e-07, "loss": 0.033, "step": 904775 }, { "epoch": 8.9, "grad_norm": 11.08834171295166, "learning_rate": 5.592908139454061e-07, "loss": 0.0225, "step": 904800 }, { "epoch": 8.9, "grad_norm": 1.3766005039215088, "learning_rate": 5.591666914911575e-07, "loss": 0.0259, "step": 904825 }, { "epoch": 8.9, "grad_norm": 12.0992431640625, "learning_rate": 5.590425690369092e-07, "loss": 0.0173, "step": 904850 }, { "epoch": 8.9, "grad_norm": 0.05199708417057991, "learning_rate": 5.589184465826606e-07, "loss": 0.0247, "step": 904875 }, { "epoch": 8.9, "grad_norm": 0.12992464005947113, "learning_rate": 5.587943241284122e-07, "loss": 0.0283, "step": 904900 }, { "epoch": 8.9, "grad_norm": 0.17492954432964325, "learning_rate": 5.586702016741637e-07, "loss": 0.022, "step": 904925 }, { "epoch": 8.9, "grad_norm": 11.982810974121094, "learning_rate": 5.585460792199153e-07, "loss": 0.0239, "step": 904950 }, { "epoch": 8.9, "grad_norm": 0.046803031116724014, "learning_rate": 5.584219567656668e-07, "loss": 0.0324, "step": 904975 }, { "epoch": 8.9, "grad_norm": 12.401798248291016, "learning_rate": 5.582978343114183e-07, "loss": 0.0254, "step": 905000 }, { "epoch": 8.9, "grad_norm": 3.2696268558502197, "learning_rate": 5.581737118571699e-07, "loss": 0.0301, "step": 905025 }, { "epoch": 8.9, "grad_norm": 15.919260025024414, "learning_rate": 5.580495894029214e-07, "loss": 0.0204, "step": 905050 }, { "epoch": 8.9, "grad_norm": 0.005204644054174423, "learning_rate": 5.579254669486729e-07, "loss": 0.0405, "step": 905075 }, { "epoch": 8.9, "grad_norm": 5.079540729522705, "learning_rate": 5.578013444944244e-07, "loss": 0.0171, "step": 905100 }, { "epoch": 8.9, "grad_norm": 0.03859758749604225, "learning_rate": 5.57677222040176e-07, "loss": 0.0247, "step": 905125 }, { "epoch": 8.9, "grad_norm": 6.324921131134033, "learning_rate": 5.575530995859275e-07, "loss": 0.0218, "step": 905150 }, { "epoch": 8.9, "grad_norm": 0.037397563457489014, "learning_rate": 5.574289771316791e-07, "loss": 0.0425, "step": 905175 }, { "epoch": 8.9, "grad_norm": 13.891924858093262, "learning_rate": 5.573048546774306e-07, "loss": 0.026, "step": 905200 }, { "epoch": 8.9, "grad_norm": 0.4991391897201538, "learning_rate": 5.571807322231822e-07, "loss": 0.0354, "step": 905225 }, { "epoch": 8.9, "grad_norm": 11.944905281066895, "learning_rate": 5.570566097689336e-07, "loss": 0.0171, "step": 905250 }, { "epoch": 8.9, "grad_norm": 0.1810847818851471, "learning_rate": 5.569324873146853e-07, "loss": 0.0108, "step": 905275 }, { "epoch": 8.9, "grad_norm": 0.05275358259677887, "learning_rate": 5.568083648604367e-07, "loss": 0.0295, "step": 905300 }, { "epoch": 8.9, "grad_norm": 0.02988417260348797, "learning_rate": 5.566842424061883e-07, "loss": 0.0363, "step": 905325 }, { "epoch": 8.9, "grad_norm": 12.019360542297363, "learning_rate": 5.565601199519398e-07, "loss": 0.0301, "step": 905350 }, { "epoch": 8.9, "grad_norm": 1.3265783786773682, "learning_rate": 5.564359974976914e-07, "loss": 0.0244, "step": 905375 }, { "epoch": 8.9, "grad_norm": 13.236251831054688, "learning_rate": 5.563118750434429e-07, "loss": 0.0301, "step": 905400 }, { "epoch": 8.9, "grad_norm": 0.06497681140899658, "learning_rate": 5.561877525891944e-07, "loss": 0.0491, "step": 905425 }, { "epoch": 8.9, "grad_norm": 11.128692626953125, "learning_rate": 5.56063630134946e-07, "loss": 0.0289, "step": 905450 }, { "epoch": 8.9, "grad_norm": 0.057264648377895355, "learning_rate": 5.559395076806976e-07, "loss": 0.0423, "step": 905475 }, { "epoch": 8.9, "grad_norm": 18.613374710083008, "learning_rate": 5.55815385226449e-07, "loss": 0.0276, "step": 905500 }, { "epoch": 8.9, "grad_norm": 0.21279478073120117, "learning_rate": 5.556912627722007e-07, "loss": 0.0406, "step": 905525 }, { "epoch": 8.9, "grad_norm": 14.03187370300293, "learning_rate": 5.555671403179521e-07, "loss": 0.0364, "step": 905550 }, { "epoch": 8.9, "grad_norm": 6.390647888183594, "learning_rate": 5.554430178637037e-07, "loss": 0.0297, "step": 905575 }, { "epoch": 8.9, "grad_norm": 19.50286293029785, "learning_rate": 5.553188954094552e-07, "loss": 0.0302, "step": 905600 }, { "epoch": 8.9, "grad_norm": 1.8749349117279053, "learning_rate": 5.551947729552068e-07, "loss": 0.0405, "step": 905625 }, { "epoch": 8.9, "grad_norm": 1.729366660118103, "learning_rate": 5.550706505009583e-07, "loss": 0.0431, "step": 905650 }, { "epoch": 8.9, "grad_norm": 4.480729579925537, "learning_rate": 5.549465280467098e-07, "loss": 0.0453, "step": 905675 }, { "epoch": 8.9, "grad_norm": 7.104419231414795, "learning_rate": 5.548224055924614e-07, "loss": 0.0288, "step": 905700 }, { "epoch": 8.91, "grad_norm": 3.104097366333008, "learning_rate": 5.546982831382129e-07, "loss": 0.0326, "step": 905725 }, { "epoch": 8.91, "grad_norm": 21.399377822875977, "learning_rate": 5.545741606839644e-07, "loss": 0.0283, "step": 905750 }, { "epoch": 8.91, "grad_norm": 0.48101404309272766, "learning_rate": 5.544500382297159e-07, "loss": 0.0341, "step": 905775 }, { "epoch": 8.91, "grad_norm": 7.4913010597229, "learning_rate": 5.543259157754675e-07, "loss": 0.0208, "step": 905800 }, { "epoch": 8.91, "grad_norm": 0.10531850159168243, "learning_rate": 5.54201793321219e-07, "loss": 0.0356, "step": 905825 }, { "epoch": 8.91, "grad_norm": 11.422346115112305, "learning_rate": 5.540776708669705e-07, "loss": 0.0439, "step": 905850 }, { "epoch": 8.91, "grad_norm": 0.042989615350961685, "learning_rate": 5.539535484127221e-07, "loss": 0.026, "step": 905875 }, { "epoch": 8.91, "grad_norm": 8.183691024780273, "learning_rate": 5.538294259584737e-07, "loss": 0.025, "step": 905900 }, { "epoch": 8.91, "grad_norm": 0.3361116349697113, "learning_rate": 5.537053035042251e-07, "loss": 0.0384, "step": 905925 }, { "epoch": 8.91, "grad_norm": 11.306451797485352, "learning_rate": 5.535811810499768e-07, "loss": 0.0313, "step": 905950 }, { "epoch": 8.91, "grad_norm": 0.0037919238675385714, "learning_rate": 5.534570585957282e-07, "loss": 0.0244, "step": 905975 }, { "epoch": 8.91, "grad_norm": 6.291296482086182, "learning_rate": 5.533329361414798e-07, "loss": 0.0339, "step": 906000 }, { "epoch": 8.91, "grad_norm": 0.07863509654998779, "learning_rate": 5.532088136872313e-07, "loss": 0.038, "step": 906025 }, { "epoch": 8.91, "grad_norm": 3.941296339035034, "learning_rate": 5.530846912329829e-07, "loss": 0.0227, "step": 906050 }, { "epoch": 8.91, "grad_norm": 2.717731237411499, "learning_rate": 5.529605687787344e-07, "loss": 0.0215, "step": 906075 }, { "epoch": 8.91, "grad_norm": 10.844634056091309, "learning_rate": 5.528364463244859e-07, "loss": 0.0225, "step": 906100 }, { "epoch": 8.91, "grad_norm": 0.18064583837985992, "learning_rate": 5.527123238702375e-07, "loss": 0.0347, "step": 906125 }, { "epoch": 8.91, "grad_norm": 0.2561708092689514, "learning_rate": 5.52588201415989e-07, "loss": 0.0324, "step": 906150 }, { "epoch": 8.91, "grad_norm": 0.7881852984428406, "learning_rate": 5.524640789617405e-07, "loss": 0.0353, "step": 906175 }, { "epoch": 8.91, "grad_norm": 10.428117752075195, "learning_rate": 5.523399565074921e-07, "loss": 0.0159, "step": 906200 }, { "epoch": 8.91, "grad_norm": 0.020893052220344543, "learning_rate": 5.522158340532436e-07, "loss": 0.0371, "step": 906225 }, { "epoch": 8.91, "grad_norm": 10.881829261779785, "learning_rate": 5.520917115989952e-07, "loss": 0.0234, "step": 906250 }, { "epoch": 8.91, "grad_norm": 8.868165969848633, "learning_rate": 5.519675891447466e-07, "loss": 0.0398, "step": 906275 }, { "epoch": 8.91, "grad_norm": 0.24714604020118713, "learning_rate": 5.518434666904983e-07, "loss": 0.0136, "step": 906300 }, { "epoch": 8.91, "grad_norm": 0.025574056431651115, "learning_rate": 5.517193442362498e-07, "loss": 0.0296, "step": 906325 }, { "epoch": 8.91, "grad_norm": 6.261375427246094, "learning_rate": 5.515952217820013e-07, "loss": 0.0294, "step": 906350 }, { "epoch": 8.91, "grad_norm": 0.31741422414779663, "learning_rate": 5.514710993277529e-07, "loss": 0.0277, "step": 906375 }, { "epoch": 8.91, "grad_norm": 13.042936325073242, "learning_rate": 5.513469768735044e-07, "loss": 0.016, "step": 906400 }, { "epoch": 8.91, "grad_norm": 9.184649467468262, "learning_rate": 5.512228544192559e-07, "loss": 0.0356, "step": 906425 }, { "epoch": 8.91, "grad_norm": 5.912083148956299, "learning_rate": 5.510987319650074e-07, "loss": 0.046, "step": 906450 }, { "epoch": 8.91, "grad_norm": 0.22559016942977905, "learning_rate": 5.50974609510759e-07, "loss": 0.0351, "step": 906475 }, { "epoch": 8.91, "grad_norm": 4.567338943481445, "learning_rate": 5.508504870565105e-07, "loss": 0.0282, "step": 906500 }, { "epoch": 8.91, "grad_norm": 7.333584785461426, "learning_rate": 5.50726364602262e-07, "loss": 0.0324, "step": 906525 }, { "epoch": 8.91, "grad_norm": 2.5096049308776855, "learning_rate": 5.506022421480136e-07, "loss": 0.0336, "step": 906550 }, { "epoch": 8.91, "grad_norm": 1.6503822803497314, "learning_rate": 5.504781196937651e-07, "loss": 0.0258, "step": 906575 }, { "epoch": 8.91, "grad_norm": 3.6641740798950195, "learning_rate": 5.503539972395166e-07, "loss": 0.0168, "step": 906600 }, { "epoch": 8.91, "grad_norm": 8.636068344116211, "learning_rate": 5.502298747852682e-07, "loss": 0.0313, "step": 906625 }, { "epoch": 8.91, "grad_norm": 11.989559173583984, "learning_rate": 5.501057523310197e-07, "loss": 0.0362, "step": 906650 }, { "epoch": 8.91, "grad_norm": 0.027534393593668938, "learning_rate": 5.499865947749412e-07, "loss": 0.0499, "step": 906675 }, { "epoch": 8.91, "grad_norm": 4.551797866821289, "learning_rate": 5.498624723206928e-07, "loss": 0.018, "step": 906700 }, { "epoch": 8.92, "grad_norm": 0.0319025032222271, "learning_rate": 5.497383498664443e-07, "loss": 0.0343, "step": 906725 }, { "epoch": 8.92, "grad_norm": 1.7319358587265015, "learning_rate": 5.496142274121958e-07, "loss": 0.0185, "step": 906750 }, { "epoch": 8.92, "grad_norm": 0.6331694722175598, "learning_rate": 5.494901049579473e-07, "loss": 0.0401, "step": 906775 }, { "epoch": 8.92, "grad_norm": 4.218278884887695, "learning_rate": 5.493659825036989e-07, "loss": 0.0132, "step": 906800 }, { "epoch": 8.92, "grad_norm": 0.20147119462490082, "learning_rate": 5.492418600494504e-07, "loss": 0.0299, "step": 906825 }, { "epoch": 8.92, "grad_norm": 13.33935260772705, "learning_rate": 5.491177375952019e-07, "loss": 0.0415, "step": 906850 }, { "epoch": 8.92, "grad_norm": 0.017925700172781944, "learning_rate": 5.489936151409535e-07, "loss": 0.0466, "step": 906875 }, { "epoch": 8.92, "grad_norm": 6.258260726928711, "learning_rate": 5.488694926867051e-07, "loss": 0.0115, "step": 906900 }, { "epoch": 8.92, "grad_norm": 5.854400157928467, "learning_rate": 5.487453702324566e-07, "loss": 0.0282, "step": 906925 }, { "epoch": 8.92, "grad_norm": 10.665094375610352, "learning_rate": 5.486212477782082e-07, "loss": 0.0323, "step": 906950 }, { "epoch": 8.92, "grad_norm": 9.570890426635742, "learning_rate": 5.484971253239597e-07, "loss": 0.0516, "step": 906975 }, { "epoch": 8.92, "grad_norm": 16.60346221923828, "learning_rate": 5.483730028697112e-07, "loss": 0.0391, "step": 907000 }, { "epoch": 8.92, "grad_norm": 0.06784713268280029, "learning_rate": 5.482488804154627e-07, "loss": 0.0276, "step": 907025 }, { "epoch": 8.92, "grad_norm": 12.629873275756836, "learning_rate": 5.481247579612143e-07, "loss": 0.0341, "step": 907050 }, { "epoch": 8.92, "grad_norm": 0.03785841166973114, "learning_rate": 5.480006355069658e-07, "loss": 0.0203, "step": 907075 }, { "epoch": 8.92, "grad_norm": 8.530325889587402, "learning_rate": 5.478765130527173e-07, "loss": 0.0215, "step": 907100 }, { "epoch": 8.92, "grad_norm": 0.743621826171875, "learning_rate": 5.477523905984689e-07, "loss": 0.0273, "step": 907125 }, { "epoch": 8.92, "grad_norm": 6.1596479415893555, "learning_rate": 5.476282681442204e-07, "loss": 0.0177, "step": 907150 }, { "epoch": 8.92, "grad_norm": 0.7444965243339539, "learning_rate": 5.475041456899719e-07, "loss": 0.0294, "step": 907175 }, { "epoch": 8.92, "grad_norm": 11.53023910522461, "learning_rate": 5.473800232357234e-07, "loss": 0.0408, "step": 907200 }, { "epoch": 8.92, "grad_norm": 4.3605170249938965, "learning_rate": 5.472559007814751e-07, "loss": 0.0222, "step": 907225 }, { "epoch": 8.92, "grad_norm": 18.22873878479004, "learning_rate": 5.471317783272265e-07, "loss": 0.0301, "step": 907250 }, { "epoch": 8.92, "grad_norm": 3.732057571411133, "learning_rate": 5.470076558729781e-07, "loss": 0.0302, "step": 907275 }, { "epoch": 8.92, "grad_norm": 1.765633463859558, "learning_rate": 5.468835334187296e-07, "loss": 0.0154, "step": 907300 }, { "epoch": 8.92, "grad_norm": 0.4859635829925537, "learning_rate": 5.467594109644812e-07, "loss": 0.0403, "step": 907325 }, { "epoch": 8.92, "grad_norm": 11.439559936523438, "learning_rate": 5.466352885102327e-07, "loss": 0.0288, "step": 907350 }, { "epoch": 8.92, "grad_norm": 0.03854489326477051, "learning_rate": 5.465111660559843e-07, "loss": 0.0274, "step": 907375 }, { "epoch": 8.92, "grad_norm": 4.3853678703308105, "learning_rate": 5.463870436017358e-07, "loss": 0.0208, "step": 907400 }, { "epoch": 8.92, "grad_norm": 0.044093791395425797, "learning_rate": 5.462629211474873e-07, "loss": 0.0243, "step": 907425 }, { "epoch": 8.92, "grad_norm": 9.832422256469727, "learning_rate": 5.461387986932388e-07, "loss": 0.025, "step": 907450 }, { "epoch": 8.92, "grad_norm": 2.392756223678589, "learning_rate": 5.460146762389904e-07, "loss": 0.0291, "step": 907475 }, { "epoch": 8.92, "grad_norm": 1.9956121444702148, "learning_rate": 5.458905537847419e-07, "loss": 0.0155, "step": 907500 }, { "epoch": 8.92, "grad_norm": 0.024482600390911102, "learning_rate": 5.457664313304934e-07, "loss": 0.0422, "step": 907525 }, { "epoch": 8.92, "grad_norm": 15.4697904586792, "learning_rate": 5.45642308876245e-07, "loss": 0.0296, "step": 907550 }, { "epoch": 8.92, "grad_norm": 0.49767106771469116, "learning_rate": 5.455181864219965e-07, "loss": 0.0227, "step": 907575 }, { "epoch": 8.92, "grad_norm": 4.756701946258545, "learning_rate": 5.45394063967748e-07, "loss": 0.0309, "step": 907600 }, { "epoch": 8.92, "grad_norm": 0.06292026489973068, "learning_rate": 5.452699415134997e-07, "loss": 0.0277, "step": 907625 }, { "epoch": 8.92, "grad_norm": 7.721576690673828, "learning_rate": 5.451458190592512e-07, "loss": 0.0327, "step": 907650 }, { "epoch": 8.92, "grad_norm": 0.010341450572013855, "learning_rate": 5.450216966050027e-07, "loss": 0.0401, "step": 907675 }, { "epoch": 8.92, "grad_norm": 11.856342315673828, "learning_rate": 5.448975741507542e-07, "loss": 0.0093, "step": 907700 }, { "epoch": 8.92, "grad_norm": 7.697281360626221, "learning_rate": 5.447734516965058e-07, "loss": 0.0426, "step": 907725 }, { "epoch": 8.93, "grad_norm": 1.853104829788208, "learning_rate": 5.446493292422573e-07, "loss": 0.0094, "step": 907750 }, { "epoch": 8.93, "grad_norm": 0.33629629015922546, "learning_rate": 5.445252067880088e-07, "loss": 0.0447, "step": 907775 }, { "epoch": 8.93, "grad_norm": 14.966174125671387, "learning_rate": 5.444010843337604e-07, "loss": 0.0242, "step": 907800 }, { "epoch": 8.93, "grad_norm": 0.007644537836313248, "learning_rate": 5.442769618795119e-07, "loss": 0.0341, "step": 907825 }, { "epoch": 8.93, "grad_norm": 6.280445098876953, "learning_rate": 5.441528394252634e-07, "loss": 0.0247, "step": 907850 }, { "epoch": 8.93, "grad_norm": 0.5562437772750854, "learning_rate": 5.440287169710149e-07, "loss": 0.0473, "step": 907875 }, { "epoch": 8.93, "grad_norm": 5.064219951629639, "learning_rate": 5.439045945167665e-07, "loss": 0.0153, "step": 907900 }, { "epoch": 8.93, "grad_norm": 0.0786309689283371, "learning_rate": 5.43780472062518e-07, "loss": 0.0508, "step": 907925 }, { "epoch": 8.93, "grad_norm": 5.272180557250977, "learning_rate": 5.436563496082696e-07, "loss": 0.0246, "step": 907950 }, { "epoch": 8.93, "grad_norm": 0.11426597833633423, "learning_rate": 5.435322271540211e-07, "loss": 0.0333, "step": 907975 }, { "epoch": 8.93, "grad_norm": 10.574079513549805, "learning_rate": 5.434081046997727e-07, "loss": 0.0241, "step": 908000 }, { "epoch": 8.93, "grad_norm": 0.02152268774807453, "learning_rate": 5.432839822455241e-07, "loss": 0.0356, "step": 908025 }, { "epoch": 8.93, "grad_norm": 12.827040672302246, "learning_rate": 5.431598597912758e-07, "loss": 0.0218, "step": 908050 }, { "epoch": 8.93, "grad_norm": 8.200298309326172, "learning_rate": 5.430357373370273e-07, "loss": 0.0325, "step": 908075 }, { "epoch": 8.93, "grad_norm": 8.825377464294434, "learning_rate": 5.429116148827788e-07, "loss": 0.0418, "step": 908100 }, { "epoch": 8.93, "grad_norm": 0.3160507082939148, "learning_rate": 5.427874924285303e-07, "loss": 0.0512, "step": 908125 }, { "epoch": 8.93, "grad_norm": 3.33937931060791, "learning_rate": 5.426633699742819e-07, "loss": 0.0212, "step": 908150 }, { "epoch": 8.93, "grad_norm": 0.05552854388952255, "learning_rate": 5.425392475200334e-07, "loss": 0.0236, "step": 908175 }, { "epoch": 8.93, "grad_norm": 7.978715896606445, "learning_rate": 5.424151250657849e-07, "loss": 0.0281, "step": 908200 }, { "epoch": 8.93, "grad_norm": 0.9282934665679932, "learning_rate": 5.422910026115365e-07, "loss": 0.0305, "step": 908225 }, { "epoch": 8.93, "grad_norm": 7.663494110107422, "learning_rate": 5.42166880157288e-07, "loss": 0.0294, "step": 908250 }, { "epoch": 8.93, "grad_norm": 0.03751017525792122, "learning_rate": 5.420427577030395e-07, "loss": 0.0312, "step": 908275 }, { "epoch": 8.93, "grad_norm": 3.766855239868164, "learning_rate": 5.419186352487911e-07, "loss": 0.0184, "step": 908300 }, { "epoch": 8.93, "grad_norm": 0.00699703861027956, "learning_rate": 5.417945127945427e-07, "loss": 0.0287, "step": 908325 }, { "epoch": 8.93, "grad_norm": 17.526166915893555, "learning_rate": 5.416703903402942e-07, "loss": 0.026, "step": 908350 }, { "epoch": 8.93, "grad_norm": 0.05180993676185608, "learning_rate": 5.415462678860457e-07, "loss": 0.0262, "step": 908375 }, { "epoch": 8.93, "grad_norm": 1.7432361841201782, "learning_rate": 5.414221454317973e-07, "loss": 0.0262, "step": 908400 }, { "epoch": 8.93, "grad_norm": 0.16595643758773804, "learning_rate": 5.412980229775488e-07, "loss": 0.0287, "step": 908425 }, { "epoch": 8.93, "grad_norm": 21.444900512695312, "learning_rate": 5.411739005233003e-07, "loss": 0.0275, "step": 908450 }, { "epoch": 8.93, "grad_norm": 0.9128702282905579, "learning_rate": 5.410497780690519e-07, "loss": 0.0336, "step": 908475 }, { "epoch": 8.93, "grad_norm": 19.30331802368164, "learning_rate": 5.409256556148034e-07, "loss": 0.0286, "step": 908500 }, { "epoch": 8.93, "grad_norm": 2.941547393798828, "learning_rate": 5.408015331605549e-07, "loss": 0.0511, "step": 908525 }, { "epoch": 8.93, "grad_norm": 10.070151329040527, "learning_rate": 5.406774107063064e-07, "loss": 0.0304, "step": 908550 }, { "epoch": 8.93, "grad_norm": 6.856816291809082, "learning_rate": 5.40553288252058e-07, "loss": 0.0295, "step": 908575 }, { "epoch": 8.93, "grad_norm": 2.4369423389434814, "learning_rate": 5.404291657978095e-07, "loss": 0.0307, "step": 908600 }, { "epoch": 8.93, "grad_norm": 0.2869569659233093, "learning_rate": 5.403050433435611e-07, "loss": 0.0299, "step": 908625 }, { "epoch": 8.93, "grad_norm": 1.1574666500091553, "learning_rate": 5.401809208893126e-07, "loss": 0.0235, "step": 908650 }, { "epoch": 8.93, "grad_norm": 0.020461298525333405, "learning_rate": 5.400567984350642e-07, "loss": 0.0317, "step": 908675 }, { "epoch": 8.93, "grad_norm": 5.21384334564209, "learning_rate": 5.399326759808156e-07, "loss": 0.0242, "step": 908700 }, { "epoch": 8.93, "grad_norm": 0.015244729816913605, "learning_rate": 5.398085535265672e-07, "loss": 0.0342, "step": 908725 }, { "epoch": 8.93, "grad_norm": 1.5356407165527344, "learning_rate": 5.396844310723188e-07, "loss": 0.0136, "step": 908750 }, { "epoch": 8.94, "grad_norm": 0.08515677601099014, "learning_rate": 5.395603086180703e-07, "loss": 0.0344, "step": 908775 }, { "epoch": 8.94, "grad_norm": 9.469529151916504, "learning_rate": 5.394361861638218e-07, "loss": 0.0289, "step": 908800 }, { "epoch": 8.94, "grad_norm": 1.8296705484390259, "learning_rate": 5.393120637095734e-07, "loss": 0.0287, "step": 908825 }, { "epoch": 8.94, "grad_norm": 1.1435546875, "learning_rate": 5.391879412553249e-07, "loss": 0.0345, "step": 908850 }, { "epoch": 8.94, "grad_norm": 0.006562379188835621, "learning_rate": 5.390638188010764e-07, "loss": 0.0228, "step": 908875 }, { "epoch": 8.94, "grad_norm": 0.8838368058204651, "learning_rate": 5.38939696346828e-07, "loss": 0.0282, "step": 908900 }, { "epoch": 8.94, "grad_norm": 0.02590576931834221, "learning_rate": 5.388155738925795e-07, "loss": 0.0229, "step": 908925 }, { "epoch": 8.94, "grad_norm": 18.84278678894043, "learning_rate": 5.38691451438331e-07, "loss": 0.0299, "step": 908950 }, { "epoch": 8.94, "grad_norm": 0.2789067029953003, "learning_rate": 5.385673289840826e-07, "loss": 0.0262, "step": 908975 }, { "epoch": 8.94, "grad_norm": 10.163534164428711, "learning_rate": 5.384432065298341e-07, "loss": 0.0279, "step": 909000 }, { "epoch": 8.94, "grad_norm": 0.28633105754852295, "learning_rate": 5.383190840755857e-07, "loss": 0.0243, "step": 909025 }, { "epoch": 8.94, "grad_norm": 1.4743094444274902, "learning_rate": 5.38199926519507e-07, "loss": 0.02, "step": 909050 }, { "epoch": 8.94, "grad_norm": 0.10560398548841476, "learning_rate": 5.380758040652587e-07, "loss": 0.0304, "step": 909075 }, { "epoch": 8.94, "grad_norm": 9.285579681396484, "learning_rate": 5.379516816110102e-07, "loss": 0.0298, "step": 909100 }, { "epoch": 8.94, "grad_norm": 1.6018904447555542, "learning_rate": 5.378275591567617e-07, "loss": 0.0381, "step": 909125 }, { "epoch": 8.94, "grad_norm": 8.162025451660156, "learning_rate": 5.377034367025133e-07, "loss": 0.0371, "step": 909150 }, { "epoch": 8.94, "grad_norm": 1.5397990942001343, "learning_rate": 5.375793142482648e-07, "loss": 0.0303, "step": 909175 }, { "epoch": 8.94, "grad_norm": 8.47806453704834, "learning_rate": 5.374551917940163e-07, "loss": 0.0403, "step": 909200 }, { "epoch": 8.94, "grad_norm": 4.287384986877441, "learning_rate": 5.37331069339768e-07, "loss": 0.0268, "step": 909225 }, { "epoch": 8.94, "grad_norm": 12.44381332397461, "learning_rate": 5.372069468855194e-07, "loss": 0.0315, "step": 909250 }, { "epoch": 8.94, "grad_norm": 0.017959339544177055, "learning_rate": 5.37082824431271e-07, "loss": 0.0361, "step": 909275 }, { "epoch": 8.94, "grad_norm": 6.6539306640625, "learning_rate": 5.369587019770224e-07, "loss": 0.0178, "step": 909300 }, { "epoch": 8.94, "grad_norm": 0.12313195317983627, "learning_rate": 5.368345795227741e-07, "loss": 0.0259, "step": 909325 }, { "epoch": 8.94, "grad_norm": 7.796919822692871, "learning_rate": 5.367104570685255e-07, "loss": 0.0328, "step": 909350 }, { "epoch": 8.94, "grad_norm": 0.7983201146125793, "learning_rate": 5.365863346142771e-07, "loss": 0.0431, "step": 909375 }, { "epoch": 8.94, "grad_norm": 14.241094589233398, "learning_rate": 5.364622121600287e-07, "loss": 0.0162, "step": 909400 }, { "epoch": 8.94, "grad_norm": 0.02789626084268093, "learning_rate": 5.363380897057802e-07, "loss": 0.0235, "step": 909425 }, { "epoch": 8.94, "grad_norm": 11.96815013885498, "learning_rate": 5.362139672515317e-07, "loss": 0.0252, "step": 909450 }, { "epoch": 8.94, "grad_norm": 0.026827719062566757, "learning_rate": 5.360898447972833e-07, "loss": 0.0362, "step": 909475 }, { "epoch": 8.94, "grad_norm": 10.002593040466309, "learning_rate": 5.359657223430348e-07, "loss": 0.0275, "step": 909500 }, { "epoch": 8.94, "grad_norm": 0.08064978569746017, "learning_rate": 5.358415998887863e-07, "loss": 0.0199, "step": 909525 }, { "epoch": 8.94, "grad_norm": 11.434704780578613, "learning_rate": 5.357174774345378e-07, "loss": 0.0201, "step": 909550 }, { "epoch": 8.94, "grad_norm": 1.8115370273590088, "learning_rate": 5.355933549802894e-07, "loss": 0.0241, "step": 909575 }, { "epoch": 8.94, "grad_norm": 3.4239442348480225, "learning_rate": 5.354692325260409e-07, "loss": 0.0316, "step": 909600 }, { "epoch": 8.94, "grad_norm": 0.19600428640842438, "learning_rate": 5.353451100717924e-07, "loss": 0.0449, "step": 909625 }, { "epoch": 8.94, "grad_norm": 1.269769310951233, "learning_rate": 5.352209876175441e-07, "loss": 0.026, "step": 909650 }, { "epoch": 8.94, "grad_norm": 1.8589259386062622, "learning_rate": 5.350968651632955e-07, "loss": 0.0351, "step": 909675 }, { "epoch": 8.94, "grad_norm": 5.563472747802734, "learning_rate": 5.349727427090471e-07, "loss": 0.0116, "step": 909700 }, { "epoch": 8.94, "grad_norm": 0.013735652901232243, "learning_rate": 5.348486202547985e-07, "loss": 0.0253, "step": 909725 }, { "epoch": 8.94, "grad_norm": 15.776728630065918, "learning_rate": 5.347244978005502e-07, "loss": 0.0196, "step": 909750 }, { "epoch": 8.95, "grad_norm": 0.34313416481018066, "learning_rate": 5.346003753463017e-07, "loss": 0.0265, "step": 909775 }, { "epoch": 8.95, "grad_norm": 20.275785446166992, "learning_rate": 5.344762528920532e-07, "loss": 0.0432, "step": 909800 }, { "epoch": 8.95, "grad_norm": 1.5773271322250366, "learning_rate": 5.343521304378048e-07, "loss": 0.0336, "step": 909825 }, { "epoch": 8.95, "grad_norm": 6.381083965301514, "learning_rate": 5.342280079835563e-07, "loss": 0.0358, "step": 909850 }, { "epoch": 8.95, "grad_norm": 1.3492186069488525, "learning_rate": 5.341038855293078e-07, "loss": 0.0314, "step": 909875 }, { "epoch": 8.95, "grad_norm": 7.290805339813232, "learning_rate": 5.339797630750594e-07, "loss": 0.0274, "step": 909900 }, { "epoch": 8.95, "grad_norm": 5.287987232208252, "learning_rate": 5.338556406208109e-07, "loss": 0.0407, "step": 909925 }, { "epoch": 8.95, "grad_norm": 15.118098258972168, "learning_rate": 5.337315181665625e-07, "loss": 0.0321, "step": 909950 }, { "epoch": 8.95, "grad_norm": 1.3547592163085938, "learning_rate": 5.336073957123139e-07, "loss": 0.0282, "step": 909975 }, { "epoch": 8.95, "grad_norm": 3.957721471786499, "learning_rate": 5.334832732580656e-07, "loss": 0.0271, "step": 910000 }, { "epoch": 8.95, "grad_norm": 0.037084951996803284, "learning_rate": 5.33359150803817e-07, "loss": 0.0238, "step": 910025 }, { "epoch": 8.95, "grad_norm": 7.933117866516113, "learning_rate": 5.332350283495686e-07, "loss": 0.0366, "step": 910050 }, { "epoch": 8.95, "grad_norm": 0.013040178455412388, "learning_rate": 5.331109058953202e-07, "loss": 0.0341, "step": 910075 }, { "epoch": 8.95, "grad_norm": 9.157965660095215, "learning_rate": 5.329867834410717e-07, "loss": 0.0213, "step": 910100 }, { "epoch": 8.95, "grad_norm": 0.012084100395441055, "learning_rate": 5.328626609868232e-07, "loss": 0.0335, "step": 910125 }, { "epoch": 8.95, "grad_norm": 16.766401290893555, "learning_rate": 5.327385385325748e-07, "loss": 0.0275, "step": 910150 }, { "epoch": 8.95, "grad_norm": 2.0244626998901367, "learning_rate": 5.326144160783263e-07, "loss": 0.0386, "step": 910175 }, { "epoch": 8.95, "grad_norm": 14.55936336517334, "learning_rate": 5.324902936240778e-07, "loss": 0.0202, "step": 910200 }, { "epoch": 8.95, "grad_norm": 4.882365703582764, "learning_rate": 5.323661711698293e-07, "loss": 0.0248, "step": 910225 }, { "epoch": 8.95, "grad_norm": 4.728213787078857, "learning_rate": 5.322420487155809e-07, "loss": 0.0225, "step": 910250 }, { "epoch": 8.95, "grad_norm": 0.23573951423168182, "learning_rate": 5.321179262613324e-07, "loss": 0.0224, "step": 910275 }, { "epoch": 8.95, "grad_norm": 10.76523494720459, "learning_rate": 5.319938038070839e-07, "loss": 0.0229, "step": 910300 }, { "epoch": 8.95, "grad_norm": 0.009844725951552391, "learning_rate": 5.318696813528355e-07, "loss": 0.0374, "step": 910325 }, { "epoch": 8.95, "grad_norm": 0.9916777610778809, "learning_rate": 5.31745558898587e-07, "loss": 0.0225, "step": 910350 }, { "epoch": 8.95, "grad_norm": 1.9777568578720093, "learning_rate": 5.316214364443386e-07, "loss": 0.0278, "step": 910375 }, { "epoch": 8.95, "grad_norm": 13.746742248535156, "learning_rate": 5.3149731399009e-07, "loss": 0.0305, "step": 910400 }, { "epoch": 8.95, "grad_norm": 0.29880350828170776, "learning_rate": 5.313731915358417e-07, "loss": 0.0297, "step": 910425 }, { "epoch": 8.95, "grad_norm": 10.114012718200684, "learning_rate": 5.312490690815931e-07, "loss": 0.0358, "step": 910450 }, { "epoch": 8.95, "grad_norm": 1.5110400915145874, "learning_rate": 5.311249466273447e-07, "loss": 0.0306, "step": 910475 }, { "epoch": 8.95, "grad_norm": 7.117454528808594, "learning_rate": 5.310008241730963e-07, "loss": 0.029, "step": 910500 }, { "epoch": 8.95, "grad_norm": 0.5851754546165466, "learning_rate": 5.308767017188478e-07, "loss": 0.0339, "step": 910525 }, { "epoch": 8.95, "grad_norm": 5.688192367553711, "learning_rate": 5.307525792645993e-07, "loss": 0.0213, "step": 910550 }, { "epoch": 8.95, "grad_norm": 0.14883479475975037, "learning_rate": 5.306284568103509e-07, "loss": 0.0292, "step": 910575 }, { "epoch": 8.95, "grad_norm": 15.653107643127441, "learning_rate": 5.305043343561024e-07, "loss": 0.0289, "step": 910600 }, { "epoch": 8.95, "grad_norm": 0.004634608514606953, "learning_rate": 5.303802119018539e-07, "loss": 0.0251, "step": 910625 }, { "epoch": 8.95, "grad_norm": 7.585204601287842, "learning_rate": 5.302560894476054e-07, "loss": 0.0353, "step": 910650 }, { "epoch": 8.95, "grad_norm": 0.009978381916880608, "learning_rate": 5.301319669933571e-07, "loss": 0.0258, "step": 910675 }, { "epoch": 8.95, "grad_norm": 0.5751104950904846, "learning_rate": 5.300078445391085e-07, "loss": 0.0265, "step": 910700 }, { "epoch": 8.95, "grad_norm": 2.087801218032837, "learning_rate": 5.298837220848601e-07, "loss": 0.044, "step": 910725 }, { "epoch": 8.95, "grad_norm": 4.27456521987915, "learning_rate": 5.297595996306116e-07, "loss": 0.0209, "step": 910750 }, { "epoch": 8.95, "grad_norm": 1.9217286109924316, "learning_rate": 5.296354771763632e-07, "loss": 0.0344, "step": 910775 }, { "epoch": 8.96, "grad_norm": 0.2668741047382355, "learning_rate": 5.295113547221147e-07, "loss": 0.0192, "step": 910800 }, { "epoch": 8.96, "grad_norm": 1.3910789489746094, "learning_rate": 5.293872322678663e-07, "loss": 0.037, "step": 910825 }, { "epoch": 8.96, "grad_norm": 0.5492849349975586, "learning_rate": 5.292631098136178e-07, "loss": 0.0225, "step": 910850 }, { "epoch": 8.96, "grad_norm": 0.09367325901985168, "learning_rate": 5.291389873593693e-07, "loss": 0.0277, "step": 910875 }, { "epoch": 8.96, "grad_norm": 10.250786781311035, "learning_rate": 5.290148649051208e-07, "loss": 0.0356, "step": 910900 }, { "epoch": 8.96, "grad_norm": 1.376188039779663, "learning_rate": 5.288907424508724e-07, "loss": 0.032, "step": 910925 }, { "epoch": 8.96, "grad_norm": 2.4137351512908936, "learning_rate": 5.287666199966239e-07, "loss": 0.0165, "step": 910950 }, { "epoch": 8.96, "grad_norm": 6.141378879547119, "learning_rate": 5.286424975423754e-07, "loss": 0.0265, "step": 910975 }, { "epoch": 8.96, "grad_norm": 10.517450332641602, "learning_rate": 5.28518375088127e-07, "loss": 0.0214, "step": 911000 }, { "epoch": 8.96, "grad_norm": 0.06618206202983856, "learning_rate": 5.283942526338785e-07, "loss": 0.0404, "step": 911025 }, { "epoch": 8.96, "grad_norm": 3.59664249420166, "learning_rate": 5.2827013017963e-07, "loss": 0.0232, "step": 911050 }, { "epoch": 8.96, "grad_norm": 4.260836601257324, "learning_rate": 5.281460077253815e-07, "loss": 0.0153, "step": 911075 }, { "epoch": 8.96, "grad_norm": 0.41081884503364563, "learning_rate": 5.280218852711332e-07, "loss": 0.0135, "step": 911100 }, { "epoch": 8.96, "grad_norm": 0.11758103966712952, "learning_rate": 5.278977628168846e-07, "loss": 0.0498, "step": 911125 }, { "epoch": 8.96, "grad_norm": 3.2487518787384033, "learning_rate": 5.277736403626362e-07, "loss": 0.0142, "step": 911150 }, { "epoch": 8.96, "grad_norm": 0.05195101723074913, "learning_rate": 5.276495179083878e-07, "loss": 0.0315, "step": 911175 }, { "epoch": 8.96, "grad_norm": 2.5488622188568115, "learning_rate": 5.275253954541393e-07, "loss": 0.0303, "step": 911200 }, { "epoch": 8.96, "grad_norm": 0.1859940141439438, "learning_rate": 5.274012729998908e-07, "loss": 0.0289, "step": 911225 }, { "epoch": 8.96, "grad_norm": 13.576987266540527, "learning_rate": 5.272771505456424e-07, "loss": 0.0323, "step": 911250 }, { "epoch": 8.96, "grad_norm": 0.5987634062767029, "learning_rate": 5.271530280913939e-07, "loss": 0.0246, "step": 911275 }, { "epoch": 8.96, "grad_norm": 6.448889255523682, "learning_rate": 5.270289056371454e-07, "loss": 0.0222, "step": 911300 }, { "epoch": 8.96, "grad_norm": 0.046092163771390915, "learning_rate": 5.26909748081067e-07, "loss": 0.033, "step": 911325 }, { "epoch": 8.96, "grad_norm": 10.247666358947754, "learning_rate": 5.267856256268184e-07, "loss": 0.0246, "step": 911350 }, { "epoch": 8.96, "grad_norm": 0.004307620692998171, "learning_rate": 5.2666150317257e-07, "loss": 0.0358, "step": 911375 }, { "epoch": 8.96, "grad_norm": 10.560384750366211, "learning_rate": 5.265373807183216e-07, "loss": 0.0246, "step": 911400 }, { "epoch": 8.96, "grad_norm": 1.0848463773727417, "learning_rate": 5.264132582640731e-07, "loss": 0.0193, "step": 911425 }, { "epoch": 8.96, "grad_norm": 3.1418111324310303, "learning_rate": 5.262891358098246e-07, "loss": 0.0251, "step": 911450 }, { "epoch": 8.96, "grad_norm": 0.057230692356824875, "learning_rate": 5.261650133555761e-07, "loss": 0.0397, "step": 911475 }, { "epoch": 8.96, "grad_norm": 16.297042846679688, "learning_rate": 5.260408909013277e-07, "loss": 0.0368, "step": 911500 }, { "epoch": 8.96, "grad_norm": 2.297893762588501, "learning_rate": 5.259167684470792e-07, "loss": 0.0205, "step": 911525 }, { "epoch": 8.96, "grad_norm": 19.970102310180664, "learning_rate": 5.257926459928307e-07, "loss": 0.0401, "step": 911550 }, { "epoch": 8.96, "grad_norm": 0.09254639595746994, "learning_rate": 5.256685235385823e-07, "loss": 0.0202, "step": 911575 }, { "epoch": 8.96, "grad_norm": 19.43694496154785, "learning_rate": 5.255444010843338e-07, "loss": 0.0322, "step": 911600 }, { "epoch": 8.96, "grad_norm": 0.16734446585178375, "learning_rate": 5.254202786300853e-07, "loss": 0.0365, "step": 911625 }, { "epoch": 8.96, "grad_norm": 1.5974798202514648, "learning_rate": 5.252961561758368e-07, "loss": 0.0168, "step": 911650 }, { "epoch": 8.96, "grad_norm": 0.005886830855160952, "learning_rate": 5.251720337215884e-07, "loss": 0.0327, "step": 911675 }, { "epoch": 8.96, "grad_norm": 19.228435516357422, "learning_rate": 5.2504791126734e-07, "loss": 0.0234, "step": 911700 }, { "epoch": 8.96, "grad_norm": 0.2768053114414215, "learning_rate": 5.249237888130914e-07, "loss": 0.0387, "step": 911725 }, { "epoch": 8.96, "grad_norm": 6.107675075531006, "learning_rate": 5.247996663588431e-07, "loss": 0.0206, "step": 911750 }, { "epoch": 8.96, "grad_norm": 0.040713999420404434, "learning_rate": 5.246755439045945e-07, "loss": 0.0245, "step": 911775 }, { "epoch": 8.96, "grad_norm": 3.94625186920166, "learning_rate": 5.245514214503461e-07, "loss": 0.024, "step": 911800 }, { "epoch": 8.97, "grad_norm": 0.0631839856505394, "learning_rate": 5.244272989960977e-07, "loss": 0.0279, "step": 911825 }, { "epoch": 8.97, "grad_norm": 24.681461334228516, "learning_rate": 5.243031765418492e-07, "loss": 0.0372, "step": 911850 }, { "epoch": 8.97, "grad_norm": 0.36461472511291504, "learning_rate": 5.241790540876007e-07, "loss": 0.0347, "step": 911875 }, { "epoch": 8.97, "grad_norm": 13.774343490600586, "learning_rate": 5.240549316333522e-07, "loss": 0.0149, "step": 911900 }, { "epoch": 8.97, "grad_norm": 0.059966426342725754, "learning_rate": 5.239308091791038e-07, "loss": 0.0368, "step": 911925 }, { "epoch": 8.97, "grad_norm": 13.59630012512207, "learning_rate": 5.238066867248553e-07, "loss": 0.0157, "step": 911950 }, { "epoch": 8.97, "grad_norm": 0.0034511538688093424, "learning_rate": 5.236825642706068e-07, "loss": 0.0265, "step": 911975 }, { "epoch": 8.97, "grad_norm": 16.642105102539062, "learning_rate": 5.235584418163585e-07, "loss": 0.0416, "step": 912000 }, { "epoch": 8.97, "grad_norm": 4.41714334487915, "learning_rate": 5.234343193621099e-07, "loss": 0.0519, "step": 912025 }, { "epoch": 8.97, "grad_norm": 1.5407005548477173, "learning_rate": 5.233101969078615e-07, "loss": 0.0265, "step": 912050 }, { "epoch": 8.97, "grad_norm": 1.6903440952301025, "learning_rate": 5.231860744536129e-07, "loss": 0.0305, "step": 912075 }, { "epoch": 8.97, "grad_norm": 10.208344459533691, "learning_rate": 5.230619519993646e-07, "loss": 0.0221, "step": 912100 }, { "epoch": 8.97, "grad_norm": 0.04874136298894882, "learning_rate": 5.229378295451161e-07, "loss": 0.0378, "step": 912125 }, { "epoch": 8.97, "grad_norm": 16.58241081237793, "learning_rate": 5.228137070908676e-07, "loss": 0.0169, "step": 912150 }, { "epoch": 8.97, "grad_norm": 0.05238473042845726, "learning_rate": 5.226895846366192e-07, "loss": 0.0396, "step": 912175 }, { "epoch": 8.97, "grad_norm": 5.587653160095215, "learning_rate": 5.225654621823707e-07, "loss": 0.0155, "step": 912200 }, { "epoch": 8.97, "grad_norm": 0.7437353730201721, "learning_rate": 5.224413397281222e-07, "loss": 0.0347, "step": 912225 }, { "epoch": 8.97, "grad_norm": 11.858413696289062, "learning_rate": 5.223172172738738e-07, "loss": 0.0231, "step": 912250 }, { "epoch": 8.97, "grad_norm": 3.065504312515259, "learning_rate": 5.221930948196253e-07, "loss": 0.0254, "step": 912275 }, { "epoch": 8.97, "grad_norm": 4.872738838195801, "learning_rate": 5.220689723653768e-07, "loss": 0.0252, "step": 912300 }, { "epoch": 8.97, "grad_norm": 0.005984611809253693, "learning_rate": 5.219448499111283e-07, "loss": 0.0279, "step": 912325 }, { "epoch": 8.97, "grad_norm": 2.6435492038726807, "learning_rate": 5.218207274568799e-07, "loss": 0.0245, "step": 912350 }, { "epoch": 8.97, "grad_norm": 1.8791836500167847, "learning_rate": 5.216966050026315e-07, "loss": 0.0272, "step": 912375 }, { "epoch": 8.97, "grad_norm": 6.542028427124023, "learning_rate": 5.215724825483829e-07, "loss": 0.0245, "step": 912400 }, { "epoch": 8.97, "grad_norm": 0.33170798420906067, "learning_rate": 5.214483600941346e-07, "loss": 0.0292, "step": 912425 }, { "epoch": 8.97, "grad_norm": 0.8926709890365601, "learning_rate": 5.21324237639886e-07, "loss": 0.0119, "step": 912450 }, { "epoch": 8.97, "grad_norm": 0.035729143768548965, "learning_rate": 5.212001151856376e-07, "loss": 0.0238, "step": 912475 }, { "epoch": 8.97, "grad_norm": 2.8621366024017334, "learning_rate": 5.21075992731389e-07, "loss": 0.0333, "step": 912500 }, { "epoch": 8.97, "grad_norm": 1.044508695602417, "learning_rate": 5.209518702771407e-07, "loss": 0.0298, "step": 912525 }, { "epoch": 8.97, "grad_norm": 5.378435134887695, "learning_rate": 5.208277478228922e-07, "loss": 0.0243, "step": 912550 }, { "epoch": 8.97, "grad_norm": 0.02657320164144039, "learning_rate": 5.207036253686437e-07, "loss": 0.0327, "step": 912575 }, { "epoch": 8.97, "grad_norm": 10.841507911682129, "learning_rate": 5.205795029143953e-07, "loss": 0.0143, "step": 912600 }, { "epoch": 8.97, "grad_norm": 0.010257575660943985, "learning_rate": 5.204553804601468e-07, "loss": 0.0366, "step": 912625 }, { "epoch": 8.97, "grad_norm": 2.7110676765441895, "learning_rate": 5.203312580058983e-07, "loss": 0.0119, "step": 912650 }, { "epoch": 8.97, "grad_norm": 0.427466481924057, "learning_rate": 5.2020713555165e-07, "loss": 0.0336, "step": 912675 }, { "epoch": 8.97, "grad_norm": 11.25735855102539, "learning_rate": 5.200830130974014e-07, "loss": 0.0357, "step": 912700 }, { "epoch": 8.97, "grad_norm": 0.27924197912216187, "learning_rate": 5.19958890643153e-07, "loss": 0.0197, "step": 912725 }, { "epoch": 8.97, "grad_norm": 13.488313674926758, "learning_rate": 5.198347681889044e-07, "loss": 0.0276, "step": 912750 }, { "epoch": 8.97, "grad_norm": 0.08499708026647568, "learning_rate": 5.197106457346561e-07, "loss": 0.0234, "step": 912775 }, { "epoch": 8.97, "grad_norm": 0.6756089329719543, "learning_rate": 5.195865232804076e-07, "loss": 0.0356, "step": 912800 }, { "epoch": 8.98, "grad_norm": 0.01963357999920845, "learning_rate": 5.194624008261591e-07, "loss": 0.026, "step": 912825 }, { "epoch": 8.98, "grad_norm": 12.08757495880127, "learning_rate": 5.193382783719107e-07, "loss": 0.0291, "step": 912850 }, { "epoch": 8.98, "grad_norm": 0.19665582478046417, "learning_rate": 5.192141559176622e-07, "loss": 0.0258, "step": 912875 }, { "epoch": 8.98, "grad_norm": 11.774094581604004, "learning_rate": 5.190900334634137e-07, "loss": 0.0363, "step": 912900 }, { "epoch": 8.98, "grad_norm": 3.1666722297668457, "learning_rate": 5.189659110091653e-07, "loss": 0.0252, "step": 912925 }, { "epoch": 8.98, "grad_norm": 15.815903663635254, "learning_rate": 5.188417885549168e-07, "loss": 0.0283, "step": 912950 }, { "epoch": 8.98, "grad_norm": 1.5139068365097046, "learning_rate": 5.187176661006683e-07, "loss": 0.0439, "step": 912975 }, { "epoch": 8.98, "grad_norm": 6.543767929077148, "learning_rate": 5.185935436464198e-07, "loss": 0.0157, "step": 913000 }, { "epoch": 8.98, "grad_norm": 1.5551574230194092, "learning_rate": 5.184694211921714e-07, "loss": 0.0351, "step": 913025 }, { "epoch": 8.98, "grad_norm": 5.2428483963012695, "learning_rate": 5.183452987379229e-07, "loss": 0.0441, "step": 913050 }, { "epoch": 8.98, "grad_norm": 8.859281539916992, "learning_rate": 5.182211762836744e-07, "loss": 0.0349, "step": 913075 }, { "epoch": 8.98, "grad_norm": 10.170483589172363, "learning_rate": 5.180970538294261e-07, "loss": 0.0302, "step": 913100 }, { "epoch": 8.98, "grad_norm": 0.03656449541449547, "learning_rate": 5.179729313751775e-07, "loss": 0.0311, "step": 913125 }, { "epoch": 8.98, "grad_norm": 11.346287727355957, "learning_rate": 5.178488089209291e-07, "loss": 0.0321, "step": 913150 }, { "epoch": 8.98, "grad_norm": 0.9465038776397705, "learning_rate": 5.177246864666805e-07, "loss": 0.025, "step": 913175 }, { "epoch": 8.98, "grad_norm": 3.648138999938965, "learning_rate": 5.176005640124322e-07, "loss": 0.0149, "step": 913200 }, { "epoch": 8.98, "grad_norm": 0.05435910075902939, "learning_rate": 5.174764415581837e-07, "loss": 0.0448, "step": 913225 }, { "epoch": 8.98, "grad_norm": 12.459701538085938, "learning_rate": 5.173523191039352e-07, "loss": 0.0339, "step": 913250 }, { "epoch": 8.98, "grad_norm": 0.28210654854774475, "learning_rate": 5.172281966496868e-07, "loss": 0.0483, "step": 913275 }, { "epoch": 8.98, "grad_norm": 9.888915061950684, "learning_rate": 5.171040741954383e-07, "loss": 0.03, "step": 913300 }, { "epoch": 8.98, "grad_norm": 0.013878279365599155, "learning_rate": 5.169799517411898e-07, "loss": 0.0317, "step": 913325 }, { "epoch": 8.98, "grad_norm": 0.9647323489189148, "learning_rate": 5.168558292869414e-07, "loss": 0.0067, "step": 913350 }, { "epoch": 8.98, "grad_norm": 0.009844662621617317, "learning_rate": 5.167317068326929e-07, "loss": 0.032, "step": 913375 }, { "epoch": 8.98, "grad_norm": 10.350228309631348, "learning_rate": 5.166075843784445e-07, "loss": 0.0305, "step": 913400 }, { "epoch": 8.98, "grad_norm": 0.12651567161083221, "learning_rate": 5.164834619241959e-07, "loss": 0.0478, "step": 913425 }, { "epoch": 8.98, "grad_norm": 13.700395584106445, "learning_rate": 5.163593394699476e-07, "loss": 0.0319, "step": 913450 }, { "epoch": 8.98, "grad_norm": 0.04330058768391609, "learning_rate": 5.16235217015699e-07, "loss": 0.0245, "step": 913475 }, { "epoch": 8.98, "grad_norm": 6.639640808105469, "learning_rate": 5.161110945614506e-07, "loss": 0.0275, "step": 913500 }, { "epoch": 8.98, "grad_norm": 5.257410049438477, "learning_rate": 5.159869721072022e-07, "loss": 0.04, "step": 913525 }, { "epoch": 8.98, "grad_norm": 22.863780975341797, "learning_rate": 5.158628496529537e-07, "loss": 0.0203, "step": 913550 }, { "epoch": 8.98, "grad_norm": 0.008051454089581966, "learning_rate": 5.157387271987052e-07, "loss": 0.0289, "step": 913575 }, { "epoch": 8.98, "grad_norm": 0.5523505806922913, "learning_rate": 5.156146047444567e-07, "loss": 0.0185, "step": 913600 }, { "epoch": 8.98, "grad_norm": 0.9733380675315857, "learning_rate": 5.154904822902083e-07, "loss": 0.0264, "step": 913625 }, { "epoch": 8.98, "grad_norm": 0.5517094731330872, "learning_rate": 5.153663598359598e-07, "loss": 0.0257, "step": 913650 }, { "epoch": 8.98, "grad_norm": 0.0058910297229886055, "learning_rate": 5.152472022798813e-07, "loss": 0.0334, "step": 913675 }, { "epoch": 8.98, "grad_norm": 0.5090511441230774, "learning_rate": 5.151230798256329e-07, "loss": 0.0263, "step": 913700 }, { "epoch": 8.98, "grad_norm": 0.7310657501220703, "learning_rate": 5.149989573713843e-07, "loss": 0.0244, "step": 913725 }, { "epoch": 8.98, "grad_norm": 7.715811252593994, "learning_rate": 5.14874834917136e-07, "loss": 0.0428, "step": 913750 }, { "epoch": 8.98, "grad_norm": 0.06634612381458282, "learning_rate": 5.147507124628874e-07, "loss": 0.0279, "step": 913775 }, { "epoch": 8.98, "grad_norm": 12.657910346984863, "learning_rate": 5.14626590008639e-07, "loss": 0.0279, "step": 913800 }, { "epoch": 8.98, "grad_norm": 2.738208532333374, "learning_rate": 5.145024675543904e-07, "loss": 0.0304, "step": 913825 }, { "epoch": 8.99, "grad_norm": 7.932710647583008, "learning_rate": 5.143783451001421e-07, "loss": 0.0385, "step": 913850 }, { "epoch": 8.99, "grad_norm": 4.312579154968262, "learning_rate": 5.142542226458936e-07, "loss": 0.0266, "step": 913875 }, { "epoch": 8.99, "grad_norm": 7.003180503845215, "learning_rate": 5.141301001916451e-07, "loss": 0.0257, "step": 913900 }, { "epoch": 8.99, "grad_norm": 0.813044548034668, "learning_rate": 5.140059777373967e-07, "loss": 0.0388, "step": 913925 }, { "epoch": 8.99, "grad_norm": 12.751848220825195, "learning_rate": 5.138818552831482e-07, "loss": 0.0389, "step": 913950 }, { "epoch": 8.99, "grad_norm": 1.512763261795044, "learning_rate": 5.137577328288997e-07, "loss": 0.0382, "step": 913975 }, { "epoch": 8.99, "grad_norm": 0.7483981251716614, "learning_rate": 5.136336103746512e-07, "loss": 0.0191, "step": 914000 }, { "epoch": 8.99, "grad_norm": 0.022680265828967094, "learning_rate": 5.135094879204028e-07, "loss": 0.0449, "step": 914025 }, { "epoch": 8.99, "grad_norm": 4.986266613006592, "learning_rate": 5.133853654661543e-07, "loss": 0.0246, "step": 914050 }, { "epoch": 8.99, "grad_norm": 0.07508832961320877, "learning_rate": 5.132612430119058e-07, "loss": 0.0283, "step": 914075 }, { "epoch": 8.99, "grad_norm": 11.047903060913086, "learning_rate": 5.131371205576574e-07, "loss": 0.0153, "step": 914100 }, { "epoch": 8.99, "grad_norm": 6.275654315948486, "learning_rate": 5.13012998103409e-07, "loss": 0.0529, "step": 914125 }, { "epoch": 8.99, "grad_norm": 6.615871429443359, "learning_rate": 5.128888756491604e-07, "loss": 0.0347, "step": 914150 }, { "epoch": 8.99, "grad_norm": 1.6020697355270386, "learning_rate": 5.12764753194912e-07, "loss": 0.0349, "step": 914175 }, { "epoch": 8.99, "grad_norm": 0.5573551654815674, "learning_rate": 5.126406307406636e-07, "loss": 0.0344, "step": 914200 }, { "epoch": 8.99, "grad_norm": 5.105366230010986, "learning_rate": 5.125165082864151e-07, "loss": 0.0396, "step": 914225 }, { "epoch": 8.99, "grad_norm": 1.1827253103256226, "learning_rate": 5.123923858321666e-07, "loss": 0.0348, "step": 914250 }, { "epoch": 8.99, "grad_norm": 0.07833743095397949, "learning_rate": 5.122682633779182e-07, "loss": 0.0501, "step": 914275 }, { "epoch": 8.99, "grad_norm": 7.142005443572998, "learning_rate": 5.121441409236697e-07, "loss": 0.0332, "step": 914300 }, { "epoch": 8.99, "grad_norm": 0.02484828792512417, "learning_rate": 5.120200184694212e-07, "loss": 0.0386, "step": 914325 }, { "epoch": 8.99, "grad_norm": 13.17291259765625, "learning_rate": 5.118958960151728e-07, "loss": 0.0306, "step": 914350 }, { "epoch": 8.99, "grad_norm": 2.5453295707702637, "learning_rate": 5.117717735609243e-07, "loss": 0.023, "step": 914375 }, { "epoch": 8.99, "grad_norm": 6.4437336921691895, "learning_rate": 5.116476511066758e-07, "loss": 0.0312, "step": 914400 }, { "epoch": 8.99, "grad_norm": 9.447240829467773, "learning_rate": 5.115235286524274e-07, "loss": 0.0366, "step": 914425 }, { "epoch": 8.99, "grad_norm": 7.0716729164123535, "learning_rate": 5.113994061981789e-07, "loss": 0.0165, "step": 914450 }, { "epoch": 8.99, "grad_norm": 8.917189598083496, "learning_rate": 5.112752837439305e-07, "loss": 0.0274, "step": 914475 }, { "epoch": 8.99, "grad_norm": 3.625318765640259, "learning_rate": 5.111511612896819e-07, "loss": 0.0375, "step": 914500 }, { "epoch": 8.99, "grad_norm": 0.012033636681735516, "learning_rate": 5.110270388354336e-07, "loss": 0.0297, "step": 914525 }, { "epoch": 8.99, "grad_norm": 9.798622131347656, "learning_rate": 5.109029163811851e-07, "loss": 0.0361, "step": 914550 }, { "epoch": 8.99, "grad_norm": 0.07983005046844482, "learning_rate": 5.107787939269366e-07, "loss": 0.0285, "step": 914575 }, { "epoch": 8.99, "grad_norm": 4.691633701324463, "learning_rate": 5.106546714726882e-07, "loss": 0.0097, "step": 914600 }, { "epoch": 8.99, "grad_norm": 6.918308258056641, "learning_rate": 5.105305490184397e-07, "loss": 0.0447, "step": 914625 }, { "epoch": 8.99, "grad_norm": 15.697391510009766, "learning_rate": 5.104064265641912e-07, "loss": 0.0306, "step": 914650 }, { "epoch": 8.99, "grad_norm": 0.02383442223072052, "learning_rate": 5.102823041099427e-07, "loss": 0.0247, "step": 914675 }, { "epoch": 8.99, "grad_norm": 15.834593772888184, "learning_rate": 5.101581816556943e-07, "loss": 0.0211, "step": 914700 }, { "epoch": 8.99, "grad_norm": 10.450569152832031, "learning_rate": 5.100340592014458e-07, "loss": 0.0382, "step": 914725 }, { "epoch": 8.99, "grad_norm": 0.24071060121059418, "learning_rate": 5.099099367471973e-07, "loss": 0.0318, "step": 914750 }, { "epoch": 8.99, "grad_norm": 0.3051915466785431, "learning_rate": 5.097858142929489e-07, "loss": 0.0433, "step": 914775 }, { "epoch": 8.99, "grad_norm": 6.951786041259766, "learning_rate": 5.096616918387004e-07, "loss": 0.029, "step": 914800 }, { "epoch": 8.99, "grad_norm": 1.788148283958435, "learning_rate": 5.095375693844519e-07, "loss": 0.0345, "step": 914825 }, { "epoch": 8.99, "grad_norm": 16.573488235473633, "learning_rate": 5.094134469302035e-07, "loss": 0.0265, "step": 914850 }, { "epoch": 9.0, "grad_norm": 6.770223617553711, "learning_rate": 5.092893244759551e-07, "loss": 0.0397, "step": 914875 }, { "epoch": 9.0, "grad_norm": 5.757437229156494, "learning_rate": 5.091652020217066e-07, "loss": 0.0152, "step": 914900 }, { "epoch": 9.0, "grad_norm": 6.026277542114258, "learning_rate": 5.090410795674581e-07, "loss": 0.024, "step": 914925 }, { "epoch": 9.0, "grad_norm": 4.0807085037231445, "learning_rate": 5.089169571132097e-07, "loss": 0.0504, "step": 914950 }, { "epoch": 9.0, "grad_norm": 0.033402685075998306, "learning_rate": 5.087928346589612e-07, "loss": 0.044, "step": 914975 }, { "epoch": 9.0, "grad_norm": 6.285048961639404, "learning_rate": 5.086687122047127e-07, "loss": 0.0235, "step": 915000 }, { "epoch": 9.0, "grad_norm": 0.006561297457665205, "learning_rate": 5.085445897504643e-07, "loss": 0.0325, "step": 915025 }, { "epoch": 9.0, "grad_norm": 15.709677696228027, "learning_rate": 5.084204672962158e-07, "loss": 0.0231, "step": 915050 }, { "epoch": 9.0, "grad_norm": 3.7898616790771484, "learning_rate": 5.082963448419673e-07, "loss": 0.0277, "step": 915075 }, { "epoch": 9.0, "grad_norm": 7.468293190002441, "learning_rate": 5.081722223877188e-07, "loss": 0.0162, "step": 915100 }, { "epoch": 9.0, "grad_norm": 0.009063673205673695, "learning_rate": 5.080480999334704e-07, "loss": 0.0374, "step": 915125 }, { "epoch": 9.0, "grad_norm": 13.872583389282227, "learning_rate": 5.07923977479222e-07, "loss": 0.0283, "step": 915150 }, { "epoch": 9.0, "grad_norm": 0.0786590576171875, "learning_rate": 5.077998550249734e-07, "loss": 0.0342, "step": 915175 }, { "epoch": 9.0, "grad_norm": 18.07705307006836, "learning_rate": 5.076757325707251e-07, "loss": 0.0363, "step": 915200 }, { "epoch": 9.0, "grad_norm": 0.00270282244309783, "learning_rate": 5.075516101164765e-07, "loss": 0.0279, "step": 915225 }, { "epoch": 9.0, "grad_norm": 0.21332694590091705, "learning_rate": 5.074274876622281e-07, "loss": 0.0267, "step": 915250 }, { "epoch": 9.0, "grad_norm": 0.7987383604049683, "learning_rate": 5.073033652079796e-07, "loss": 0.0289, "step": 915275 }, { "epoch": 9.0, "grad_norm": 10.041281700134277, "learning_rate": 5.071792427537312e-07, "loss": 0.0243, "step": 915300 }, { "epoch": 9.0, "grad_norm": 0.012201715260744095, "learning_rate": 5.070551202994827e-07, "loss": 0.043, "step": 915325 }, { "epoch": 9.0, "grad_norm": 10.049445152282715, "learning_rate": 5.069309978452342e-07, "loss": 0.015, "step": 915350 }, { "epoch": 9.0, "grad_norm": 4.571189880371094, "learning_rate": 5.068068753909858e-07, "loss": 0.049, "step": 915375 }, { "epoch": 9.0, "grad_norm": 12.877796173095703, "learning_rate": 5.066827529367373e-07, "loss": 0.0243, "step": 915400 }, { "epoch": 9.0, "grad_norm": 0.35731351375579834, "learning_rate": 5.065586304824888e-07, "loss": 0.0574, "step": 915425 }, { "epoch": 9.0, "grad_norm": 1.5475106239318848, "learning_rate": 5.064345080282404e-07, "loss": 0.0148, "step": 915450 }, { "epoch": 9.0, "grad_norm": 0.13178299367427826, "learning_rate": 5.063103855739919e-07, "loss": 0.0263, "step": 915475 }, { "epoch": 9.0, "grad_norm": 15.552762985229492, "learning_rate": 5.061862631197434e-07, "loss": 0.0186, "step": 915500 }, { "epoch": 9.0, "grad_norm": 4.146471977233887, "learning_rate": 5.060621406654949e-07, "loss": 0.0467, "step": 915525 }, { "epoch": 9.0, "grad_norm": 2.3043105602264404, "learning_rate": 5.059380182112466e-07, "loss": 0.0052, "step": 915550 }, { "epoch": 9.0, "grad_norm": 0.10697407275438309, "learning_rate": 5.058138957569981e-07, "loss": 0.0236, "step": 915575 }, { "epoch": 9.0, "grad_norm": 1.3659229278564453, "learning_rate": 5.056897733027496e-07, "loss": 0.0291, "step": 915600 }, { "epoch": 9.0, "grad_norm": 0.05609021708369255, "learning_rate": 5.055656508485012e-07, "loss": 0.0222, "step": 915625 }, { "epoch": 9.0, "grad_norm": 0.03096293844282627, "learning_rate": 5.054415283942527e-07, "loss": 0.0073, "step": 915650 }, { "epoch": 9.0, "grad_norm": 0.09309718757867813, "learning_rate": 5.053174059400042e-07, "loss": 0.0341, "step": 915675 }, { "epoch": 9.0, "grad_norm": 14.200810432434082, "learning_rate": 5.051932834857558e-07, "loss": 0.0226, "step": 915700 }, { "epoch": 9.0, "grad_norm": 4.4414286613464355, "learning_rate": 5.050691610315073e-07, "loss": 0.0501, "step": 915725 }, { "epoch": 9.0, "grad_norm": 0.8010725975036621, "learning_rate": 5.049450385772588e-07, "loss": 0.0162, "step": 915750 }, { "epoch": 9.0, "grad_norm": 0.5483012795448303, "learning_rate": 5.048209161230103e-07, "loss": 0.0418, "step": 915775 }, { "epoch": 9.0, "grad_norm": 4.702243804931641, "learning_rate": 5.046967936687619e-07, "loss": 0.0265, "step": 915800 }, { "epoch": 9.0, "grad_norm": 0.359118789434433, "learning_rate": 5.045726712145135e-07, "loss": 0.0325, "step": 915825 }, { "epoch": 9.0, "grad_norm": 6.063174724578857, "learning_rate": 5.044485487602649e-07, "loss": 0.0236, "step": 915850 }, { "epoch": 9.01, "grad_norm": 0.005843437742441893, "learning_rate": 5.043244263060166e-07, "loss": 0.0438, "step": 915875 }, { "epoch": 9.01, "grad_norm": 1.1870577335357666, "learning_rate": 5.04200303851768e-07, "loss": 0.0121, "step": 915900 }, { "epoch": 9.01, "grad_norm": 0.1306631714105606, "learning_rate": 5.040761813975196e-07, "loss": 0.0531, "step": 915925 }, { "epoch": 9.01, "grad_norm": 10.422179222106934, "learning_rate": 5.03952058943271e-07, "loss": 0.0191, "step": 915950 }, { "epoch": 9.01, "grad_norm": 0.02792527712881565, "learning_rate": 5.038279364890227e-07, "loss": 0.0348, "step": 915975 }, { "epoch": 9.01, "grad_norm": 9.882925987243652, "learning_rate": 5.037038140347742e-07, "loss": 0.0173, "step": 916000 }, { "epoch": 9.01, "grad_norm": 6.343934535980225, "learning_rate": 5.035796915805257e-07, "loss": 0.0275, "step": 916025 }, { "epoch": 9.01, "grad_norm": 0.3305860161781311, "learning_rate": 5.034555691262773e-07, "loss": 0.0118, "step": 916050 }, { "epoch": 9.01, "grad_norm": 0.016504447907209396, "learning_rate": 5.033314466720288e-07, "loss": 0.0488, "step": 916075 }, { "epoch": 9.01, "grad_norm": 6.804201602935791, "learning_rate": 5.032073242177803e-07, "loss": 0.0171, "step": 916100 }, { "epoch": 9.01, "grad_norm": 0.0632447674870491, "learning_rate": 5.030832017635319e-07, "loss": 0.0305, "step": 916125 }, { "epoch": 9.01, "grad_norm": 4.742175579071045, "learning_rate": 5.029590793092834e-07, "loss": 0.0074, "step": 916150 }, { "epoch": 9.01, "grad_norm": 0.0706481784582138, "learning_rate": 5.028349568550349e-07, "loss": 0.0362, "step": 916175 }, { "epoch": 9.01, "grad_norm": 0.5556209683418274, "learning_rate": 5.027108344007864e-07, "loss": 0.0118, "step": 916200 }, { "epoch": 9.01, "grad_norm": 0.0395590141415596, "learning_rate": 5.02586711946538e-07, "loss": 0.0317, "step": 916225 }, { "epoch": 9.01, "grad_norm": 3.3101584911346436, "learning_rate": 5.024625894922896e-07, "loss": 0.0081, "step": 916250 }, { "epoch": 9.01, "grad_norm": 1.1539857387542725, "learning_rate": 5.023384670380411e-07, "loss": 0.0322, "step": 916275 }, { "epoch": 9.01, "grad_norm": 4.299191951751709, "learning_rate": 5.022143445837927e-07, "loss": 0.0305, "step": 916300 }, { "epoch": 9.01, "grad_norm": 4.134121894836426, "learning_rate": 5.020902221295442e-07, "loss": 0.0318, "step": 916325 }, { "epoch": 9.01, "grad_norm": 5.62091588973999, "learning_rate": 5.019660996752957e-07, "loss": 0.0161, "step": 916350 }, { "epoch": 9.01, "grad_norm": 1.660119891166687, "learning_rate": 5.018419772210472e-07, "loss": 0.0324, "step": 916375 }, { "epoch": 9.01, "grad_norm": 3.425507068634033, "learning_rate": 5.017178547667988e-07, "loss": 0.0096, "step": 916400 }, { "epoch": 9.01, "grad_norm": 0.019656594842672348, "learning_rate": 5.015937323125503e-07, "loss": 0.0595, "step": 916425 }, { "epoch": 9.01, "grad_norm": 2.7851710319519043, "learning_rate": 5.014696098583018e-07, "loss": 0.0123, "step": 916450 }, { "epoch": 9.01, "grad_norm": 0.0174669548869133, "learning_rate": 5.013454874040534e-07, "loss": 0.0276, "step": 916475 }, { "epoch": 9.01, "grad_norm": 0.9417455792427063, "learning_rate": 5.012213649498049e-07, "loss": 0.0134, "step": 916500 }, { "epoch": 9.01, "grad_norm": 0.022153837606310844, "learning_rate": 5.010972424955564e-07, "loss": 0.0236, "step": 916525 }, { "epoch": 9.01, "grad_norm": 7.372007846832275, "learning_rate": 5.009731200413081e-07, "loss": 0.0109, "step": 916550 }, { "epoch": 9.01, "grad_norm": 0.34821680188179016, "learning_rate": 5.008489975870595e-07, "loss": 0.0289, "step": 916575 }, { "epoch": 9.01, "grad_norm": 7.8849005699157715, "learning_rate": 5.007248751328111e-07, "loss": 0.0356, "step": 916600 }, { "epoch": 9.01, "grad_norm": 1.2194653749465942, "learning_rate": 5.006007526785625e-07, "loss": 0.0165, "step": 916625 }, { "epoch": 9.01, "grad_norm": 1.0586433410644531, "learning_rate": 5.004766302243142e-07, "loss": 0.0064, "step": 916650 }, { "epoch": 9.01, "grad_norm": 0.1705983430147171, "learning_rate": 5.003525077700657e-07, "loss": 0.0451, "step": 916675 }, { "epoch": 9.01, "grad_norm": 4.901199817657471, "learning_rate": 5.002283853158172e-07, "loss": 0.0229, "step": 916700 }, { "epoch": 9.01, "grad_norm": 0.11718466877937317, "learning_rate": 5.001042628615688e-07, "loss": 0.0256, "step": 916725 }, { "epoch": 9.01, "grad_norm": 7.24092435836792, "learning_rate": 4.999801404073203e-07, "loss": 0.0097, "step": 916750 }, { "epoch": 9.01, "grad_norm": 0.5686687231063843, "learning_rate": 4.998560179530718e-07, "loss": 0.0351, "step": 916775 }, { "epoch": 9.01, "grad_norm": 9.040738105773926, "learning_rate": 4.997318954988233e-07, "loss": 0.0114, "step": 916800 }, { "epoch": 9.01, "grad_norm": 0.026253335177898407, "learning_rate": 4.996077730445749e-07, "loss": 0.0365, "step": 916825 }, { "epoch": 9.01, "grad_norm": 0.7575132250785828, "learning_rate": 4.994836505903264e-07, "loss": 0.0121, "step": 916850 }, { "epoch": 9.01, "grad_norm": 0.009248919785022736, "learning_rate": 4.993595281360779e-07, "loss": 0.0171, "step": 916875 }, { "epoch": 9.02, "grad_norm": 0.19375047087669373, "learning_rate": 4.992354056818295e-07, "loss": 0.0161, "step": 916900 }, { "epoch": 9.02, "grad_norm": 0.20769266784191132, "learning_rate": 4.99111283227581e-07, "loss": 0.0266, "step": 916925 }, { "epoch": 9.02, "grad_norm": 0.2895141839981079, "learning_rate": 4.989871607733326e-07, "loss": 0.0105, "step": 916950 }, { "epoch": 9.02, "grad_norm": 0.048111703246831894, "learning_rate": 4.988630383190842e-07, "loss": 0.0437, "step": 916975 }, { "epoch": 9.02, "grad_norm": 4.907221794128418, "learning_rate": 4.987389158648357e-07, "loss": 0.0187, "step": 917000 }, { "epoch": 9.02, "grad_norm": 0.07976824045181274, "learning_rate": 4.986147934105872e-07, "loss": 0.038, "step": 917025 }, { "epoch": 9.02, "grad_norm": 12.580374717712402, "learning_rate": 4.984906709563387e-07, "loss": 0.0154, "step": 917050 }, { "epoch": 9.02, "grad_norm": 4.764440536499023, "learning_rate": 4.983715134002602e-07, "loss": 0.0458, "step": 917075 }, { "epoch": 9.02, "grad_norm": 1.1544287204742432, "learning_rate": 4.982473909460117e-07, "loss": 0.0111, "step": 917100 }, { "epoch": 9.02, "grad_norm": 0.4134962856769562, "learning_rate": 4.981232684917633e-07, "loss": 0.0391, "step": 917125 }, { "epoch": 9.02, "grad_norm": 13.85571575164795, "learning_rate": 4.979991460375149e-07, "loss": 0.0137, "step": 917150 }, { "epoch": 9.02, "grad_norm": 0.12191595137119293, "learning_rate": 4.978750235832663e-07, "loss": 0.0304, "step": 917175 }, { "epoch": 9.02, "grad_norm": 2.865189552307129, "learning_rate": 4.97750901129018e-07, "loss": 0.009, "step": 917200 }, { "epoch": 9.02, "grad_norm": 1.5827776193618774, "learning_rate": 4.976267786747694e-07, "loss": 0.0409, "step": 917225 }, { "epoch": 9.02, "grad_norm": 8.561121940612793, "learning_rate": 4.97502656220521e-07, "loss": 0.0176, "step": 917250 }, { "epoch": 9.02, "grad_norm": 4.58278751373291, "learning_rate": 4.973785337662725e-07, "loss": 0.0404, "step": 917275 }, { "epoch": 9.02, "grad_norm": 0.8662003874778748, "learning_rate": 4.972544113120241e-07, "loss": 0.0184, "step": 917300 }, { "epoch": 9.02, "grad_norm": 2.2530171871185303, "learning_rate": 4.971302888577756e-07, "loss": 0.0422, "step": 917325 }, { "epoch": 9.02, "grad_norm": 0.6603002548217773, "learning_rate": 4.970061664035271e-07, "loss": 0.0076, "step": 917350 }, { "epoch": 9.02, "grad_norm": 4.462314605712891, "learning_rate": 4.968820439492787e-07, "loss": 0.0569, "step": 917375 }, { "epoch": 9.02, "grad_norm": 0.08439162373542786, "learning_rate": 4.967579214950302e-07, "loss": 0.0085, "step": 917400 }, { "epoch": 9.02, "grad_norm": 0.042873576283454895, "learning_rate": 4.966337990407817e-07, "loss": 0.0357, "step": 917425 }, { "epoch": 9.02, "grad_norm": 0.4676170349121094, "learning_rate": 4.965096765865332e-07, "loss": 0.0111, "step": 917450 }, { "epoch": 9.02, "grad_norm": 7.027566432952881, "learning_rate": 4.963855541322848e-07, "loss": 0.0312, "step": 917475 }, { "epoch": 9.02, "grad_norm": 1.3855684995651245, "learning_rate": 4.962614316780363e-07, "loss": 0.0178, "step": 917500 }, { "epoch": 9.02, "grad_norm": 0.026406781747937202, "learning_rate": 4.961373092237878e-07, "loss": 0.0293, "step": 917525 }, { "epoch": 9.02, "grad_norm": 5.325442790985107, "learning_rate": 4.960131867695394e-07, "loss": 0.0133, "step": 917550 }, { "epoch": 9.02, "grad_norm": 1.3301012516021729, "learning_rate": 4.95889064315291e-07, "loss": 0.0365, "step": 917575 }, { "epoch": 9.02, "grad_norm": 0.581240177154541, "learning_rate": 4.957649418610424e-07, "loss": 0.0074, "step": 917600 }, { "epoch": 9.02, "grad_norm": 2.175522804260254, "learning_rate": 4.95640819406794e-07, "loss": 0.0541, "step": 917625 }, { "epoch": 9.02, "grad_norm": 4.138981342315674, "learning_rate": 4.955166969525455e-07, "loss": 0.0064, "step": 917650 }, { "epoch": 9.02, "grad_norm": 0.06621590256690979, "learning_rate": 4.953925744982971e-07, "loss": 0.0377, "step": 917675 }, { "epoch": 9.02, "grad_norm": 0.24860240519046783, "learning_rate": 4.952684520440486e-07, "loss": 0.0077, "step": 917700 }, { "epoch": 9.02, "grad_norm": 1.73341965675354, "learning_rate": 4.951443295898002e-07, "loss": 0.0387, "step": 917725 }, { "epoch": 9.02, "grad_norm": 0.14914168417453766, "learning_rate": 4.950202071355517e-07, "loss": 0.0091, "step": 917750 }, { "epoch": 9.02, "grad_norm": 0.25916826725006104, "learning_rate": 4.948960846813032e-07, "loss": 0.0593, "step": 917775 }, { "epoch": 9.02, "grad_norm": 0.19984959065914154, "learning_rate": 4.947719622270548e-07, "loss": 0.015, "step": 917800 }, { "epoch": 9.02, "grad_norm": 4.124955177307129, "learning_rate": 4.946478397728063e-07, "loss": 0.0548, "step": 917825 }, { "epoch": 9.02, "grad_norm": 15.15617561340332, "learning_rate": 4.945237173185578e-07, "loss": 0.0122, "step": 917850 }, { "epoch": 9.02, "grad_norm": 0.4820738434791565, "learning_rate": 4.943995948643094e-07, "loss": 0.0218, "step": 917875 }, { "epoch": 9.02, "grad_norm": 2.1208572387695312, "learning_rate": 4.942754724100609e-07, "loss": 0.0177, "step": 917900 }, { "epoch": 9.03, "grad_norm": 2.114551067352295, "learning_rate": 4.941513499558125e-07, "loss": 0.0348, "step": 917925 }, { "epoch": 9.03, "grad_norm": 3.2456088066101074, "learning_rate": 4.940272275015639e-07, "loss": 0.02, "step": 917950 }, { "epoch": 9.03, "grad_norm": 0.07624716311693192, "learning_rate": 4.939031050473156e-07, "loss": 0.0656, "step": 917975 }, { "epoch": 9.03, "grad_norm": 0.26588964462280273, "learning_rate": 4.937789825930671e-07, "loss": 0.016, "step": 918000 }, { "epoch": 9.03, "grad_norm": 0.17521189153194427, "learning_rate": 4.936548601388186e-07, "loss": 0.0226, "step": 918025 }, { "epoch": 9.03, "grad_norm": 0.7162297368049622, "learning_rate": 4.935307376845701e-07, "loss": 0.0101, "step": 918050 }, { "epoch": 9.03, "grad_norm": 0.020456120371818542, "learning_rate": 4.934066152303217e-07, "loss": 0.0245, "step": 918075 }, { "epoch": 9.03, "grad_norm": 4.054889678955078, "learning_rate": 4.932824927760732e-07, "loss": 0.0134, "step": 918100 }, { "epoch": 9.03, "grad_norm": 1.0851058959960938, "learning_rate": 4.931583703218247e-07, "loss": 0.0498, "step": 918125 }, { "epoch": 9.03, "grad_norm": 7.240026950836182, "learning_rate": 4.930342478675763e-07, "loss": 0.0097, "step": 918150 }, { "epoch": 9.03, "grad_norm": 0.0077100773341953754, "learning_rate": 4.929101254133278e-07, "loss": 0.0464, "step": 918175 }, { "epoch": 9.03, "grad_norm": 4.784705638885498, "learning_rate": 4.927860029590793e-07, "loss": 0.0059, "step": 918200 }, { "epoch": 9.03, "grad_norm": 2.3126368522644043, "learning_rate": 4.926618805048309e-07, "loss": 0.0483, "step": 918225 }, { "epoch": 9.03, "grad_norm": 7.9335246086120605, "learning_rate": 4.925377580505824e-07, "loss": 0.0165, "step": 918250 }, { "epoch": 9.03, "grad_norm": 0.028018353506922722, "learning_rate": 4.924136355963339e-07, "loss": 0.0296, "step": 918275 }, { "epoch": 9.03, "grad_norm": 5.474747657775879, "learning_rate": 4.922895131420855e-07, "loss": 0.0138, "step": 918300 }, { "epoch": 9.03, "grad_norm": 0.03879866749048233, "learning_rate": 4.92165390687837e-07, "loss": 0.0212, "step": 918325 }, { "epoch": 9.03, "grad_norm": 3.243518590927124, "learning_rate": 4.920412682335886e-07, "loss": 0.0113, "step": 918350 }, { "epoch": 9.03, "grad_norm": 0.1596335619688034, "learning_rate": 4.9191714577934e-07, "loss": 0.0307, "step": 918375 }, { "epoch": 9.03, "grad_norm": 7.291873931884766, "learning_rate": 4.917930233250917e-07, "loss": 0.0065, "step": 918400 }, { "epoch": 9.03, "grad_norm": 0.03110687807202339, "learning_rate": 4.916689008708432e-07, "loss": 0.0288, "step": 918425 }, { "epoch": 9.03, "grad_norm": 1.010017991065979, "learning_rate": 4.915447784165947e-07, "loss": 0.0151, "step": 918450 }, { "epoch": 9.03, "grad_norm": 0.015667978674173355, "learning_rate": 4.914206559623462e-07, "loss": 0.0401, "step": 918475 }, { "epoch": 9.03, "grad_norm": 0.18340492248535156, "learning_rate": 4.912965335080978e-07, "loss": 0.0127, "step": 918500 }, { "epoch": 9.03, "grad_norm": 0.15718510746955872, "learning_rate": 4.911724110538493e-07, "loss": 0.0281, "step": 918525 }, { "epoch": 9.03, "grad_norm": 0.6981723308563232, "learning_rate": 4.910482885996009e-07, "loss": 0.0095, "step": 918550 }, { "epoch": 9.03, "grad_norm": 0.05555054172873497, "learning_rate": 4.909241661453524e-07, "loss": 0.0207, "step": 918575 }, { "epoch": 9.03, "grad_norm": 1.8967106342315674, "learning_rate": 4.90800043691104e-07, "loss": 0.0151, "step": 918600 }, { "epoch": 9.03, "grad_norm": 1.4104970693588257, "learning_rate": 4.906759212368554e-07, "loss": 0.0449, "step": 918625 }, { "epoch": 9.03, "grad_norm": 21.133575439453125, "learning_rate": 4.905517987826071e-07, "loss": 0.0169, "step": 918650 }, { "epoch": 9.03, "grad_norm": 0.23793424665927887, "learning_rate": 4.904276763283585e-07, "loss": 0.03, "step": 918675 }, { "epoch": 9.03, "grad_norm": 17.530864715576172, "learning_rate": 4.903035538741101e-07, "loss": 0.0238, "step": 918700 }, { "epoch": 9.03, "grad_norm": 1.3370797634124756, "learning_rate": 4.901794314198616e-07, "loss": 0.0318, "step": 918725 }, { "epoch": 9.03, "grad_norm": 0.07015843689441681, "learning_rate": 4.900553089656132e-07, "loss": 0.0094, "step": 918750 }, { "epoch": 9.03, "grad_norm": 2.3372139930725098, "learning_rate": 4.899311865113647e-07, "loss": 0.0409, "step": 918775 }, { "epoch": 9.03, "grad_norm": 0.04742710292339325, "learning_rate": 4.898070640571162e-07, "loss": 0.0097, "step": 918800 }, { "epoch": 9.03, "grad_norm": 1.4214227199554443, "learning_rate": 4.896829416028678e-07, "loss": 0.0512, "step": 918825 }, { "epoch": 9.03, "grad_norm": 2.2879395484924316, "learning_rate": 4.895588191486193e-07, "loss": 0.0178, "step": 918850 }, { "epoch": 9.03, "grad_norm": 0.9194859266281128, "learning_rate": 4.894346966943708e-07, "loss": 0.0265, "step": 918875 }, { "epoch": 9.03, "grad_norm": 6.035226821899414, "learning_rate": 4.893105742401223e-07, "loss": 0.0081, "step": 918900 }, { "epoch": 9.04, "grad_norm": 0.20032647252082825, "learning_rate": 4.891864517858739e-07, "loss": 0.038, "step": 918925 }, { "epoch": 9.04, "grad_norm": 7.071817874908447, "learning_rate": 4.890623293316254e-07, "loss": 0.0187, "step": 918950 }, { "epoch": 9.04, "grad_norm": 1.071465253829956, "learning_rate": 4.88938206877377e-07, "loss": 0.049, "step": 918975 }, { "epoch": 9.04, "grad_norm": 8.385201454162598, "learning_rate": 4.888140844231285e-07, "loss": 0.0213, "step": 919000 }, { "epoch": 9.04, "grad_norm": 0.0026133821811527014, "learning_rate": 4.886899619688801e-07, "loss": 0.0414, "step": 919025 }, { "epoch": 9.04, "grad_norm": 6.4267497062683105, "learning_rate": 4.885658395146315e-07, "loss": 0.0132, "step": 919050 }, { "epoch": 9.04, "grad_norm": 0.08415622264146805, "learning_rate": 4.884417170603832e-07, "loss": 0.0529, "step": 919075 }, { "epoch": 9.04, "grad_norm": 4.771482467651367, "learning_rate": 4.883175946061346e-07, "loss": 0.0255, "step": 919100 }, { "epoch": 9.04, "grad_norm": 5.60465669631958, "learning_rate": 4.881934721518862e-07, "loss": 0.0311, "step": 919125 }, { "epoch": 9.04, "grad_norm": 0.5040131211280823, "learning_rate": 4.880693496976377e-07, "loss": 0.0136, "step": 919150 }, { "epoch": 9.04, "grad_norm": 0.11345177888870239, "learning_rate": 4.879452272433893e-07, "loss": 0.0401, "step": 919175 }, { "epoch": 9.04, "grad_norm": 3.368725061416626, "learning_rate": 4.878211047891408e-07, "loss": 0.0136, "step": 919200 }, { "epoch": 9.04, "grad_norm": 8.736335754394531, "learning_rate": 4.876969823348923e-07, "loss": 0.0329, "step": 919225 }, { "epoch": 9.04, "grad_norm": 0.5645102262496948, "learning_rate": 4.875728598806439e-07, "loss": 0.0097, "step": 919250 }, { "epoch": 9.04, "grad_norm": 0.10757757723331451, "learning_rate": 4.874487374263955e-07, "loss": 0.0415, "step": 919275 }, { "epoch": 9.04, "grad_norm": 19.9606876373291, "learning_rate": 4.873246149721469e-07, "loss": 0.0153, "step": 919300 }, { "epoch": 9.04, "grad_norm": 0.08101800084114075, "learning_rate": 4.872004925178986e-07, "loss": 0.0272, "step": 919325 }, { "epoch": 9.04, "grad_norm": 0.237361878156662, "learning_rate": 4.8707637006365e-07, "loss": 0.0144, "step": 919350 }, { "epoch": 9.04, "grad_norm": 0.12307149171829224, "learning_rate": 4.869572125075715e-07, "loss": 0.0416, "step": 919375 }, { "epoch": 9.04, "grad_norm": 2.513784408569336, "learning_rate": 4.868330900533231e-07, "loss": 0.0062, "step": 919400 }, { "epoch": 9.04, "grad_norm": 0.1037159338593483, "learning_rate": 4.867089675990746e-07, "loss": 0.0252, "step": 919425 }, { "epoch": 9.04, "grad_norm": 0.06782932579517365, "learning_rate": 4.865848451448261e-07, "loss": 0.0247, "step": 919450 }, { "epoch": 9.04, "grad_norm": 6.335842609405518, "learning_rate": 4.864607226905777e-07, "loss": 0.0338, "step": 919475 }, { "epoch": 9.04, "grad_norm": 0.5538426041603088, "learning_rate": 4.863366002363292e-07, "loss": 0.0159, "step": 919500 }, { "epoch": 9.04, "grad_norm": 3.5966267585754395, "learning_rate": 4.862124777820807e-07, "loss": 0.0371, "step": 919525 }, { "epoch": 9.04, "grad_norm": 6.772326469421387, "learning_rate": 4.860883553278322e-07, "loss": 0.0226, "step": 919550 }, { "epoch": 9.04, "grad_norm": 0.8985859751701355, "learning_rate": 4.859642328735838e-07, "loss": 0.0501, "step": 919575 }, { "epoch": 9.04, "grad_norm": 12.550320625305176, "learning_rate": 4.858401104193353e-07, "loss": 0.0097, "step": 919600 }, { "epoch": 9.04, "grad_norm": 0.02346949651837349, "learning_rate": 4.857159879650869e-07, "loss": 0.0529, "step": 919625 }, { "epoch": 9.04, "grad_norm": 8.709150314331055, "learning_rate": 4.855918655108384e-07, "loss": 0.0182, "step": 919650 }, { "epoch": 9.04, "grad_norm": 1.338154673576355, "learning_rate": 4.8546774305659e-07, "loss": 0.0334, "step": 919675 }, { "epoch": 9.04, "grad_norm": 0.30203112959861755, "learning_rate": 4.853436206023414e-07, "loss": 0.0066, "step": 919700 }, { "epoch": 9.04, "grad_norm": 0.030276743695139885, "learning_rate": 4.85219498148093e-07, "loss": 0.0302, "step": 919725 }, { "epoch": 9.04, "grad_norm": 5.701660633087158, "learning_rate": 4.850953756938446e-07, "loss": 0.0245, "step": 919750 }, { "epoch": 9.04, "grad_norm": 0.1020033210515976, "learning_rate": 4.849712532395961e-07, "loss": 0.0189, "step": 919775 }, { "epoch": 9.04, "grad_norm": 9.946678161621094, "learning_rate": 4.848471307853476e-07, "loss": 0.0142, "step": 919800 }, { "epoch": 9.04, "grad_norm": 0.007373516913503408, "learning_rate": 4.847230083310992e-07, "loss": 0.0304, "step": 919825 }, { "epoch": 9.04, "grad_norm": 9.395197868347168, "learning_rate": 4.845988858768507e-07, "loss": 0.0255, "step": 919850 }, { "epoch": 9.04, "grad_norm": 0.042361337691545486, "learning_rate": 4.844747634226022e-07, "loss": 0.0456, "step": 919875 }, { "epoch": 9.04, "grad_norm": 1.0906978845596313, "learning_rate": 4.843506409683538e-07, "loss": 0.0139, "step": 919900 }, { "epoch": 9.04, "grad_norm": 0.07662077993154526, "learning_rate": 4.842265185141053e-07, "loss": 0.0378, "step": 919925 }, { "epoch": 9.05, "grad_norm": 2.0056798458099365, "learning_rate": 4.841023960598568e-07, "loss": 0.0111, "step": 919950 }, { "epoch": 9.05, "grad_norm": 0.006604175549000502, "learning_rate": 4.839782736056084e-07, "loss": 0.0456, "step": 919975 }, { "epoch": 9.05, "grad_norm": 2.9381160736083984, "learning_rate": 4.838541511513599e-07, "loss": 0.0146, "step": 920000 }, { "epoch": 9.05, "eval_loss": 0.9084303379058838, "eval_runtime": 6065.1701, "eval_samples_per_second": 1.561, "eval_steps_per_second": 0.195, "eval_wer": 0.11068370565891972, "step": 920000 }, { "epoch": 9.05, "grad_norm": 0.0996423214673996, "learning_rate": 4.837300286971115e-07, "loss": 0.0253, "step": 920025 }, { "epoch": 9.05, "grad_norm": 1.2285819053649902, "learning_rate": 4.83605906242863e-07, "loss": 0.0124, "step": 920050 }, { "epoch": 9.05, "grad_norm": 0.025040652602910995, "learning_rate": 4.834817837886146e-07, "loss": 0.0269, "step": 920075 }, { "epoch": 9.05, "grad_norm": 6.115046977996826, "learning_rate": 4.833576613343661e-07, "loss": 0.0211, "step": 920100 }, { "epoch": 9.05, "grad_norm": 0.007733456790447235, "learning_rate": 4.832335388801176e-07, "loss": 0.0294, "step": 920125 }, { "epoch": 9.05, "grad_norm": 2.4501149654388428, "learning_rate": 4.831094164258691e-07, "loss": 0.0139, "step": 920150 }, { "epoch": 9.05, "grad_norm": 2.3202383518218994, "learning_rate": 4.829852939716207e-07, "loss": 0.063, "step": 920175 }, { "epoch": 9.05, "grad_norm": 0.7753275632858276, "learning_rate": 4.828611715173722e-07, "loss": 0.0098, "step": 920200 }, { "epoch": 9.05, "grad_norm": 1.2365869283676147, "learning_rate": 4.827370490631237e-07, "loss": 0.0232, "step": 920225 }, { "epoch": 9.05, "grad_norm": 10.304566383361816, "learning_rate": 4.826129266088753e-07, "loss": 0.0216, "step": 920250 }, { "epoch": 9.05, "grad_norm": 5.1207380294799805, "learning_rate": 4.824888041546268e-07, "loss": 0.0368, "step": 920275 }, { "epoch": 9.05, "grad_norm": 1.60844087600708, "learning_rate": 4.823646817003784e-07, "loss": 0.0095, "step": 920300 }, { "epoch": 9.05, "grad_norm": 0.02895377390086651, "learning_rate": 4.822405592461299e-07, "loss": 0.0437, "step": 920325 }, { "epoch": 9.05, "grad_norm": 3.5180277824401855, "learning_rate": 4.821164367918815e-07, "loss": 0.0168, "step": 920350 }, { "epoch": 9.05, "grad_norm": 0.07046739757061005, "learning_rate": 4.819923143376329e-07, "loss": 0.0312, "step": 920375 }, { "epoch": 9.05, "grad_norm": 0.4188987612724304, "learning_rate": 4.818681918833845e-07, "loss": 0.0129, "step": 920400 }, { "epoch": 9.05, "grad_norm": 0.0692535936832428, "learning_rate": 4.81744069429136e-07, "loss": 0.035, "step": 920425 }, { "epoch": 9.05, "grad_norm": 9.932710647583008, "learning_rate": 4.816199469748876e-07, "loss": 0.025, "step": 920450 }, { "epoch": 9.05, "grad_norm": 0.046309586614370346, "learning_rate": 4.814958245206391e-07, "loss": 0.0205, "step": 920475 }, { "epoch": 9.05, "grad_norm": 13.40135669708252, "learning_rate": 4.813717020663907e-07, "loss": 0.009, "step": 920500 }, { "epoch": 9.05, "grad_norm": 0.011104801669716835, "learning_rate": 4.812475796121422e-07, "loss": 0.0416, "step": 920525 }, { "epoch": 9.05, "grad_norm": 3.434753179550171, "learning_rate": 4.811234571578937e-07, "loss": 0.0101, "step": 920550 }, { "epoch": 9.05, "grad_norm": 0.11425534635782242, "learning_rate": 4.809993347036452e-07, "loss": 0.0372, "step": 920575 }, { "epoch": 9.05, "grad_norm": 0.12133833765983582, "learning_rate": 4.808752122493968e-07, "loss": 0.0152, "step": 920600 }, { "epoch": 9.05, "grad_norm": 0.038728319108486176, "learning_rate": 4.807510897951483e-07, "loss": 0.0274, "step": 920625 }, { "epoch": 9.05, "grad_norm": 8.550514221191406, "learning_rate": 4.806269673408999e-07, "loss": 0.0194, "step": 920650 }, { "epoch": 9.05, "grad_norm": 0.009068232960999012, "learning_rate": 4.805028448866514e-07, "loss": 0.045, "step": 920675 }, { "epoch": 9.05, "grad_norm": 0.911614179611206, "learning_rate": 4.80378722432403e-07, "loss": 0.0118, "step": 920700 }, { "epoch": 9.05, "grad_norm": 1.3780707120895386, "learning_rate": 4.802545999781545e-07, "loss": 0.033, "step": 920725 }, { "epoch": 9.05, "grad_norm": 0.30201256275177, "learning_rate": 4.801304775239061e-07, "loss": 0.0118, "step": 920750 }, { "epoch": 9.05, "grad_norm": 0.46179935336112976, "learning_rate": 4.800063550696576e-07, "loss": 0.0318, "step": 920775 }, { "epoch": 9.05, "grad_norm": 21.154678344726562, "learning_rate": 4.798822326154091e-07, "loss": 0.0102, "step": 920800 }, { "epoch": 9.05, "grad_norm": 0.09007701277732849, "learning_rate": 4.797581101611606e-07, "loss": 0.0279, "step": 920825 }, { "epoch": 9.05, "grad_norm": 0.38314902782440186, "learning_rate": 4.796339877069122e-07, "loss": 0.015, "step": 920850 }, { "epoch": 9.05, "grad_norm": 1.226188063621521, "learning_rate": 4.795098652526637e-07, "loss": 0.0402, "step": 920875 }, { "epoch": 9.05, "grad_norm": 4.81010103225708, "learning_rate": 4.793857427984152e-07, "loss": 0.0113, "step": 920900 }, { "epoch": 9.05, "grad_norm": 0.43532323837280273, "learning_rate": 4.792616203441668e-07, "loss": 0.038, "step": 920925 }, { "epoch": 9.05, "grad_norm": 5.265471935272217, "learning_rate": 4.791374978899183e-07, "loss": 0.0179, "step": 920950 }, { "epoch": 9.06, "grad_norm": 0.07917194068431854, "learning_rate": 4.790133754356698e-07, "loss": 0.0412, "step": 920975 }, { "epoch": 9.06, "grad_norm": 11.471835136413574, "learning_rate": 4.788892529814214e-07, "loss": 0.0127, "step": 921000 }, { "epoch": 9.06, "grad_norm": 0.3487916886806488, "learning_rate": 4.78765130527173e-07, "loss": 0.0236, "step": 921025 }, { "epoch": 9.06, "grad_norm": 4.584184646606445, "learning_rate": 4.786410080729244e-07, "loss": 0.0191, "step": 921050 }, { "epoch": 9.06, "grad_norm": 0.04219849780201912, "learning_rate": 4.78516885618676e-07, "loss": 0.0203, "step": 921075 }, { "epoch": 9.06, "grad_norm": 8.654852867126465, "learning_rate": 4.783927631644275e-07, "loss": 0.0111, "step": 921100 }, { "epoch": 9.06, "grad_norm": 0.23834817111492157, "learning_rate": 4.782686407101791e-07, "loss": 0.0334, "step": 921125 }, { "epoch": 9.06, "grad_norm": 4.033243179321289, "learning_rate": 4.781445182559306e-07, "loss": 0.01, "step": 921150 }, { "epoch": 9.06, "grad_norm": 7.8889899253845215, "learning_rate": 4.780203958016822e-07, "loss": 0.0284, "step": 921175 }, { "epoch": 9.06, "grad_norm": 0.17755912244319916, "learning_rate": 4.778962733474337e-07, "loss": 0.0039, "step": 921200 }, { "epoch": 9.06, "grad_norm": 0.14432114362716675, "learning_rate": 4.777721508931852e-07, "loss": 0.0249, "step": 921225 }, { "epoch": 9.06, "grad_norm": 6.811509609222412, "learning_rate": 4.776480284389367e-07, "loss": 0.0143, "step": 921250 }, { "epoch": 9.06, "grad_norm": 0.013093579560518265, "learning_rate": 4.775239059846883e-07, "loss": 0.0575, "step": 921275 }, { "epoch": 9.06, "grad_norm": 3.4732093811035156, "learning_rate": 4.773997835304398e-07, "loss": 0.0214, "step": 921300 }, { "epoch": 9.06, "grad_norm": 1.5004932880401611, "learning_rate": 4.772756610761914e-07, "loss": 0.0419, "step": 921325 }, { "epoch": 9.06, "grad_norm": 15.042865753173828, "learning_rate": 4.771515386219429e-07, "loss": 0.0081, "step": 921350 }, { "epoch": 9.06, "grad_norm": 0.1414535939693451, "learning_rate": 4.770274161676945e-07, "loss": 0.0336, "step": 921375 }, { "epoch": 9.06, "grad_norm": 2.7055306434631348, "learning_rate": 4.769032937134459e-07, "loss": 0.0146, "step": 921400 }, { "epoch": 9.06, "grad_norm": 0.015919383615255356, "learning_rate": 4.767791712591975e-07, "loss": 0.0291, "step": 921425 }, { "epoch": 9.06, "grad_norm": 15.005759239196777, "learning_rate": 4.766550488049491e-07, "loss": 0.0147, "step": 921450 }, { "epoch": 9.06, "grad_norm": 2.0548927783966064, "learning_rate": 4.7653092635070056e-07, "loss": 0.0313, "step": 921475 }, { "epoch": 9.06, "grad_norm": 0.3365832269191742, "learning_rate": 4.7640680389645214e-07, "loss": 0.0118, "step": 921500 }, { "epoch": 9.06, "grad_norm": 0.16260717809200287, "learning_rate": 4.762826814422036e-07, "loss": 0.0547, "step": 921525 }, { "epoch": 9.06, "grad_norm": 1.2435826063156128, "learning_rate": 4.761585589879552e-07, "loss": 0.026, "step": 921550 }, { "epoch": 9.06, "grad_norm": 0.24683500826358795, "learning_rate": 4.760344365337068e-07, "loss": 0.057, "step": 921575 }, { "epoch": 9.06, "grad_norm": 11.322367668151855, "learning_rate": 4.7591031407945826e-07, "loss": 0.0166, "step": 921600 }, { "epoch": 9.06, "grad_norm": 0.07556551694869995, "learning_rate": 4.7578619162520984e-07, "loss": 0.0187, "step": 921625 }, { "epoch": 9.06, "grad_norm": 2.4818334579467773, "learning_rate": 4.756620691709613e-07, "loss": 0.0116, "step": 921650 }, { "epoch": 9.06, "grad_norm": 1.7187702655792236, "learning_rate": 4.755379467167129e-07, "loss": 0.0338, "step": 921675 }, { "epoch": 9.06, "grad_norm": 7.537903308868408, "learning_rate": 4.7541382426246443e-07, "loss": 0.0137, "step": 921700 }, { "epoch": 9.06, "grad_norm": 0.0829424113035202, "learning_rate": 4.7528970180821596e-07, "loss": 0.0308, "step": 921725 }, { "epoch": 9.06, "grad_norm": 6.642635822296143, "learning_rate": 4.751655793539675e-07, "loss": 0.0216, "step": 921750 }, { "epoch": 9.06, "grad_norm": 0.024578146636486053, "learning_rate": 4.75041456899719e-07, "loss": 0.0352, "step": 921775 }, { "epoch": 9.06, "grad_norm": 11.696599006652832, "learning_rate": 4.7491733444547055e-07, "loss": 0.0308, "step": 921800 }, { "epoch": 9.06, "grad_norm": 0.017146296799182892, "learning_rate": 4.74798176889392e-07, "loss": 0.0296, "step": 921825 }, { "epoch": 9.06, "grad_norm": 8.676491737365723, "learning_rate": 4.746740544351436e-07, "loss": 0.0208, "step": 921850 }, { "epoch": 9.06, "grad_norm": 0.006032215431332588, "learning_rate": 4.7454993198089506e-07, "loss": 0.0301, "step": 921875 }, { "epoch": 9.06, "grad_norm": 7.954611301422119, "learning_rate": 4.7442580952664664e-07, "loss": 0.0122, "step": 921900 }, { "epoch": 9.06, "grad_norm": 1.212768793106079, "learning_rate": 4.743016870723982e-07, "loss": 0.0269, "step": 921925 }, { "epoch": 9.06, "grad_norm": 0.5512964129447937, "learning_rate": 4.741775646181497e-07, "loss": 0.0141, "step": 921950 }, { "epoch": 9.07, "grad_norm": 0.033999305218458176, "learning_rate": 4.740534421639013e-07, "loss": 0.043, "step": 921975 }, { "epoch": 9.07, "grad_norm": 6.648555755615234, "learning_rate": 4.7392931970965275e-07, "loss": 0.0112, "step": 922000 }, { "epoch": 9.07, "grad_norm": 0.024681080132722855, "learning_rate": 4.7380519725540434e-07, "loss": 0.0381, "step": 922025 }, { "epoch": 9.07, "grad_norm": 0.3254460096359253, "learning_rate": 4.736810748011559e-07, "loss": 0.0173, "step": 922050 }, { "epoch": 9.07, "grad_norm": 0.16409902274608612, "learning_rate": 4.735569523469074e-07, "loss": 0.0349, "step": 922075 }, { "epoch": 9.07, "grad_norm": 7.93756103515625, "learning_rate": 4.73432829892659e-07, "loss": 0.0088, "step": 922100 }, { "epoch": 9.07, "grad_norm": 0.30400028824806213, "learning_rate": 4.7330870743841045e-07, "loss": 0.0293, "step": 922125 }, { "epoch": 9.07, "grad_norm": 5.325823783874512, "learning_rate": 4.7318458498416203e-07, "loss": 0.0111, "step": 922150 }, { "epoch": 9.07, "grad_norm": 0.007009274326264858, "learning_rate": 4.7306046252991356e-07, "loss": 0.0304, "step": 922175 }, { "epoch": 9.07, "grad_norm": 0.9877474308013916, "learning_rate": 4.729363400756651e-07, "loss": 0.008, "step": 922200 }, { "epoch": 9.07, "grad_norm": 21.215965270996094, "learning_rate": 4.728122176214166e-07, "loss": 0.0406, "step": 922225 }, { "epoch": 9.07, "grad_norm": 0.08733905851840973, "learning_rate": 4.7268809516716815e-07, "loss": 0.0165, "step": 922250 }, { "epoch": 9.07, "grad_norm": 0.0785241648554802, "learning_rate": 4.725639727129197e-07, "loss": 0.0262, "step": 922275 }, { "epoch": 9.07, "grad_norm": 0.5676885843276978, "learning_rate": 4.724398502586712e-07, "loss": 0.0118, "step": 922300 }, { "epoch": 9.07, "grad_norm": 4.146206855773926, "learning_rate": 4.723157278044228e-07, "loss": 0.0267, "step": 922325 }, { "epoch": 9.07, "grad_norm": 0.5620045065879822, "learning_rate": 4.721916053501743e-07, "loss": 0.0218, "step": 922350 }, { "epoch": 9.07, "grad_norm": 0.19550779461860657, "learning_rate": 4.7206748289592585e-07, "loss": 0.0241, "step": 922375 }, { "epoch": 9.07, "grad_norm": 6.798030376434326, "learning_rate": 4.719433604416774e-07, "loss": 0.0112, "step": 922400 }, { "epoch": 9.07, "grad_norm": 0.01682652346789837, "learning_rate": 4.718192379874289e-07, "loss": 0.0552, "step": 922425 }, { "epoch": 9.07, "grad_norm": 7.090040683746338, "learning_rate": 4.7169511553318044e-07, "loss": 0.0178, "step": 922450 }, { "epoch": 9.07, "grad_norm": 1.064408540725708, "learning_rate": 4.71570993078932e-07, "loss": 0.0269, "step": 922475 }, { "epoch": 9.07, "grad_norm": 2.1167356967926025, "learning_rate": 4.714468706246835e-07, "loss": 0.0154, "step": 922500 }, { "epoch": 9.07, "grad_norm": 0.018085379153490067, "learning_rate": 4.713227481704351e-07, "loss": 0.0319, "step": 922525 }, { "epoch": 9.07, "grad_norm": 1.1171892881393433, "learning_rate": 4.7119862571618655e-07, "loss": 0.0062, "step": 922550 }, { "epoch": 9.07, "grad_norm": 1.1700115203857422, "learning_rate": 4.7107450326193813e-07, "loss": 0.0588, "step": 922575 }, { "epoch": 9.07, "grad_norm": 4.107875347137451, "learning_rate": 4.709503808076897e-07, "loss": 0.0111, "step": 922600 }, { "epoch": 9.07, "grad_norm": 0.028872238472104073, "learning_rate": 4.708262583534412e-07, "loss": 0.0501, "step": 922625 }, { "epoch": 9.07, "grad_norm": 11.560994148254395, "learning_rate": 4.707021358991928e-07, "loss": 0.0138, "step": 922650 }, { "epoch": 9.07, "grad_norm": 16.31546974182129, "learning_rate": 4.7057801344494425e-07, "loss": 0.0255, "step": 922675 }, { "epoch": 9.07, "grad_norm": 2.0577754974365234, "learning_rate": 4.7045389099069583e-07, "loss": 0.0187, "step": 922700 }, { "epoch": 9.07, "grad_norm": 0.08158509433269501, "learning_rate": 4.703297685364473e-07, "loss": 0.0356, "step": 922725 }, { "epoch": 9.07, "grad_norm": 6.7139410972595215, "learning_rate": 4.702056460821989e-07, "loss": 0.0199, "step": 922750 }, { "epoch": 9.07, "grad_norm": 0.1860790103673935, "learning_rate": 4.7008152362795047e-07, "loss": 0.0413, "step": 922775 }, { "epoch": 9.07, "grad_norm": 9.832050323486328, "learning_rate": 4.6995740117370195e-07, "loss": 0.014, "step": 922800 }, { "epoch": 9.07, "grad_norm": 0.10861571878194809, "learning_rate": 4.6983327871945353e-07, "loss": 0.0511, "step": 922825 }, { "epoch": 9.07, "grad_norm": 5.178389072418213, "learning_rate": 4.69709156265205e-07, "loss": 0.0147, "step": 922850 }, { "epoch": 9.07, "grad_norm": 1.2129135131835938, "learning_rate": 4.695850338109566e-07, "loss": 0.0458, "step": 922875 }, { "epoch": 9.07, "grad_norm": 11.88515853881836, "learning_rate": 4.694609113567081e-07, "loss": 0.0195, "step": 922900 }, { "epoch": 9.07, "grad_norm": 0.028492817655205727, "learning_rate": 4.6933678890245965e-07, "loss": 0.0286, "step": 922925 }, { "epoch": 9.07, "grad_norm": 0.40245118737220764, "learning_rate": 4.692126664482112e-07, "loss": 0.0129, "step": 922950 }, { "epoch": 9.07, "grad_norm": 0.02743840217590332, "learning_rate": 4.690885439939627e-07, "loss": 0.0399, "step": 922975 }, { "epoch": 9.08, "grad_norm": 5.178391933441162, "learning_rate": 4.689644215397143e-07, "loss": 0.0202, "step": 923000 }, { "epoch": 9.08, "grad_norm": 0.03597934916615486, "learning_rate": 4.688402990854658e-07, "loss": 0.04, "step": 923025 }, { "epoch": 9.08, "grad_norm": 1.1016329526901245, "learning_rate": 4.6871617663121735e-07, "loss": 0.0049, "step": 923050 }, { "epoch": 9.08, "grad_norm": 0.02542862296104431, "learning_rate": 4.685920541769689e-07, "loss": 0.0402, "step": 923075 }, { "epoch": 9.08, "grad_norm": 8.685866355895996, "learning_rate": 4.684679317227204e-07, "loss": 0.0076, "step": 923100 }, { "epoch": 9.08, "grad_norm": 0.027883928269147873, "learning_rate": 4.6834380926847193e-07, "loss": 0.0457, "step": 923125 }, { "epoch": 9.08, "grad_norm": 6.336518287658691, "learning_rate": 4.6821968681422346e-07, "loss": 0.0109, "step": 923150 }, { "epoch": 9.08, "grad_norm": 0.3165484666824341, "learning_rate": 4.68095564359975e-07, "loss": 0.0395, "step": 923175 }, { "epoch": 9.08, "grad_norm": 13.514191627502441, "learning_rate": 4.679714419057266e-07, "loss": 0.0169, "step": 923200 }, { "epoch": 9.08, "grad_norm": 0.007832563482224941, "learning_rate": 4.6784731945147805e-07, "loss": 0.028, "step": 923225 }, { "epoch": 9.08, "grad_norm": 25.884639739990234, "learning_rate": 4.6772319699722963e-07, "loss": 0.0204, "step": 923250 }, { "epoch": 9.08, "grad_norm": 0.02971944771707058, "learning_rate": 4.675990745429811e-07, "loss": 0.0337, "step": 923275 }, { "epoch": 9.08, "grad_norm": 8.960545539855957, "learning_rate": 4.674749520887327e-07, "loss": 0.0227, "step": 923300 }, { "epoch": 9.08, "grad_norm": 0.04369194805622101, "learning_rate": 4.6735082963448427e-07, "loss": 0.0249, "step": 923325 }, { "epoch": 9.08, "grad_norm": 1.269158124923706, "learning_rate": 4.6722670718023575e-07, "loss": 0.0178, "step": 923350 }, { "epoch": 9.08, "grad_norm": 4.251699447631836, "learning_rate": 4.6710258472598733e-07, "loss": 0.0502, "step": 923375 }, { "epoch": 9.08, "grad_norm": 7.912493705749512, "learning_rate": 4.669784622717388e-07, "loss": 0.016, "step": 923400 }, { "epoch": 9.08, "grad_norm": 0.004548731725662947, "learning_rate": 4.668543398174904e-07, "loss": 0.0324, "step": 923425 }, { "epoch": 9.08, "grad_norm": 0.17406725883483887, "learning_rate": 4.6673021736324197e-07, "loss": 0.0093, "step": 923450 }, { "epoch": 9.08, "grad_norm": 0.05388536676764488, "learning_rate": 4.6660609490899345e-07, "loss": 0.0542, "step": 923475 }, { "epoch": 9.08, "grad_norm": 4.737306594848633, "learning_rate": 4.6648197245474503e-07, "loss": 0.0197, "step": 923500 }, { "epoch": 9.08, "grad_norm": 0.49471527338027954, "learning_rate": 4.663578500004965e-07, "loss": 0.0386, "step": 923525 }, { "epoch": 9.08, "grad_norm": 0.9012957811355591, "learning_rate": 4.662337275462481e-07, "loss": 0.0147, "step": 923550 }, { "epoch": 9.08, "grad_norm": 0.6421432495117188, "learning_rate": 4.6610960509199956e-07, "loss": 0.0366, "step": 923575 }, { "epoch": 9.08, "grad_norm": 1.614811897277832, "learning_rate": 4.6598548263775115e-07, "loss": 0.0069, "step": 923600 }, { "epoch": 9.08, "grad_norm": 0.04830736294388771, "learning_rate": 4.658613601835027e-07, "loss": 0.0361, "step": 923625 }, { "epoch": 9.08, "grad_norm": 0.11610908061265945, "learning_rate": 4.657372377292542e-07, "loss": 0.0197, "step": 923650 }, { "epoch": 9.08, "grad_norm": 5.789834499359131, "learning_rate": 4.6561311527500573e-07, "loss": 0.0362, "step": 923675 }, { "epoch": 9.08, "grad_norm": 7.347861289978027, "learning_rate": 4.6548899282075726e-07, "loss": 0.0147, "step": 923700 }, { "epoch": 9.08, "grad_norm": 0.09739407896995544, "learning_rate": 4.6536487036650884e-07, "loss": 0.056, "step": 923725 }, { "epoch": 9.08, "grad_norm": 7.202101707458496, "learning_rate": 4.6524074791226037e-07, "loss": 0.0144, "step": 923750 }, { "epoch": 9.08, "grad_norm": 0.3199789524078369, "learning_rate": 4.651166254580119e-07, "loss": 0.0419, "step": 923775 }, { "epoch": 9.08, "grad_norm": 5.731049060821533, "learning_rate": 4.6499250300376343e-07, "loss": 0.0139, "step": 923800 }, { "epoch": 9.08, "grad_norm": 0.003706438234075904, "learning_rate": 4.6486838054951496e-07, "loss": 0.0495, "step": 923825 }, { "epoch": 9.08, "grad_norm": 4.450525760650635, "learning_rate": 4.647442580952665e-07, "loss": 0.0117, "step": 923850 }, { "epoch": 9.08, "grad_norm": 0.04498596489429474, "learning_rate": 4.6462013564101807e-07, "loss": 0.0483, "step": 923875 }, { "epoch": 9.08, "grad_norm": 4.167149543762207, "learning_rate": 4.6449601318676955e-07, "loss": 0.0198, "step": 923900 }, { "epoch": 9.08, "grad_norm": 0.014597521163523197, "learning_rate": 4.6437189073252113e-07, "loss": 0.0456, "step": 923925 }, { "epoch": 9.08, "grad_norm": 5.402431011199951, "learning_rate": 4.642477682782726e-07, "loss": 0.0165, "step": 923950 }, { "epoch": 9.08, "grad_norm": 0.9529085755348206, "learning_rate": 4.641236458240242e-07, "loss": 0.0265, "step": 923975 }, { "epoch": 9.08, "grad_norm": 5.65389347076416, "learning_rate": 4.6399952336977566e-07, "loss": 0.0071, "step": 924000 }, { "epoch": 9.09, "grad_norm": 1.188003659248352, "learning_rate": 4.6387540091552725e-07, "loss": 0.0325, "step": 924025 }, { "epoch": 9.09, "grad_norm": 6.592013359069824, "learning_rate": 4.6375127846127883e-07, "loss": 0.0172, "step": 924050 }, { "epoch": 9.09, "grad_norm": 0.013879889622330666, "learning_rate": 4.636271560070303e-07, "loss": 0.0313, "step": 924075 }, { "epoch": 9.09, "grad_norm": 2.0044476985931396, "learning_rate": 4.635030335527819e-07, "loss": 0.0122, "step": 924100 }, { "epoch": 9.09, "grad_norm": 4.792665481567383, "learning_rate": 4.6337891109853336e-07, "loss": 0.0464, "step": 924125 }, { "epoch": 9.09, "grad_norm": 0.5057010054588318, "learning_rate": 4.6325478864428494e-07, "loss": 0.0299, "step": 924150 }, { "epoch": 9.09, "grad_norm": 0.022564154118299484, "learning_rate": 4.631306661900365e-07, "loss": 0.0444, "step": 924175 }, { "epoch": 9.09, "grad_norm": 3.177125930786133, "learning_rate": 4.63006543735788e-07, "loss": 0.0067, "step": 924200 }, { "epoch": 9.09, "grad_norm": 3.9142820835113525, "learning_rate": 4.628824212815396e-07, "loss": 0.0495, "step": 924225 }, { "epoch": 9.09, "grad_norm": 1.8551723957061768, "learning_rate": 4.6275829882729106e-07, "loss": 0.0115, "step": 924250 }, { "epoch": 9.09, "grad_norm": 0.08277270942926407, "learning_rate": 4.6263417637304264e-07, "loss": 0.0293, "step": 924275 }, { "epoch": 9.09, "grad_norm": 0.8761470913887024, "learning_rate": 4.6251005391879417e-07, "loss": 0.0121, "step": 924300 }, { "epoch": 9.09, "grad_norm": 0.21537674963474274, "learning_rate": 4.623859314645457e-07, "loss": 0.0422, "step": 924325 }, { "epoch": 9.09, "grad_norm": 0.26082322001457214, "learning_rate": 4.6226180901029723e-07, "loss": 0.0117, "step": 924350 }, { "epoch": 9.09, "grad_norm": 0.016681712120771408, "learning_rate": 4.6213768655604876e-07, "loss": 0.0356, "step": 924375 }, { "epoch": 9.09, "grad_norm": 12.562530517578125, "learning_rate": 4.6201356410180034e-07, "loss": 0.0121, "step": 924400 }, { "epoch": 9.09, "grad_norm": 0.05048614740371704, "learning_rate": 4.6188944164755187e-07, "loss": 0.0312, "step": 924425 }, { "epoch": 9.09, "grad_norm": 0.602446436882019, "learning_rate": 4.617653191933034e-07, "loss": 0.0169, "step": 924450 }, { "epoch": 9.09, "grad_norm": 0.015154268592596054, "learning_rate": 4.6164119673905493e-07, "loss": 0.038, "step": 924475 }, { "epoch": 9.09, "grad_norm": 1.71742582321167, "learning_rate": 4.6151707428480646e-07, "loss": 0.0179, "step": 924500 }, { "epoch": 9.09, "grad_norm": 0.10904189944267273, "learning_rate": 4.61392951830558e-07, "loss": 0.0442, "step": 924525 }, { "epoch": 9.09, "grad_norm": 16.395898818969727, "learning_rate": 4.612688293763095e-07, "loss": 0.0131, "step": 924550 }, { "epoch": 9.09, "grad_norm": 0.29660722613334656, "learning_rate": 4.6114470692206104e-07, "loss": 0.03, "step": 924575 }, { "epoch": 9.09, "grad_norm": 1.459912896156311, "learning_rate": 4.6102058446781263e-07, "loss": 0.0262, "step": 924600 }, { "epoch": 9.09, "grad_norm": 0.008253945969045162, "learning_rate": 4.608964620135641e-07, "loss": 0.0247, "step": 924625 }, { "epoch": 9.09, "grad_norm": 0.8273477554321289, "learning_rate": 4.607723395593157e-07, "loss": 0.0052, "step": 924650 }, { "epoch": 9.09, "grad_norm": 1.2718619108200073, "learning_rate": 4.6064821710506716e-07, "loss": 0.0437, "step": 924675 }, { "epoch": 9.09, "grad_norm": 3.722717523574829, "learning_rate": 4.6052409465081874e-07, "loss": 0.0044, "step": 924700 }, { "epoch": 9.09, "grad_norm": 0.11229505389928818, "learning_rate": 4.603999721965703e-07, "loss": 0.0485, "step": 924725 }, { "epoch": 9.09, "grad_norm": 6.5449934005737305, "learning_rate": 4.602758497423218e-07, "loss": 0.012, "step": 924750 }, { "epoch": 9.09, "grad_norm": 0.5339746475219727, "learning_rate": 4.601566921862433e-07, "loss": 0.0399, "step": 924775 }, { "epoch": 9.09, "grad_norm": 13.963911056518555, "learning_rate": 4.6003256973199483e-07, "loss": 0.0274, "step": 924800 }, { "epoch": 9.09, "grad_norm": 0.04177822172641754, "learning_rate": 4.5990844727774636e-07, "loss": 0.0322, "step": 924825 }, { "epoch": 9.09, "grad_norm": 11.520458221435547, "learning_rate": 4.597843248234979e-07, "loss": 0.0123, "step": 924850 }, { "epoch": 9.09, "grad_norm": 0.012334582395851612, "learning_rate": 4.596602023692495e-07, "loss": 0.0467, "step": 924875 }, { "epoch": 9.09, "grad_norm": 5.1653218269348145, "learning_rate": 4.59536079915001e-07, "loss": 0.0077, "step": 924900 }, { "epoch": 9.09, "grad_norm": 0.05829111486673355, "learning_rate": 4.5941195746075253e-07, "loss": 0.0439, "step": 924925 }, { "epoch": 9.09, "grad_norm": 0.07027851790189743, "learning_rate": 4.5928783500650406e-07, "loss": 0.0118, "step": 924950 }, { "epoch": 9.09, "grad_norm": 1.1466580629348755, "learning_rate": 4.591637125522556e-07, "loss": 0.049, "step": 924975 }, { "epoch": 9.09, "grad_norm": 5.3366379737854, "learning_rate": 4.590395900980071e-07, "loss": 0.0197, "step": 925000 }, { "epoch": 9.09, "grad_norm": 0.601900577545166, "learning_rate": 4.5891546764375865e-07, "loss": 0.024, "step": 925025 }, { "epoch": 9.1, "grad_norm": 4.53823709487915, "learning_rate": 4.587913451895102e-07, "loss": 0.0096, "step": 925050 }, { "epoch": 9.1, "grad_norm": 0.032524894922971725, "learning_rate": 4.5866722273526176e-07, "loss": 0.0435, "step": 925075 }, { "epoch": 9.1, "grad_norm": 0.07524483650922775, "learning_rate": 4.5854310028101324e-07, "loss": 0.012, "step": 925100 }, { "epoch": 9.1, "grad_norm": 0.4514818787574768, "learning_rate": 4.584189778267648e-07, "loss": 0.0275, "step": 925125 }, { "epoch": 9.1, "grad_norm": 1.3229509592056274, "learning_rate": 4.582948553725163e-07, "loss": 0.0055, "step": 925150 }, { "epoch": 9.1, "grad_norm": 1.7547640800476074, "learning_rate": 4.581707329182679e-07, "loss": 0.0473, "step": 925175 }, { "epoch": 9.1, "grad_norm": 0.29482302069664, "learning_rate": 4.5804661046401946e-07, "loss": 0.0131, "step": 925200 }, { "epoch": 9.1, "grad_norm": 0.022777622565627098, "learning_rate": 4.5792248800977093e-07, "loss": 0.0514, "step": 925225 }, { "epoch": 9.1, "grad_norm": 8.723822593688965, "learning_rate": 4.577983655555225e-07, "loss": 0.0051, "step": 925250 }, { "epoch": 9.1, "grad_norm": 0.1357770711183548, "learning_rate": 4.57674243101274e-07, "loss": 0.0385, "step": 925275 }, { "epoch": 9.1, "grad_norm": 6.798525333404541, "learning_rate": 4.575501206470256e-07, "loss": 0.015, "step": 925300 }, { "epoch": 9.1, "grad_norm": 1.0962262153625488, "learning_rate": 4.5742599819277716e-07, "loss": 0.0391, "step": 925325 }, { "epoch": 9.1, "grad_norm": 6.902313709259033, "learning_rate": 4.5730187573852863e-07, "loss": 0.0227, "step": 925350 }, { "epoch": 9.1, "grad_norm": 0.7990739941596985, "learning_rate": 4.571777532842802e-07, "loss": 0.0334, "step": 925375 }, { "epoch": 9.1, "grad_norm": 17.163564682006836, "learning_rate": 4.570536308300317e-07, "loss": 0.0194, "step": 925400 }, { "epoch": 9.1, "grad_norm": 0.029175614938139915, "learning_rate": 4.5692950837578327e-07, "loss": 0.0349, "step": 925425 }, { "epoch": 9.1, "grad_norm": 10.631885528564453, "learning_rate": 4.5680538592153475e-07, "loss": 0.0096, "step": 925450 }, { "epoch": 9.1, "grad_norm": 0.20426082611083984, "learning_rate": 4.5668126346728633e-07, "loss": 0.0264, "step": 925475 }, { "epoch": 9.1, "grad_norm": 0.4905374050140381, "learning_rate": 4.5655714101303786e-07, "loss": 0.0195, "step": 925500 }, { "epoch": 9.1, "grad_norm": 7.787735462188721, "learning_rate": 4.564330185587894e-07, "loss": 0.0495, "step": 925525 }, { "epoch": 9.1, "grad_norm": 13.771371841430664, "learning_rate": 4.5630889610454097e-07, "loss": 0.0331, "step": 925550 }, { "epoch": 9.1, "grad_norm": 14.999710083007812, "learning_rate": 4.5618477365029245e-07, "loss": 0.0447, "step": 925575 }, { "epoch": 9.1, "grad_norm": 11.296426773071289, "learning_rate": 4.5606065119604403e-07, "loss": 0.0033, "step": 925600 }, { "epoch": 9.1, "grad_norm": 0.012771266512572765, "learning_rate": 4.5593652874179556e-07, "loss": 0.0378, "step": 925625 }, { "epoch": 9.1, "grad_norm": 2.8740594387054443, "learning_rate": 4.558124062875471e-07, "loss": 0.0138, "step": 925650 }, { "epoch": 9.1, "grad_norm": 12.187349319458008, "learning_rate": 4.556882838332986e-07, "loss": 0.0457, "step": 925675 }, { "epoch": 9.1, "grad_norm": 1.520827054977417, "learning_rate": 4.5556416137905015e-07, "loss": 0.0092, "step": 925700 }, { "epoch": 9.1, "grad_norm": 1.511949896812439, "learning_rate": 4.554400389248017e-07, "loss": 0.0244, "step": 925725 }, { "epoch": 9.1, "grad_norm": 1.3301559686660767, "learning_rate": 4.5531591647055326e-07, "loss": 0.0098, "step": 925750 }, { "epoch": 9.1, "grad_norm": 0.03619244694709778, "learning_rate": 4.5519179401630473e-07, "loss": 0.0479, "step": 925775 }, { "epoch": 9.1, "grad_norm": 0.06911228597164154, "learning_rate": 4.550676715620563e-07, "loss": 0.0097, "step": 925800 }, { "epoch": 9.1, "grad_norm": 0.13081273436546326, "learning_rate": 4.549435491078078e-07, "loss": 0.0499, "step": 925825 }, { "epoch": 9.1, "grad_norm": 1.3606404066085815, "learning_rate": 4.548194266535594e-07, "loss": 0.0198, "step": 925850 }, { "epoch": 9.1, "grad_norm": 0.2450239360332489, "learning_rate": 4.546953041993109e-07, "loss": 0.0269, "step": 925875 }, { "epoch": 9.1, "grad_norm": 0.12992897629737854, "learning_rate": 4.5457118174506243e-07, "loss": 0.009, "step": 925900 }, { "epoch": 9.1, "grad_norm": 0.0061544207856059074, "learning_rate": 4.54447059290814e-07, "loss": 0.0589, "step": 925925 }, { "epoch": 9.1, "grad_norm": 0.37091708183288574, "learning_rate": 4.543229368365655e-07, "loss": 0.0089, "step": 925950 }, { "epoch": 9.1, "grad_norm": 0.8721484541893005, "learning_rate": 4.5419881438231707e-07, "loss": 0.0359, "step": 925975 }, { "epoch": 9.1, "grad_norm": 14.84079647064209, "learning_rate": 4.5407469192806855e-07, "loss": 0.0232, "step": 926000 }, { "epoch": 9.1, "grad_norm": 0.4885573983192444, "learning_rate": 4.5395056947382013e-07, "loss": 0.0264, "step": 926025 }, { "epoch": 9.11, "grad_norm": 3.960968255996704, "learning_rate": 4.538264470195717e-07, "loss": 0.0131, "step": 926050 }, { "epoch": 9.11, "grad_norm": 0.12791694700717926, "learning_rate": 4.537023245653232e-07, "loss": 0.0302, "step": 926075 }, { "epoch": 9.11, "grad_norm": 10.982938766479492, "learning_rate": 4.5357820211107477e-07, "loss": 0.0168, "step": 926100 }, { "epoch": 9.11, "grad_norm": 2.5813663005828857, "learning_rate": 4.5345407965682625e-07, "loss": 0.0271, "step": 926125 }, { "epoch": 9.11, "grad_norm": 0.09626254439353943, "learning_rate": 4.5332995720257783e-07, "loss": 0.0068, "step": 926150 }, { "epoch": 9.11, "grad_norm": 0.34988126158714294, "learning_rate": 4.5320583474832936e-07, "loss": 0.0202, "step": 926175 }, { "epoch": 9.11, "grad_norm": 2.005150079727173, "learning_rate": 4.530817122940809e-07, "loss": 0.0113, "step": 926200 }, { "epoch": 9.11, "grad_norm": 0.04118146002292633, "learning_rate": 4.5295758983983247e-07, "loss": 0.0225, "step": 926225 }, { "epoch": 9.11, "grad_norm": 6.009421348571777, "learning_rate": 4.5283346738558395e-07, "loss": 0.0202, "step": 926250 }, { "epoch": 9.11, "grad_norm": 1.230404019355774, "learning_rate": 4.5270934493133553e-07, "loss": 0.0364, "step": 926275 }, { "epoch": 9.11, "grad_norm": 1.724025845527649, "learning_rate": 4.52585222477087e-07, "loss": 0.0284, "step": 926300 }, { "epoch": 9.11, "grad_norm": 0.24077877402305603, "learning_rate": 4.524611000228386e-07, "loss": 0.0266, "step": 926325 }, { "epoch": 9.11, "grad_norm": 7.914829730987549, "learning_rate": 4.523369775685901e-07, "loss": 0.0151, "step": 926350 }, { "epoch": 9.11, "grad_norm": 1.3567031621932983, "learning_rate": 4.5221285511434164e-07, "loss": 0.0375, "step": 926375 }, { "epoch": 9.11, "grad_norm": 1.286478042602539, "learning_rate": 4.5208873266009317e-07, "loss": 0.0209, "step": 926400 }, { "epoch": 9.11, "grad_norm": 8.83934497833252, "learning_rate": 4.519646102058447e-07, "loss": 0.035, "step": 926425 }, { "epoch": 9.11, "grad_norm": 6.56996488571167, "learning_rate": 4.5184048775159623e-07, "loss": 0.0224, "step": 926450 }, { "epoch": 9.11, "grad_norm": 0.18252520263195038, "learning_rate": 4.517163652973478e-07, "loss": 0.0276, "step": 926475 }, { "epoch": 9.11, "grad_norm": 26.323823928833008, "learning_rate": 4.515922428430993e-07, "loss": 0.0178, "step": 926500 }, { "epoch": 9.11, "grad_norm": 0.10981740057468414, "learning_rate": 4.5146812038885087e-07, "loss": 0.0231, "step": 926525 }, { "epoch": 9.11, "grad_norm": 0.35320180654525757, "learning_rate": 4.5134399793460235e-07, "loss": 0.013, "step": 926550 }, { "epoch": 9.11, "grad_norm": 0.030797837302088737, "learning_rate": 4.5121987548035393e-07, "loss": 0.024, "step": 926575 }, { "epoch": 9.11, "grad_norm": 7.859211444854736, "learning_rate": 4.510957530261055e-07, "loss": 0.0114, "step": 926600 }, { "epoch": 9.11, "grad_norm": 0.6169464588165283, "learning_rate": 4.50971630571857e-07, "loss": 0.0375, "step": 926625 }, { "epoch": 9.11, "grad_norm": 11.842011451721191, "learning_rate": 4.5084750811760857e-07, "loss": 0.0137, "step": 926650 }, { "epoch": 9.11, "grad_norm": 0.09187722951173782, "learning_rate": 4.5072338566336005e-07, "loss": 0.0602, "step": 926675 }, { "epoch": 9.11, "grad_norm": 9.662467956542969, "learning_rate": 4.5059926320911163e-07, "loss": 0.0149, "step": 926700 }, { "epoch": 9.11, "grad_norm": 0.04114983603358269, "learning_rate": 4.504751407548631e-07, "loss": 0.0308, "step": 926725 }, { "epoch": 9.11, "grad_norm": 2.3909921646118164, "learning_rate": 4.503510183006147e-07, "loss": 0.0069, "step": 926750 }, { "epoch": 9.11, "grad_norm": 1.0843738317489624, "learning_rate": 4.5022689584636627e-07, "loss": 0.0312, "step": 926775 }, { "epoch": 9.11, "grad_norm": 10.764249801635742, "learning_rate": 4.5010277339211774e-07, "loss": 0.0158, "step": 926800 }, { "epoch": 9.11, "grad_norm": 1.0774071216583252, "learning_rate": 4.499786509378693e-07, "loss": 0.0489, "step": 926825 }, { "epoch": 9.11, "grad_norm": 0.0661860853433609, "learning_rate": 4.498545284836208e-07, "loss": 0.0089, "step": 926850 }, { "epoch": 9.11, "grad_norm": 0.04476643353700638, "learning_rate": 4.497304060293724e-07, "loss": 0.0251, "step": 926875 }, { "epoch": 9.11, "grad_norm": 1.2820323705673218, "learning_rate": 4.496062835751239e-07, "loss": 0.0151, "step": 926900 }, { "epoch": 9.11, "grad_norm": 0.1785481870174408, "learning_rate": 4.4948216112087544e-07, "loss": 0.0238, "step": 926925 }, { "epoch": 9.11, "grad_norm": 2.124424457550049, "learning_rate": 4.49358038666627e-07, "loss": 0.0119, "step": 926950 }, { "epoch": 9.11, "grad_norm": 0.2520398497581482, "learning_rate": 4.492339162123785e-07, "loss": 0.0338, "step": 926975 }, { "epoch": 9.11, "grad_norm": 3.1077821254730225, "learning_rate": 4.491097937581301e-07, "loss": 0.009, "step": 927000 }, { "epoch": 9.11, "grad_norm": 1.4599621295928955, "learning_rate": 4.489856713038816e-07, "loss": 0.0282, "step": 927025 }, { "epoch": 9.11, "grad_norm": 2.162937641143799, "learning_rate": 4.4886154884963314e-07, "loss": 0.0145, "step": 927050 }, { "epoch": 9.12, "grad_norm": 0.001064293086528778, "learning_rate": 4.4873742639538467e-07, "loss": 0.0575, "step": 927075 }, { "epoch": 9.12, "grad_norm": 5.502865314483643, "learning_rate": 4.486133039411362e-07, "loss": 0.0096, "step": 927100 }, { "epoch": 9.12, "grad_norm": 4.285560131072998, "learning_rate": 4.4848918148688773e-07, "loss": 0.0343, "step": 927125 }, { "epoch": 9.12, "grad_norm": 6.00518798828125, "learning_rate": 4.4836505903263926e-07, "loss": 0.0113, "step": 927150 }, { "epoch": 9.12, "grad_norm": 0.4882805347442627, "learning_rate": 4.482409365783908e-07, "loss": 0.0286, "step": 927175 }, { "epoch": 9.12, "grad_norm": 1.959733486175537, "learning_rate": 4.4811681412414237e-07, "loss": 0.0137, "step": 927200 }, { "epoch": 9.12, "grad_norm": 0.7011270523071289, "learning_rate": 4.4799269166989384e-07, "loss": 0.0257, "step": 927225 }, { "epoch": 9.12, "grad_norm": 16.99051856994629, "learning_rate": 4.4786856921564543e-07, "loss": 0.011, "step": 927250 }, { "epoch": 9.12, "grad_norm": 0.07335295528173447, "learning_rate": 4.4774444676139696e-07, "loss": 0.0339, "step": 927275 }, { "epoch": 9.12, "grad_norm": 0.1564202755689621, "learning_rate": 4.476203243071485e-07, "loss": 0.0072, "step": 927300 }, { "epoch": 9.12, "grad_norm": 0.3153967261314392, "learning_rate": 4.4749620185290007e-07, "loss": 0.0299, "step": 927325 }, { "epoch": 9.12, "grad_norm": 12.481038093566895, "learning_rate": 4.4737207939865154e-07, "loss": 0.0054, "step": 927350 }, { "epoch": 9.12, "grad_norm": 0.022735929116606712, "learning_rate": 4.472479569444031e-07, "loss": 0.0366, "step": 927375 }, { "epoch": 9.12, "grad_norm": 5.262846946716309, "learning_rate": 4.471238344901546e-07, "loss": 0.0082, "step": 927400 }, { "epoch": 9.12, "grad_norm": 0.005524861626327038, "learning_rate": 4.469997120359062e-07, "loss": 0.0382, "step": 927425 }, { "epoch": 9.12, "grad_norm": 14.130728721618652, "learning_rate": 4.4687558958165777e-07, "loss": 0.0184, "step": 927450 }, { "epoch": 9.12, "grad_norm": 0.9204270839691162, "learning_rate": 4.4675146712740924e-07, "loss": 0.0411, "step": 927475 }, { "epoch": 9.12, "grad_norm": 0.16782429814338684, "learning_rate": 4.466273446731608e-07, "loss": 0.011, "step": 927500 }, { "epoch": 9.12, "grad_norm": 0.07883083820343018, "learning_rate": 4.465032222189123e-07, "loss": 0.0279, "step": 927525 }, { "epoch": 9.12, "grad_norm": 11.52759075164795, "learning_rate": 4.463790997646639e-07, "loss": 0.0129, "step": 927550 }, { "epoch": 9.12, "grad_norm": 0.5035866498947144, "learning_rate": 4.4625497731041536e-07, "loss": 0.0448, "step": 927575 }, { "epoch": 9.12, "grad_norm": 5.4579572677612305, "learning_rate": 4.4613085485616694e-07, "loss": 0.0181, "step": 927600 }, { "epoch": 9.12, "grad_norm": 0.13215360045433044, "learning_rate": 4.460067324019185e-07, "loss": 0.0261, "step": 927625 }, { "epoch": 9.12, "grad_norm": 10.177242279052734, "learning_rate": 4.4588260994767e-07, "loss": 0.008, "step": 927650 }, { "epoch": 9.12, "grad_norm": 1.6062536239624023, "learning_rate": 4.457584874934216e-07, "loss": 0.0389, "step": 927675 }, { "epoch": 9.12, "grad_norm": 3.4412357807159424, "learning_rate": 4.4563436503917306e-07, "loss": 0.0073, "step": 927700 }, { "epoch": 9.12, "grad_norm": 0.021299123764038086, "learning_rate": 4.4551024258492464e-07, "loss": 0.0376, "step": 927725 }, { "epoch": 9.12, "grad_norm": 0.10629022121429443, "learning_rate": 4.4538612013067617e-07, "loss": 0.0177, "step": 927750 }, { "epoch": 9.12, "grad_norm": 5.426258563995361, "learning_rate": 4.452619976764277e-07, "loss": 0.0246, "step": 927775 }, { "epoch": 9.12, "grad_norm": 10.625131607055664, "learning_rate": 4.451378752221792e-07, "loss": 0.0122, "step": 927800 }, { "epoch": 9.12, "grad_norm": 5.161781311035156, "learning_rate": 4.4501375276793075e-07, "loss": 0.0403, "step": 927825 }, { "epoch": 9.12, "grad_norm": 1.3051540851593018, "learning_rate": 4.448896303136823e-07, "loss": 0.0054, "step": 927850 }, { "epoch": 9.12, "grad_norm": 1.364913821220398, "learning_rate": 4.4476550785943387e-07, "loss": 0.0332, "step": 927875 }, { "epoch": 9.12, "grad_norm": 2.3647749423980713, "learning_rate": 4.4464138540518534e-07, "loss": 0.0136, "step": 927900 }, { "epoch": 9.12, "grad_norm": 0.05104174092411995, "learning_rate": 4.445172629509369e-07, "loss": 0.039, "step": 927925 }, { "epoch": 9.12, "grad_norm": 0.2547615170478821, "learning_rate": 4.4439314049668845e-07, "loss": 0.0153, "step": 927950 }, { "epoch": 9.12, "grad_norm": 1.150376319885254, "learning_rate": 4.4426901804244e-07, "loss": 0.0375, "step": 927975 }, { "epoch": 9.12, "grad_norm": 5.302060604095459, "learning_rate": 4.441448955881915e-07, "loss": 0.0063, "step": 928000 }, { "epoch": 9.12, "grad_norm": 3.4332263469696045, "learning_rate": 4.4402077313394304e-07, "loss": 0.0371, "step": 928025 }, { "epoch": 9.12, "grad_norm": 0.4116879999637604, "learning_rate": 4.438966506796946e-07, "loss": 0.0228, "step": 928050 }, { "epoch": 9.12, "grad_norm": 0.04674920067191124, "learning_rate": 4.437725282254461e-07, "loss": 0.0482, "step": 928075 }, { "epoch": 9.13, "grad_norm": 0.26710397005081177, "learning_rate": 4.436484057711977e-07, "loss": 0.0145, "step": 928100 }, { "epoch": 9.13, "grad_norm": 0.013101851567626, "learning_rate": 4.4352428331694916e-07, "loss": 0.0213, "step": 928125 }, { "epoch": 9.13, "grad_norm": 5.561372756958008, "learning_rate": 4.4340016086270074e-07, "loss": 0.0167, "step": 928150 }, { "epoch": 9.13, "grad_norm": 0.42969810962677, "learning_rate": 4.432810033066222e-07, "loss": 0.0491, "step": 928175 }, { "epoch": 9.13, "grad_norm": 0.4363515377044678, "learning_rate": 4.4315688085237377e-07, "loss": 0.0069, "step": 928200 }, { "epoch": 9.13, "grad_norm": 3.4599218368530273, "learning_rate": 4.430327583981253e-07, "loss": 0.0381, "step": 928225 }, { "epoch": 9.13, "grad_norm": 1.5202581882476807, "learning_rate": 4.4290863594387683e-07, "loss": 0.0139, "step": 928250 }, { "epoch": 9.13, "grad_norm": 4.878743648529053, "learning_rate": 4.4278451348962836e-07, "loss": 0.0165, "step": 928275 }, { "epoch": 9.13, "grad_norm": 3.42702317237854, "learning_rate": 4.426603910353799e-07, "loss": 0.0143, "step": 928300 }, { "epoch": 9.13, "grad_norm": 1.6414083242416382, "learning_rate": 4.425362685811314e-07, "loss": 0.0462, "step": 928325 }, { "epoch": 9.13, "grad_norm": 11.246736526489258, "learning_rate": 4.42412146126883e-07, "loss": 0.0151, "step": 928350 }, { "epoch": 9.13, "grad_norm": 0.03198961541056633, "learning_rate": 4.422880236726345e-07, "loss": 0.0445, "step": 928375 }, { "epoch": 9.13, "grad_norm": 13.220455169677734, "learning_rate": 4.4216390121838606e-07, "loss": 0.0185, "step": 928400 }, { "epoch": 9.13, "grad_norm": 0.014398385770618916, "learning_rate": 4.420397787641376e-07, "loss": 0.0376, "step": 928425 }, { "epoch": 9.13, "grad_norm": 7.49616813659668, "learning_rate": 4.419156563098891e-07, "loss": 0.0104, "step": 928450 }, { "epoch": 9.13, "grad_norm": 0.010547245852649212, "learning_rate": 4.4179153385564064e-07, "loss": 0.0485, "step": 928475 }, { "epoch": 9.13, "grad_norm": 0.757317066192627, "learning_rate": 4.416674114013922e-07, "loss": 0.0199, "step": 928500 }, { "epoch": 9.13, "grad_norm": 0.028950678184628487, "learning_rate": 4.4154328894714376e-07, "loss": 0.0493, "step": 928525 }, { "epoch": 9.13, "grad_norm": 0.9050789475440979, "learning_rate": 4.4141916649289523e-07, "loss": 0.0108, "step": 928550 }, { "epoch": 9.13, "grad_norm": 2.4034605026245117, "learning_rate": 4.412950440386468e-07, "loss": 0.0512, "step": 928575 }, { "epoch": 9.13, "grad_norm": 3.2152652740478516, "learning_rate": 4.411709215843983e-07, "loss": 0.0176, "step": 928600 }, { "epoch": 9.13, "grad_norm": 2.2314329147338867, "learning_rate": 4.4104679913014987e-07, "loss": 0.034, "step": 928625 }, { "epoch": 9.13, "grad_norm": 11.263885498046875, "learning_rate": 4.4092267667590145e-07, "loss": 0.0112, "step": 928650 }, { "epoch": 9.13, "grad_norm": 0.14078964293003082, "learning_rate": 4.4079855422165293e-07, "loss": 0.0277, "step": 928675 }, { "epoch": 9.13, "grad_norm": 16.38594627380371, "learning_rate": 4.406744317674045e-07, "loss": 0.0068, "step": 928700 }, { "epoch": 9.13, "grad_norm": 0.03111123852431774, "learning_rate": 4.40550309313156e-07, "loss": 0.03, "step": 928725 }, { "epoch": 9.13, "grad_norm": 0.49882325530052185, "learning_rate": 4.4042618685890757e-07, "loss": 0.0059, "step": 928750 }, { "epoch": 9.13, "grad_norm": 0.00852329470217228, "learning_rate": 4.4030206440465915e-07, "loss": 0.0288, "step": 928775 }, { "epoch": 9.13, "grad_norm": 6.330587863922119, "learning_rate": 4.4017794195041063e-07, "loss": 0.008, "step": 928800 }, { "epoch": 9.13, "grad_norm": 0.1399105191230774, "learning_rate": 4.400538194961622e-07, "loss": 0.0334, "step": 928825 }, { "epoch": 9.13, "grad_norm": 0.3431715965270996, "learning_rate": 4.399296970419137e-07, "loss": 0.0077, "step": 928850 }, { "epoch": 9.13, "grad_norm": 0.6368245482444763, "learning_rate": 4.3980557458766527e-07, "loss": 0.0603, "step": 928875 }, { "epoch": 9.13, "grad_norm": 5.315914154052734, "learning_rate": 4.396814521334168e-07, "loss": 0.0173, "step": 928900 }, { "epoch": 9.13, "grad_norm": 0.03623177111148834, "learning_rate": 4.3955732967916833e-07, "loss": 0.0306, "step": 928925 }, { "epoch": 9.13, "grad_norm": 7.052168846130371, "learning_rate": 4.3943320722491986e-07, "loss": 0.0213, "step": 928950 }, { "epoch": 9.13, "grad_norm": 0.13197818398475647, "learning_rate": 4.393090847706714e-07, "loss": 0.0352, "step": 928975 }, { "epoch": 9.13, "grad_norm": 6.855654716491699, "learning_rate": 4.391849623164229e-07, "loss": 0.0162, "step": 929000 }, { "epoch": 9.13, "grad_norm": 0.30586642026901245, "learning_rate": 4.3906083986217444e-07, "loss": 0.0305, "step": 929025 }, { "epoch": 9.13, "grad_norm": 7.904665470123291, "learning_rate": 4.3893671740792597e-07, "loss": 0.0091, "step": 929050 }, { "epoch": 9.13, "grad_norm": 3.737417697906494, "learning_rate": 4.3881259495367755e-07, "loss": 0.0441, "step": 929075 }, { "epoch": 9.14, "grad_norm": 15.59249496459961, "learning_rate": 4.386884724994291e-07, "loss": 0.0277, "step": 929100 }, { "epoch": 9.14, "grad_norm": 0.023003624752163887, "learning_rate": 4.385643500451806e-07, "loss": 0.0394, "step": 929125 }, { "epoch": 9.14, "grad_norm": 0.04701133444905281, "learning_rate": 4.3844022759093214e-07, "loss": 0.0117, "step": 929150 }, { "epoch": 9.14, "grad_norm": 0.07369279861450195, "learning_rate": 4.3831610513668367e-07, "loss": 0.0376, "step": 929175 }, { "epoch": 9.14, "grad_norm": 8.733198165893555, "learning_rate": 4.3819198268243525e-07, "loss": 0.014, "step": 929200 }, { "epoch": 9.14, "grad_norm": 0.06878639757633209, "learning_rate": 4.3806786022818673e-07, "loss": 0.0368, "step": 929225 }, { "epoch": 9.14, "grad_norm": 9.026420593261719, "learning_rate": 4.379437377739383e-07, "loss": 0.0167, "step": 929250 }, { "epoch": 9.14, "grad_norm": 2.932797908782959, "learning_rate": 4.378196153196898e-07, "loss": 0.034, "step": 929275 }, { "epoch": 9.14, "grad_norm": 0.41805487871170044, "learning_rate": 4.3769549286544137e-07, "loss": 0.0223, "step": 929300 }, { "epoch": 9.14, "grad_norm": 0.12848715484142303, "learning_rate": 4.3757137041119295e-07, "loss": 0.021, "step": 929325 }, { "epoch": 9.14, "grad_norm": 11.599222183227539, "learning_rate": 4.3744724795694443e-07, "loss": 0.0085, "step": 929350 }, { "epoch": 9.14, "grad_norm": 0.0858023539185524, "learning_rate": 4.37323125502696e-07, "loss": 0.0526, "step": 929375 }, { "epoch": 9.14, "grad_norm": 15.718901634216309, "learning_rate": 4.371990030484475e-07, "loss": 0.0137, "step": 929400 }, { "epoch": 9.14, "grad_norm": 3.4280200004577637, "learning_rate": 4.3707488059419907e-07, "loss": 0.0515, "step": 929425 }, { "epoch": 9.14, "grad_norm": 1.2033872604370117, "learning_rate": 4.3695075813995054e-07, "loss": 0.015, "step": 929450 }, { "epoch": 9.14, "grad_norm": 0.037191104143857956, "learning_rate": 4.368266356857021e-07, "loss": 0.0361, "step": 929475 }, { "epoch": 9.14, "grad_norm": 4.774495601654053, "learning_rate": 4.367025132314537e-07, "loss": 0.016, "step": 929500 }, { "epoch": 9.14, "grad_norm": 0.04788856580853462, "learning_rate": 4.365783907772052e-07, "loss": 0.0487, "step": 929525 }, { "epoch": 9.14, "grad_norm": 12.73564624786377, "learning_rate": 4.3645426832295677e-07, "loss": 0.0303, "step": 929550 }, { "epoch": 9.14, "grad_norm": 0.013550042174756527, "learning_rate": 4.3633014586870824e-07, "loss": 0.0399, "step": 929575 }, { "epoch": 9.14, "grad_norm": 9.976283073425293, "learning_rate": 4.362060234144598e-07, "loss": 0.0105, "step": 929600 }, { "epoch": 9.14, "grad_norm": 0.2918657660484314, "learning_rate": 4.3608190096021135e-07, "loss": 0.0429, "step": 929625 }, { "epoch": 9.14, "grad_norm": 0.5388253927230835, "learning_rate": 4.359577785059629e-07, "loss": 0.013, "step": 929650 }, { "epoch": 9.14, "grad_norm": 0.025192739441990852, "learning_rate": 4.358336560517144e-07, "loss": 0.0436, "step": 929675 }, { "epoch": 9.14, "grad_norm": 5.788212299346924, "learning_rate": 4.3570953359746594e-07, "loss": 0.0185, "step": 929700 }, { "epoch": 9.14, "grad_norm": 3.1794936656951904, "learning_rate": 4.3558541114321747e-07, "loss": 0.0283, "step": 929725 }, { "epoch": 9.14, "grad_norm": 7.209086894989014, "learning_rate": 4.3546128868896905e-07, "loss": 0.0236, "step": 929750 }, { "epoch": 9.14, "grad_norm": 0.03363094478845596, "learning_rate": 4.3533716623472053e-07, "loss": 0.0367, "step": 929775 }, { "epoch": 9.14, "grad_norm": 9.334099769592285, "learning_rate": 4.352130437804721e-07, "loss": 0.0205, "step": 929800 }, { "epoch": 9.14, "grad_norm": 0.03191467747092247, "learning_rate": 4.3508892132622364e-07, "loss": 0.0322, "step": 929825 }, { "epoch": 9.14, "grad_norm": 15.05541706085205, "learning_rate": 4.3496479887197517e-07, "loss": 0.0201, "step": 929850 }, { "epoch": 9.14, "grad_norm": 9.10078239440918, "learning_rate": 4.348406764177267e-07, "loss": 0.0367, "step": 929875 }, { "epoch": 9.14, "grad_norm": 4.270903587341309, "learning_rate": 4.3471655396347823e-07, "loss": 0.0193, "step": 929900 }, { "epoch": 9.14, "grad_norm": 0.399221807718277, "learning_rate": 4.345924315092298e-07, "loss": 0.0565, "step": 929925 }, { "epoch": 9.14, "grad_norm": 4.971441745758057, "learning_rate": 4.344683090549813e-07, "loss": 0.025, "step": 929950 }, { "epoch": 9.14, "grad_norm": 2.3217601776123047, "learning_rate": 4.3434418660073287e-07, "loss": 0.05, "step": 929975 }, { "epoch": 9.14, "grad_norm": 0.5428724884986877, "learning_rate": 4.3422006414648434e-07, "loss": 0.0207, "step": 930000 }, { "epoch": 9.14, "grad_norm": 0.07660321891307831, "learning_rate": 4.340959416922359e-07, "loss": 0.0326, "step": 930025 }, { "epoch": 9.14, "grad_norm": 16.83544158935547, "learning_rate": 4.339718192379875e-07, "loss": 0.0073, "step": 930050 }, { "epoch": 9.14, "grad_norm": 0.012015985324978828, "learning_rate": 4.33847696783739e-07, "loss": 0.0454, "step": 930075 }, { "epoch": 9.14, "grad_norm": 0.21826519072055817, "learning_rate": 4.3372357432949057e-07, "loss": 0.0162, "step": 930100 }, { "epoch": 9.15, "grad_norm": 16.36217498779297, "learning_rate": 4.3359945187524204e-07, "loss": 0.0488, "step": 930125 }, { "epoch": 9.15, "grad_norm": 9.886670112609863, "learning_rate": 4.334753294209936e-07, "loss": 0.013, "step": 930150 }, { "epoch": 9.15, "grad_norm": 0.3539389371871948, "learning_rate": 4.333512069667452e-07, "loss": 0.0304, "step": 930175 }, { "epoch": 9.15, "grad_norm": 0.34718483686447144, "learning_rate": 4.332270845124967e-07, "loss": 0.0093, "step": 930200 }, { "epoch": 9.15, "grad_norm": 4.567315578460693, "learning_rate": 4.3310296205824826e-07, "loss": 0.0257, "step": 930225 }, { "epoch": 9.15, "grad_norm": 1.1936442852020264, "learning_rate": 4.3297883960399974e-07, "loss": 0.0134, "step": 930250 }, { "epoch": 9.15, "grad_norm": 0.020330337807536125, "learning_rate": 4.328547171497513e-07, "loss": 0.0446, "step": 930275 }, { "epoch": 9.15, "grad_norm": 8.631246566772461, "learning_rate": 4.327305946955028e-07, "loss": 0.0201, "step": 930300 }, { "epoch": 9.15, "grad_norm": 0.16536714136600494, "learning_rate": 4.326064722412544e-07, "loss": 0.0483, "step": 930325 }, { "epoch": 9.15, "grad_norm": 7.185934066772461, "learning_rate": 4.324823497870059e-07, "loss": 0.0121, "step": 930350 }, { "epoch": 9.15, "grad_norm": 0.2076655924320221, "learning_rate": 4.3235822733275744e-07, "loss": 0.0317, "step": 930375 }, { "epoch": 9.15, "grad_norm": 6.30726432800293, "learning_rate": 4.3223410487850897e-07, "loss": 0.0148, "step": 930400 }, { "epoch": 9.15, "grad_norm": 1.1327807903289795, "learning_rate": 4.321099824242605e-07, "loss": 0.0286, "step": 930425 }, { "epoch": 9.15, "grad_norm": 0.6334787607192993, "learning_rate": 4.31985859970012e-07, "loss": 0.0168, "step": 930450 }, { "epoch": 9.15, "grad_norm": 1.8032898902893066, "learning_rate": 4.318617375157636e-07, "loss": 0.0312, "step": 930475 }, { "epoch": 9.15, "grad_norm": 0.6465566754341125, "learning_rate": 4.3173761506151514e-07, "loss": 0.019, "step": 930500 }, { "epoch": 9.15, "grad_norm": 13.541828155517578, "learning_rate": 4.3161349260726667e-07, "loss": 0.041, "step": 930525 }, { "epoch": 9.15, "grad_norm": 9.459108352661133, "learning_rate": 4.314893701530182e-07, "loss": 0.0172, "step": 930550 }, { "epoch": 9.15, "grad_norm": 1.4466562271118164, "learning_rate": 4.313652476987697e-07, "loss": 0.0355, "step": 930575 }, { "epoch": 9.15, "grad_norm": 0.01627563126385212, "learning_rate": 4.312411252445213e-07, "loss": 0.0088, "step": 930600 }, { "epoch": 9.15, "grad_norm": 0.026665786281228065, "learning_rate": 4.311170027902728e-07, "loss": 0.0275, "step": 930625 }, { "epoch": 9.15, "grad_norm": 6.142623424530029, "learning_rate": 4.3099288033602436e-07, "loss": 0.0196, "step": 930650 }, { "epoch": 9.15, "grad_norm": 1.1701279878616333, "learning_rate": 4.3086875788177584e-07, "loss": 0.0323, "step": 930675 }, { "epoch": 9.15, "grad_norm": 3.982006788253784, "learning_rate": 4.307446354275274e-07, "loss": 0.0216, "step": 930700 }, { "epoch": 9.15, "grad_norm": 0.025048650801181793, "learning_rate": 4.306205129732789e-07, "loss": 0.0253, "step": 930725 }, { "epoch": 9.15, "grad_norm": 0.3851405084133148, "learning_rate": 4.304963905190305e-07, "loss": 0.0143, "step": 930750 }, { "epoch": 9.15, "grad_norm": 1.8200273513793945, "learning_rate": 4.3037226806478206e-07, "loss": 0.0255, "step": 930775 }, { "epoch": 9.15, "grad_norm": 17.898834228515625, "learning_rate": 4.3024814561053354e-07, "loss": 0.0138, "step": 930800 }, { "epoch": 9.15, "grad_norm": 0.00866745412349701, "learning_rate": 4.301240231562851e-07, "loss": 0.0354, "step": 930825 }, { "epoch": 9.15, "grad_norm": 0.5100966095924377, "learning_rate": 4.299999007020366e-07, "loss": 0.0134, "step": 930850 }, { "epoch": 9.15, "grad_norm": 0.21218644082546234, "learning_rate": 4.298757782477882e-07, "loss": 0.0344, "step": 930875 }, { "epoch": 9.15, "grad_norm": 3.5559141635894775, "learning_rate": 4.2975165579353976e-07, "loss": 0.0068, "step": 930900 }, { "epoch": 9.15, "grad_norm": 9.270153999328613, "learning_rate": 4.2962753333929124e-07, "loss": 0.0521, "step": 930925 }, { "epoch": 9.15, "grad_norm": 0.9046471118927002, "learning_rate": 4.295034108850428e-07, "loss": 0.0098, "step": 930950 }, { "epoch": 9.15, "grad_norm": 0.06003836542367935, "learning_rate": 4.293792884307943e-07, "loss": 0.0237, "step": 930975 }, { "epoch": 9.15, "grad_norm": 3.1891722679138184, "learning_rate": 4.292551659765459e-07, "loss": 0.0134, "step": 931000 }, { "epoch": 9.15, "grad_norm": 0.5460155010223389, "learning_rate": 4.291310435222974e-07, "loss": 0.0504, "step": 931025 }, { "epoch": 9.15, "grad_norm": 1.0185580253601074, "learning_rate": 4.2900692106804894e-07, "loss": 0.0082, "step": 931050 }, { "epoch": 9.15, "grad_norm": 0.9844990968704224, "learning_rate": 4.2888279861380046e-07, "loss": 0.0481, "step": 931075 }, { "epoch": 9.15, "grad_norm": 0.6608739495277405, "learning_rate": 4.28758676159552e-07, "loss": 0.0102, "step": 931100 }, { "epoch": 9.15, "grad_norm": 0.002877083607017994, "learning_rate": 4.286345537053035e-07, "loss": 0.025, "step": 931125 }, { "epoch": 9.16, "grad_norm": 10.050980567932129, "learning_rate": 4.2851043125105505e-07, "loss": 0.0114, "step": 931150 }, { "epoch": 9.16, "grad_norm": 0.8325684666633606, "learning_rate": 4.2838630879680663e-07, "loss": 0.0308, "step": 931175 }, { "epoch": 9.16, "grad_norm": 0.08914327621459961, "learning_rate": 4.2826218634255816e-07, "loss": 0.0185, "step": 931200 }, { "epoch": 9.16, "grad_norm": 0.05265095829963684, "learning_rate": 4.281380638883097e-07, "loss": 0.026, "step": 931225 }, { "epoch": 9.16, "grad_norm": 8.510601043701172, "learning_rate": 4.280139414340612e-07, "loss": 0.0227, "step": 931250 }, { "epoch": 9.16, "grad_norm": 0.437777042388916, "learning_rate": 4.2788981897981275e-07, "loss": 0.0261, "step": 931275 }, { "epoch": 9.16, "grad_norm": 13.486859321594238, "learning_rate": 4.277656965255643e-07, "loss": 0.0174, "step": 931300 }, { "epoch": 9.16, "grad_norm": 0.009664984419941902, "learning_rate": 4.2764157407131586e-07, "loss": 0.0432, "step": 931325 }, { "epoch": 9.16, "grad_norm": 13.815766334533691, "learning_rate": 4.2751745161706734e-07, "loss": 0.018, "step": 931350 }, { "epoch": 9.16, "grad_norm": 0.0331263393163681, "learning_rate": 4.273933291628189e-07, "loss": 0.0532, "step": 931375 }, { "epoch": 9.16, "grad_norm": 4.287411212921143, "learning_rate": 4.272692067085704e-07, "loss": 0.0115, "step": 931400 }, { "epoch": 9.16, "grad_norm": 3.642177104949951, "learning_rate": 4.27145084254322e-07, "loss": 0.0283, "step": 931425 }, { "epoch": 9.16, "grad_norm": 6.355688095092773, "learning_rate": 4.2702096180007356e-07, "loss": 0.008, "step": 931450 }, { "epoch": 9.16, "grad_norm": 4.838616847991943, "learning_rate": 4.2689683934582504e-07, "loss": 0.0314, "step": 931475 }, { "epoch": 9.16, "grad_norm": 6.951895713806152, "learning_rate": 4.267727168915766e-07, "loss": 0.0158, "step": 931500 }, { "epoch": 9.16, "grad_norm": 0.06811895221471786, "learning_rate": 4.266485944373281e-07, "loss": 0.0252, "step": 931525 }, { "epoch": 9.16, "grad_norm": 8.392945289611816, "learning_rate": 4.265244719830797e-07, "loss": 0.0212, "step": 931550 }, { "epoch": 9.16, "grad_norm": 5.081818580627441, "learning_rate": 4.2640034952883115e-07, "loss": 0.0579, "step": 931575 }, { "epoch": 9.16, "grad_norm": 5.735293388366699, "learning_rate": 4.2627622707458273e-07, "loss": 0.0145, "step": 931600 }, { "epoch": 9.16, "grad_norm": 1.4447760581970215, "learning_rate": 4.261570695185042e-07, "loss": 0.046, "step": 931625 }, { "epoch": 9.16, "grad_norm": 9.257885932922363, "learning_rate": 4.2603294706425577e-07, "loss": 0.0234, "step": 931650 }, { "epoch": 9.16, "grad_norm": 0.650765597820282, "learning_rate": 4.259088246100073e-07, "loss": 0.0486, "step": 931675 }, { "epoch": 9.16, "grad_norm": 1.0752699375152588, "learning_rate": 4.257847021557588e-07, "loss": 0.0117, "step": 931700 }, { "epoch": 9.16, "grad_norm": 7.795879364013672, "learning_rate": 4.2566057970151035e-07, "loss": 0.0539, "step": 931725 }, { "epoch": 9.16, "grad_norm": 0.08746222406625748, "learning_rate": 4.255364572472619e-07, "loss": 0.0107, "step": 931750 }, { "epoch": 9.16, "grad_norm": 0.019571710377931595, "learning_rate": 4.254123347930134e-07, "loss": 0.0482, "step": 931775 }, { "epoch": 9.16, "grad_norm": 7.107607364654541, "learning_rate": 4.25288212338765e-07, "loss": 0.0171, "step": 931800 }, { "epoch": 9.16, "grad_norm": 0.020493578165769577, "learning_rate": 4.2516408988451647e-07, "loss": 0.0229, "step": 931825 }, { "epoch": 9.16, "grad_norm": 1.093677282333374, "learning_rate": 4.2503996743026805e-07, "loss": 0.0165, "step": 931850 }, { "epoch": 9.16, "grad_norm": 0.041435495018959045, "learning_rate": 4.2491584497601953e-07, "loss": 0.0426, "step": 931875 }, { "epoch": 9.16, "grad_norm": 0.482244074344635, "learning_rate": 4.247917225217711e-07, "loss": 0.015, "step": 931900 }, { "epoch": 9.16, "grad_norm": 0.011822858825325966, "learning_rate": 4.246676000675227e-07, "loss": 0.0411, "step": 931925 }, { "epoch": 9.16, "grad_norm": 3.3191614151000977, "learning_rate": 4.2454347761327417e-07, "loss": 0.0133, "step": 931950 }, { "epoch": 9.16, "grad_norm": 4.135058879852295, "learning_rate": 4.2441935515902575e-07, "loss": 0.0385, "step": 931975 }, { "epoch": 9.16, "grad_norm": 1.2886019945144653, "learning_rate": 4.2429523270477723e-07, "loss": 0.0095, "step": 932000 }, { "epoch": 9.16, "grad_norm": 0.008662587031722069, "learning_rate": 4.241711102505288e-07, "loss": 0.0447, "step": 932025 }, { "epoch": 9.16, "grad_norm": 7.283030033111572, "learning_rate": 4.240469877962803e-07, "loss": 0.0109, "step": 932050 }, { "epoch": 9.16, "grad_norm": 0.027689792215824127, "learning_rate": 4.2392286534203187e-07, "loss": 0.0375, "step": 932075 }, { "epoch": 9.16, "grad_norm": 12.758932113647461, "learning_rate": 4.2379874288778345e-07, "loss": 0.0053, "step": 932100 }, { "epoch": 9.16, "grad_norm": 0.2233530879020691, "learning_rate": 4.236746204335349e-07, "loss": 0.036, "step": 932125 }, { "epoch": 9.17, "grad_norm": 2.357159376144409, "learning_rate": 4.235504979792865e-07, "loss": 0.0184, "step": 932150 }, { "epoch": 9.17, "grad_norm": 0.013551365584135056, "learning_rate": 4.23426375525038e-07, "loss": 0.0378, "step": 932175 }, { "epoch": 9.17, "grad_norm": 2.768364906311035, "learning_rate": 4.2330225307078957e-07, "loss": 0.0139, "step": 932200 }, { "epoch": 9.17, "grad_norm": 0.045589227229356766, "learning_rate": 4.231781306165411e-07, "loss": 0.037, "step": 932225 }, { "epoch": 9.17, "grad_norm": 5.868406295776367, "learning_rate": 4.230540081622926e-07, "loss": 0.0188, "step": 932250 }, { "epoch": 9.17, "grad_norm": 0.8474772572517395, "learning_rate": 4.2292988570804415e-07, "loss": 0.018, "step": 932275 }, { "epoch": 9.17, "grad_norm": 6.226428985595703, "learning_rate": 4.228057632537957e-07, "loss": 0.0221, "step": 932300 }, { "epoch": 9.17, "grad_norm": 0.0016840194584801793, "learning_rate": 4.226816407995472e-07, "loss": 0.0245, "step": 932325 }, { "epoch": 9.17, "grad_norm": 0.19951458275318146, "learning_rate": 4.225575183452988e-07, "loss": 0.0242, "step": 932350 }, { "epoch": 9.17, "grad_norm": 0.297151118516922, "learning_rate": 4.224333958910503e-07, "loss": 0.0442, "step": 932375 }, { "epoch": 9.17, "grad_norm": 0.1035156175494194, "learning_rate": 4.2230927343680185e-07, "loss": 0.011, "step": 932400 }, { "epoch": 9.17, "grad_norm": 3.2703030109405518, "learning_rate": 4.221851509825534e-07, "loss": 0.0348, "step": 932425 }, { "epoch": 9.17, "grad_norm": 1.3963775634765625, "learning_rate": 4.220610285283049e-07, "loss": 0.0105, "step": 932450 }, { "epoch": 9.17, "grad_norm": 4.210253715515137, "learning_rate": 4.2193690607405644e-07, "loss": 0.0375, "step": 932475 }, { "epoch": 9.17, "grad_norm": 1.7775791883468628, "learning_rate": 4.2181278361980797e-07, "loss": 0.0103, "step": 932500 }, { "epoch": 9.17, "grad_norm": 0.5908092856407166, "learning_rate": 4.2168866116555955e-07, "loss": 0.0292, "step": 932525 }, { "epoch": 9.17, "grad_norm": 0.35219043493270874, "learning_rate": 4.2156453871131103e-07, "loss": 0.0118, "step": 932550 }, { "epoch": 9.17, "grad_norm": 1.8836735486984253, "learning_rate": 4.214404162570626e-07, "loss": 0.0255, "step": 932575 }, { "epoch": 9.17, "grad_norm": 2.0716991424560547, "learning_rate": 4.213162938028141e-07, "loss": 0.0212, "step": 932600 }, { "epoch": 9.17, "grad_norm": 5.243760585784912, "learning_rate": 4.2119217134856567e-07, "loss": 0.0472, "step": 932625 }, { "epoch": 9.17, "grad_norm": 9.633824348449707, "learning_rate": 4.2106804889431725e-07, "loss": 0.0182, "step": 932650 }, { "epoch": 9.17, "grad_norm": 1.8802510499954224, "learning_rate": 4.209439264400687e-07, "loss": 0.0351, "step": 932675 }, { "epoch": 9.17, "grad_norm": 0.12697520852088928, "learning_rate": 4.208198039858203e-07, "loss": 0.0201, "step": 932700 }, { "epoch": 9.17, "grad_norm": 0.33103957772254944, "learning_rate": 4.206956815315718e-07, "loss": 0.0257, "step": 932725 }, { "epoch": 9.17, "grad_norm": 10.42032241821289, "learning_rate": 4.2057155907732337e-07, "loss": 0.0206, "step": 932750 }, { "epoch": 9.17, "grad_norm": 2.3913514614105225, "learning_rate": 4.2044743662307495e-07, "loss": 0.0469, "step": 932775 }, { "epoch": 9.17, "grad_norm": 18.367542266845703, "learning_rate": 4.203233141688264e-07, "loss": 0.0222, "step": 932800 }, { "epoch": 9.17, "grad_norm": 0.04522284120321274, "learning_rate": 4.20199191714578e-07, "loss": 0.0309, "step": 932825 }, { "epoch": 9.17, "grad_norm": 0.5220342874526978, "learning_rate": 4.200750692603295e-07, "loss": 0.0145, "step": 932850 }, { "epoch": 9.17, "grad_norm": 0.07993384450674057, "learning_rate": 4.1995094680608106e-07, "loss": 0.0323, "step": 932875 }, { "epoch": 9.17, "grad_norm": 5.489573001861572, "learning_rate": 4.198268243518326e-07, "loss": 0.0132, "step": 932900 }, { "epoch": 9.17, "grad_norm": 0.7633044123649597, "learning_rate": 4.197027018975841e-07, "loss": 0.0241, "step": 932925 }, { "epoch": 9.17, "grad_norm": 1.9614428281784058, "learning_rate": 4.1957857944333565e-07, "loss": 0.0181, "step": 932950 }, { "epoch": 9.17, "grad_norm": 0.3444480001926422, "learning_rate": 4.194544569890872e-07, "loss": 0.0516, "step": 932975 }, { "epoch": 9.17, "grad_norm": 8.199882507324219, "learning_rate": 4.193303345348387e-07, "loss": 0.0165, "step": 933000 }, { "epoch": 9.17, "grad_norm": 0.9835983514785767, "learning_rate": 4.1920621208059024e-07, "loss": 0.0359, "step": 933025 }, { "epoch": 9.17, "grad_norm": 2.566556692123413, "learning_rate": 4.190820896263418e-07, "loss": 0.0129, "step": 933050 }, { "epoch": 9.17, "grad_norm": 0.06475381553173065, "learning_rate": 4.1895796717209335e-07, "loss": 0.038, "step": 933075 }, { "epoch": 9.17, "grad_norm": 3.9887938499450684, "learning_rate": 4.188338447178449e-07, "loss": 0.0126, "step": 933100 }, { "epoch": 9.17, "grad_norm": 0.009691814891994, "learning_rate": 4.187097222635964e-07, "loss": 0.0348, "step": 933125 }, { "epoch": 9.17, "grad_norm": 0.9073042273521423, "learning_rate": 4.1858559980934794e-07, "loss": 0.0159, "step": 933150 }, { "epoch": 9.18, "grad_norm": 7.4632415771484375, "learning_rate": 4.1846147735509947e-07, "loss": 0.0295, "step": 933175 }, { "epoch": 9.18, "grad_norm": 14.78523063659668, "learning_rate": 4.1833735490085105e-07, "loss": 0.0174, "step": 933200 }, { "epoch": 9.18, "grad_norm": 0.031735677272081375, "learning_rate": 4.182132324466025e-07, "loss": 0.0407, "step": 933225 }, { "epoch": 9.18, "grad_norm": 0.9674609899520874, "learning_rate": 4.180891099923541e-07, "loss": 0.02, "step": 933250 }, { "epoch": 9.18, "grad_norm": 0.07081567496061325, "learning_rate": 4.179649875381056e-07, "loss": 0.0435, "step": 933275 }, { "epoch": 9.18, "grad_norm": 20.590787887573242, "learning_rate": 4.1784086508385716e-07, "loss": 0.0119, "step": 933300 }, { "epoch": 9.18, "grad_norm": 0.06791716068983078, "learning_rate": 4.1771674262960875e-07, "loss": 0.024, "step": 933325 }, { "epoch": 9.18, "grad_norm": 4.295447826385498, "learning_rate": 4.175926201753602e-07, "loss": 0.0163, "step": 933350 }, { "epoch": 9.18, "grad_norm": 0.0153951495885849, "learning_rate": 4.174684977211118e-07, "loss": 0.0273, "step": 933375 }, { "epoch": 9.18, "grad_norm": 1.5615228414535522, "learning_rate": 4.173443752668633e-07, "loss": 0.0152, "step": 933400 }, { "epoch": 9.18, "grad_norm": 4.4278950691223145, "learning_rate": 4.1722025281261486e-07, "loss": 0.0275, "step": 933425 }, { "epoch": 9.18, "grad_norm": 3.0714592933654785, "learning_rate": 4.1709613035836634e-07, "loss": 0.0078, "step": 933450 }, { "epoch": 9.18, "grad_norm": 0.6743959188461304, "learning_rate": 4.169720079041179e-07, "loss": 0.0385, "step": 933475 }, { "epoch": 9.18, "grad_norm": 0.5226961374282837, "learning_rate": 4.168478854498695e-07, "loss": 0.0079, "step": 933500 }, { "epoch": 9.18, "grad_norm": 0.7921640872955322, "learning_rate": 4.16723762995621e-07, "loss": 0.0239, "step": 933525 }, { "epoch": 9.18, "grad_norm": 0.20308789610862732, "learning_rate": 4.1659964054137256e-07, "loss": 0.0077, "step": 933550 }, { "epoch": 9.18, "grad_norm": 0.01440398022532463, "learning_rate": 4.1647551808712404e-07, "loss": 0.0529, "step": 933575 }, { "epoch": 9.18, "grad_norm": 5.760373592376709, "learning_rate": 4.163513956328756e-07, "loss": 0.0093, "step": 933600 }, { "epoch": 9.18, "grad_norm": 0.0776086151599884, "learning_rate": 4.1622727317862715e-07, "loss": 0.0399, "step": 933625 }, { "epoch": 9.18, "grad_norm": 1.2398287057876587, "learning_rate": 4.161031507243787e-07, "loss": 0.0105, "step": 933650 }, { "epoch": 9.18, "grad_norm": 0.04209814965724945, "learning_rate": 4.159790282701302e-07, "loss": 0.0457, "step": 933675 }, { "epoch": 9.18, "grad_norm": 8.873477935791016, "learning_rate": 4.1585490581588174e-07, "loss": 0.0158, "step": 933700 }, { "epoch": 9.18, "grad_norm": 0.9864941239356995, "learning_rate": 4.157307833616333e-07, "loss": 0.0398, "step": 933725 }, { "epoch": 9.18, "grad_norm": 16.637632369995117, "learning_rate": 4.1560666090738485e-07, "loss": 0.0119, "step": 933750 }, { "epoch": 9.18, "grad_norm": 1.194525957107544, "learning_rate": 4.154825384531364e-07, "loss": 0.0277, "step": 933775 }, { "epoch": 9.18, "grad_norm": 11.897810935974121, "learning_rate": 4.153584159988879e-07, "loss": 0.0136, "step": 933800 }, { "epoch": 9.18, "grad_norm": 1.3045283555984497, "learning_rate": 4.1523429354463943e-07, "loss": 0.0219, "step": 933825 }, { "epoch": 9.18, "grad_norm": 6.109053611755371, "learning_rate": 4.1511017109039096e-07, "loss": 0.0171, "step": 933850 }, { "epoch": 9.18, "grad_norm": 0.6798023581504822, "learning_rate": 4.149860486361425e-07, "loss": 0.0461, "step": 933875 }, { "epoch": 9.18, "grad_norm": 11.060731887817383, "learning_rate": 4.14861926181894e-07, "loss": 0.0161, "step": 933900 }, { "epoch": 9.18, "grad_norm": 2.038543701171875, "learning_rate": 4.147378037276456e-07, "loss": 0.029, "step": 933925 }, { "epoch": 9.18, "grad_norm": 4.097234725952148, "learning_rate": 4.146136812733971e-07, "loss": 0.0175, "step": 933950 }, { "epoch": 9.18, "grad_norm": 0.07106848061084747, "learning_rate": 4.1448955881914866e-07, "loss": 0.0282, "step": 933975 }, { "epoch": 9.18, "grad_norm": 7.908378601074219, "learning_rate": 4.1436543636490014e-07, "loss": 0.0071, "step": 934000 }, { "epoch": 9.18, "grad_norm": 3.35577654838562, "learning_rate": 4.142413139106517e-07, "loss": 0.0419, "step": 934025 }, { "epoch": 9.18, "grad_norm": 6.710821628570557, "learning_rate": 4.141171914564033e-07, "loss": 0.018, "step": 934050 }, { "epoch": 9.18, "grad_norm": 0.4427385628223419, "learning_rate": 4.139930690021548e-07, "loss": 0.0578, "step": 934075 }, { "epoch": 9.18, "grad_norm": 1.3492954969406128, "learning_rate": 4.1386894654790636e-07, "loss": 0.0088, "step": 934100 }, { "epoch": 9.18, "grad_norm": 9.417133331298828, "learning_rate": 4.1374482409365784e-07, "loss": 0.0455, "step": 934125 }, { "epoch": 9.18, "grad_norm": 10.405049324035645, "learning_rate": 4.136207016394094e-07, "loss": 0.0155, "step": 934150 }, { "epoch": 9.18, "grad_norm": 0.028526630252599716, "learning_rate": 4.13496579185161e-07, "loss": 0.0305, "step": 934175 }, { "epoch": 9.19, "grad_norm": 8.253486633300781, "learning_rate": 4.133724567309125e-07, "loss": 0.0224, "step": 934200 }, { "epoch": 9.19, "grad_norm": 0.021110394969582558, "learning_rate": 4.1324833427666406e-07, "loss": 0.03, "step": 934225 }, { "epoch": 9.19, "grad_norm": 0.4069306254386902, "learning_rate": 4.1312421182241553e-07, "loss": 0.0111, "step": 934250 }, { "epoch": 9.19, "grad_norm": 0.018549585714936256, "learning_rate": 4.130000893681671e-07, "loss": 0.0254, "step": 934275 }, { "epoch": 9.19, "grad_norm": 1.332083821296692, "learning_rate": 4.128759669139186e-07, "loss": 0.0135, "step": 934300 }, { "epoch": 9.19, "grad_norm": 4.583826065063477, "learning_rate": 4.127518444596702e-07, "loss": 0.0384, "step": 934325 }, { "epoch": 9.19, "grad_norm": 1.0586365461349487, "learning_rate": 4.126277220054217e-07, "loss": 0.0107, "step": 934350 }, { "epoch": 9.19, "grad_norm": 4.897874355316162, "learning_rate": 4.1250856444934315e-07, "loss": 0.0421, "step": 934375 }, { "epoch": 9.19, "grad_norm": 3.6703689098358154, "learning_rate": 4.1238444199509474e-07, "loss": 0.0117, "step": 934400 }, { "epoch": 9.19, "grad_norm": 0.008224520832300186, "learning_rate": 4.122603195408462e-07, "loss": 0.0379, "step": 934425 }, { "epoch": 9.19, "grad_norm": 5.743021488189697, "learning_rate": 4.121361970865978e-07, "loss": 0.0304, "step": 934450 }, { "epoch": 9.19, "grad_norm": 0.0616564080119133, "learning_rate": 4.1201207463234927e-07, "loss": 0.0219, "step": 934475 }, { "epoch": 9.19, "grad_norm": 8.370595932006836, "learning_rate": 4.1188795217810085e-07, "loss": 0.0223, "step": 934500 }, { "epoch": 9.19, "grad_norm": 6.684732437133789, "learning_rate": 4.1176382972385244e-07, "loss": 0.0301, "step": 934525 }, { "epoch": 9.19, "grad_norm": 0.27956458926200867, "learning_rate": 4.116397072696039e-07, "loss": 0.006, "step": 934550 }, { "epoch": 9.19, "grad_norm": 1.0193421840667725, "learning_rate": 4.115155848153555e-07, "loss": 0.0391, "step": 934575 }, { "epoch": 9.19, "grad_norm": 3.725259304046631, "learning_rate": 4.1139146236110697e-07, "loss": 0.007, "step": 934600 }, { "epoch": 9.19, "grad_norm": 0.06521647423505783, "learning_rate": 4.1126733990685855e-07, "loss": 0.0334, "step": 934625 }, { "epoch": 9.19, "grad_norm": 2.8848326206207275, "learning_rate": 4.1114321745261013e-07, "loss": 0.0215, "step": 934650 }, { "epoch": 9.19, "grad_norm": 3.7865700721740723, "learning_rate": 4.110190949983616e-07, "loss": 0.0406, "step": 934675 }, { "epoch": 9.19, "grad_norm": 7.346988201141357, "learning_rate": 4.108949725441132e-07, "loss": 0.0076, "step": 934700 }, { "epoch": 9.19, "grad_norm": 0.036282412707805634, "learning_rate": 4.1077085008986467e-07, "loss": 0.0246, "step": 934725 }, { "epoch": 9.19, "grad_norm": 5.379904747009277, "learning_rate": 4.1064672763561625e-07, "loss": 0.0234, "step": 934750 }, { "epoch": 9.19, "grad_norm": 4.11991548538208, "learning_rate": 4.105226051813677e-07, "loss": 0.036, "step": 934775 }, { "epoch": 9.19, "grad_norm": 8.50827693939209, "learning_rate": 4.103984827271193e-07, "loss": 0.018, "step": 934800 }, { "epoch": 9.19, "grad_norm": 0.07024259120225906, "learning_rate": 4.1027436027287084e-07, "loss": 0.0253, "step": 934825 }, { "epoch": 9.19, "grad_norm": 0.4877603054046631, "learning_rate": 4.1015023781862237e-07, "loss": 0.0096, "step": 934850 }, { "epoch": 9.19, "grad_norm": 0.013299341313540936, "learning_rate": 4.1002611536437395e-07, "loss": 0.0383, "step": 934875 }, { "epoch": 9.19, "grad_norm": 8.876437187194824, "learning_rate": 4.099019929101254e-07, "loss": 0.0158, "step": 934900 }, { "epoch": 9.19, "grad_norm": 3.9880785942077637, "learning_rate": 4.09777870455877e-07, "loss": 0.0518, "step": 934925 }, { "epoch": 9.19, "grad_norm": 10.713579177856445, "learning_rate": 4.0965374800162854e-07, "loss": 0.0164, "step": 934950 }, { "epoch": 9.19, "grad_norm": 7.466065406799316, "learning_rate": 4.0952962554738006e-07, "loss": 0.0404, "step": 934975 }, { "epoch": 9.19, "grad_norm": 0.1592218279838562, "learning_rate": 4.094055030931316e-07, "loss": 0.0036, "step": 935000 }, { "epoch": 9.19, "grad_norm": 0.08996561169624329, "learning_rate": 4.092813806388831e-07, "loss": 0.0314, "step": 935025 }, { "epoch": 9.19, "grad_norm": 1.0030078887939453, "learning_rate": 4.0915725818463465e-07, "loss": 0.0162, "step": 935050 }, { "epoch": 9.19, "grad_norm": 0.04078821837902069, "learning_rate": 4.0903313573038623e-07, "loss": 0.0267, "step": 935075 }, { "epoch": 9.19, "grad_norm": 7.300611972808838, "learning_rate": 4.089090132761377e-07, "loss": 0.014, "step": 935100 }, { "epoch": 9.19, "grad_norm": 1.1656070947647095, "learning_rate": 4.087848908218893e-07, "loss": 0.0443, "step": 935125 }, { "epoch": 9.19, "grad_norm": 7.724968433380127, "learning_rate": 4.0866076836764077e-07, "loss": 0.0143, "step": 935150 }, { "epoch": 9.19, "grad_norm": 0.9464001059532166, "learning_rate": 4.0853664591339235e-07, "loss": 0.0355, "step": 935175 }, { "epoch": 9.2, "grad_norm": 10.22348403930664, "learning_rate": 4.0841252345914383e-07, "loss": 0.0163, "step": 935200 }, { "epoch": 9.2, "grad_norm": 0.289948970079422, "learning_rate": 4.082884010048954e-07, "loss": 0.0395, "step": 935225 }, { "epoch": 9.2, "grad_norm": 1.612672209739685, "learning_rate": 4.08164278550647e-07, "loss": 0.0143, "step": 935250 }, { "epoch": 9.2, "grad_norm": 0.055526237934827805, "learning_rate": 4.0804015609639847e-07, "loss": 0.043, "step": 935275 }, { "epoch": 9.2, "grad_norm": 11.936261177062988, "learning_rate": 4.0791603364215005e-07, "loss": 0.015, "step": 935300 }, { "epoch": 9.2, "grad_norm": 0.7254924774169922, "learning_rate": 4.077919111879015e-07, "loss": 0.0388, "step": 935325 }, { "epoch": 9.2, "grad_norm": 14.348587036132812, "learning_rate": 4.076677887336531e-07, "loss": 0.0083, "step": 935350 }, { "epoch": 9.2, "grad_norm": 0.04614774137735367, "learning_rate": 4.075436662794047e-07, "loss": 0.0317, "step": 935375 }, { "epoch": 9.2, "grad_norm": 7.850377559661865, "learning_rate": 4.0741954382515617e-07, "loss": 0.0106, "step": 935400 }, { "epoch": 9.2, "grad_norm": 0.008498811163008213, "learning_rate": 4.0729542137090775e-07, "loss": 0.0465, "step": 935425 }, { "epoch": 9.2, "grad_norm": 0.24550341069698334, "learning_rate": 4.071712989166592e-07, "loss": 0.0106, "step": 935450 }, { "epoch": 9.2, "grad_norm": 1.7657972574234009, "learning_rate": 4.070471764624108e-07, "loss": 0.0178, "step": 935475 }, { "epoch": 9.2, "grad_norm": 0.42192521691322327, "learning_rate": 4.0692305400816233e-07, "loss": 0.0172, "step": 935500 }, { "epoch": 9.2, "grad_norm": 0.10670038312673569, "learning_rate": 4.0679893155391386e-07, "loss": 0.0327, "step": 935525 }, { "epoch": 9.2, "grad_norm": 10.520353317260742, "learning_rate": 4.066748090996654e-07, "loss": 0.0127, "step": 935550 }, { "epoch": 9.2, "grad_norm": 3.471024513244629, "learning_rate": 4.065506866454169e-07, "loss": 0.0269, "step": 935575 }, { "epoch": 9.2, "grad_norm": 0.9735283255577087, "learning_rate": 4.064265641911685e-07, "loss": 0.0144, "step": 935600 }, { "epoch": 9.2, "grad_norm": 3.434826612472534, "learning_rate": 4.0630244173692e-07, "loss": 0.0468, "step": 935625 }, { "epoch": 9.2, "grad_norm": 7.995263576507568, "learning_rate": 4.0617831928267156e-07, "loss": 0.0148, "step": 935650 }, { "epoch": 9.2, "grad_norm": 0.08693218231201172, "learning_rate": 4.060541968284231e-07, "loss": 0.0378, "step": 935675 }, { "epoch": 9.2, "grad_norm": 4.800447940826416, "learning_rate": 4.059300743741746e-07, "loss": 0.0141, "step": 935700 }, { "epoch": 9.2, "grad_norm": 0.14005336165428162, "learning_rate": 4.0580595191992615e-07, "loss": 0.0479, "step": 935725 }, { "epoch": 9.2, "grad_norm": 0.48480767011642456, "learning_rate": 4.056818294656777e-07, "loss": 0.0056, "step": 935750 }, { "epoch": 9.2, "grad_norm": 0.032267212867736816, "learning_rate": 4.055577070114292e-07, "loss": 0.032, "step": 935775 }, { "epoch": 9.2, "grad_norm": 9.01850414276123, "learning_rate": 4.054335845571808e-07, "loss": 0.0095, "step": 935800 }, { "epoch": 9.2, "grad_norm": 1.4986789226531982, "learning_rate": 4.0530946210293227e-07, "loss": 0.0314, "step": 935825 }, { "epoch": 9.2, "grad_norm": 0.40646055340766907, "learning_rate": 4.0518533964868385e-07, "loss": 0.0329, "step": 935850 }, { "epoch": 9.2, "grad_norm": 0.01576850563287735, "learning_rate": 4.050612171944353e-07, "loss": 0.0352, "step": 935875 }, { "epoch": 9.2, "grad_norm": 2.5037002563476562, "learning_rate": 4.049370947401869e-07, "loss": 0.0134, "step": 935900 }, { "epoch": 9.2, "grad_norm": 0.11076081544160843, "learning_rate": 4.048129722859385e-07, "loss": 0.0342, "step": 935925 }, { "epoch": 9.2, "grad_norm": 4.58524227142334, "learning_rate": 4.0468884983168996e-07, "loss": 0.0227, "step": 935950 }, { "epoch": 9.2, "grad_norm": 0.7222344279289246, "learning_rate": 4.0456472737744155e-07, "loss": 0.0386, "step": 935975 }, { "epoch": 9.2, "grad_norm": 8.703987121582031, "learning_rate": 4.04440604923193e-07, "loss": 0.0089, "step": 936000 }, { "epoch": 9.2, "grad_norm": 0.08939466625452042, "learning_rate": 4.043164824689446e-07, "loss": 0.0242, "step": 936025 }, { "epoch": 9.2, "grad_norm": 0.07937262207269669, "learning_rate": 4.041923600146961e-07, "loss": 0.0168, "step": 936050 }, { "epoch": 9.2, "grad_norm": 5.463051795959473, "learning_rate": 4.0406823756044766e-07, "loss": 0.0517, "step": 936075 }, { "epoch": 9.2, "grad_norm": 0.023067567497491837, "learning_rate": 4.0394411510619924e-07, "loss": 0.0187, "step": 936100 }, { "epoch": 9.2, "grad_norm": 1.2135332822799683, "learning_rate": 4.038199926519507e-07, "loss": 0.0598, "step": 936125 }, { "epoch": 9.2, "grad_norm": 2.7624974250793457, "learning_rate": 4.036958701977023e-07, "loss": 0.0286, "step": 936150 }, { "epoch": 9.2, "grad_norm": 9.112709999084473, "learning_rate": 4.035717477434538e-07, "loss": 0.0318, "step": 936175 }, { "epoch": 9.2, "grad_norm": 9.72628402709961, "learning_rate": 4.0344762528920536e-07, "loss": 0.0248, "step": 936200 }, { "epoch": 9.21, "grad_norm": 1.0615525245666504, "learning_rate": 4.033235028349569e-07, "loss": 0.0332, "step": 936225 }, { "epoch": 9.21, "grad_norm": 13.518892288208008, "learning_rate": 4.031993803807084e-07, "loss": 0.0138, "step": 936250 }, { "epoch": 9.21, "grad_norm": 0.19932281970977783, "learning_rate": 4.0307525792646e-07, "loss": 0.0446, "step": 936275 }, { "epoch": 9.21, "grad_norm": 0.2823771834373474, "learning_rate": 4.029511354722115e-07, "loss": 0.021, "step": 936300 }, { "epoch": 9.21, "grad_norm": 0.1378673017024994, "learning_rate": 4.0282701301796306e-07, "loss": 0.059, "step": 936325 }, { "epoch": 9.21, "grad_norm": 1.0786553621292114, "learning_rate": 4.027028905637146e-07, "loss": 0.0147, "step": 936350 }, { "epoch": 9.21, "grad_norm": 2.0232479572296143, "learning_rate": 4.025787681094661e-07, "loss": 0.0296, "step": 936375 }, { "epoch": 9.21, "grad_norm": 10.097530364990234, "learning_rate": 4.0245464565521765e-07, "loss": 0.0183, "step": 936400 }, { "epoch": 9.21, "grad_norm": 0.8616231679916382, "learning_rate": 4.023305232009692e-07, "loss": 0.0249, "step": 936425 }, { "epoch": 9.21, "grad_norm": 10.643003463745117, "learning_rate": 4.022064007467207e-07, "loss": 0.0169, "step": 936450 }, { "epoch": 9.21, "grad_norm": 0.15068243443965912, "learning_rate": 4.020822782924723e-07, "loss": 0.0386, "step": 936475 }, { "epoch": 9.21, "grad_norm": 0.18360573053359985, "learning_rate": 4.0195815583822376e-07, "loss": 0.0141, "step": 936500 }, { "epoch": 9.21, "grad_norm": 0.218949556350708, "learning_rate": 4.0183403338397535e-07, "loss": 0.0362, "step": 936525 }, { "epoch": 9.21, "grad_norm": 2.0131397247314453, "learning_rate": 4.017099109297268e-07, "loss": 0.013, "step": 936550 }, { "epoch": 9.21, "grad_norm": 2.407529830932617, "learning_rate": 4.015857884754784e-07, "loss": 0.0279, "step": 936575 }, { "epoch": 9.21, "grad_norm": 8.48028564453125, "learning_rate": 4.0146166602122993e-07, "loss": 0.0173, "step": 936600 }, { "epoch": 9.21, "grad_norm": 0.22893692553043365, "learning_rate": 4.0133754356698146e-07, "loss": 0.035, "step": 936625 }, { "epoch": 9.21, "grad_norm": 13.76251220703125, "learning_rate": 4.0121342111273304e-07, "loss": 0.0148, "step": 936650 }, { "epoch": 9.21, "grad_norm": 0.00431878212839365, "learning_rate": 4.010892986584845e-07, "loss": 0.0359, "step": 936675 }, { "epoch": 9.21, "grad_norm": 3.278956174850464, "learning_rate": 4.009651762042361e-07, "loss": 0.0176, "step": 936700 }, { "epoch": 9.21, "grad_norm": 0.2815895974636078, "learning_rate": 4.008410537499876e-07, "loss": 0.0252, "step": 936725 }, { "epoch": 9.21, "grad_norm": 12.934112548828125, "learning_rate": 4.0071693129573916e-07, "loss": 0.0166, "step": 936750 }, { "epoch": 9.21, "grad_norm": 1.1544028520584106, "learning_rate": 4.005977737396606e-07, "loss": 0.0467, "step": 936775 }, { "epoch": 9.21, "grad_norm": 7.408784866333008, "learning_rate": 4.004736512854122e-07, "loss": 0.0124, "step": 936800 }, { "epoch": 9.21, "grad_norm": 7.581596851348877, "learning_rate": 4.003495288311637e-07, "loss": 0.0343, "step": 936825 }, { "epoch": 9.21, "grad_norm": 0.2532840669155121, "learning_rate": 4.0022540637691525e-07, "loss": 0.0136, "step": 936850 }, { "epoch": 9.21, "grad_norm": 0.020845822989940643, "learning_rate": 4.001012839226668e-07, "loss": 0.0498, "step": 936875 }, { "epoch": 9.21, "grad_norm": 5.810604095458984, "learning_rate": 3.999771614684183e-07, "loss": 0.031, "step": 936900 }, { "epoch": 9.21, "grad_norm": 0.11537390202283859, "learning_rate": 3.9985303901416984e-07, "loss": 0.0334, "step": 936925 }, { "epoch": 9.21, "grad_norm": 5.3234405517578125, "learning_rate": 3.997289165599214e-07, "loss": 0.012, "step": 936950 }, { "epoch": 9.21, "grad_norm": 4.691598892211914, "learning_rate": 3.996047941056729e-07, "loss": 0.019, "step": 936975 }, { "epoch": 9.21, "grad_norm": 4.728482246398926, "learning_rate": 3.994806716514245e-07, "loss": 0.0268, "step": 937000 }, { "epoch": 9.21, "grad_norm": 2.844770669937134, "learning_rate": 3.9935654919717595e-07, "loss": 0.0381, "step": 937025 }, { "epoch": 9.21, "grad_norm": 2.8764379024505615, "learning_rate": 3.9923242674292754e-07, "loss": 0.0237, "step": 937050 }, { "epoch": 9.21, "grad_norm": 0.25108155608177185, "learning_rate": 3.9910830428867907e-07, "loss": 0.0261, "step": 937075 }, { "epoch": 9.21, "grad_norm": 1.4407047033309937, "learning_rate": 3.989841818344306e-07, "loss": 0.0318, "step": 937100 }, { "epoch": 9.21, "grad_norm": 0.03124774619936943, "learning_rate": 3.988600593801822e-07, "loss": 0.0419, "step": 937125 }, { "epoch": 9.21, "grad_norm": 0.3027004599571228, "learning_rate": 3.9873593692593365e-07, "loss": 0.0238, "step": 937150 }, { "epoch": 9.21, "grad_norm": 1.2364634275436401, "learning_rate": 3.9861181447168524e-07, "loss": 0.0445, "step": 937175 }, { "epoch": 9.21, "grad_norm": 1.4910306930541992, "learning_rate": 3.984876920174367e-07, "loss": 0.0152, "step": 937200 }, { "epoch": 9.21, "grad_norm": 1.4818239212036133, "learning_rate": 3.983635695631883e-07, "loss": 0.0416, "step": 937225 }, { "epoch": 9.22, "grad_norm": 13.396728515625, "learning_rate": 3.982394471089399e-07, "loss": 0.0177, "step": 937250 }, { "epoch": 9.22, "grad_norm": 5.385490417480469, "learning_rate": 3.9811532465469135e-07, "loss": 0.0444, "step": 937275 }, { "epoch": 9.22, "grad_norm": 2.116266965866089, "learning_rate": 3.9799120220044293e-07, "loss": 0.0191, "step": 937300 }, { "epoch": 9.22, "grad_norm": 4.2065606117248535, "learning_rate": 3.978670797461944e-07, "loss": 0.034, "step": 937325 }, { "epoch": 9.22, "grad_norm": 6.962299346923828, "learning_rate": 3.97742957291946e-07, "loss": 0.0072, "step": 937350 }, { "epoch": 9.22, "grad_norm": 0.03746217489242554, "learning_rate": 3.976188348376975e-07, "loss": 0.0387, "step": 937375 }, { "epoch": 9.22, "grad_norm": 13.577754020690918, "learning_rate": 3.9749471238344905e-07, "loss": 0.0113, "step": 937400 }, { "epoch": 9.22, "grad_norm": 0.01836758479475975, "learning_rate": 3.9737058992920063e-07, "loss": 0.0186, "step": 937425 }, { "epoch": 9.22, "grad_norm": 2.7620270252227783, "learning_rate": 3.972464674749521e-07, "loss": 0.0143, "step": 937450 }, { "epoch": 9.22, "grad_norm": 0.07937891036272049, "learning_rate": 3.971223450207037e-07, "loss": 0.0306, "step": 937475 }, { "epoch": 9.22, "grad_norm": 0.3865666687488556, "learning_rate": 3.9699822256645517e-07, "loss": 0.0113, "step": 937500 }, { "epoch": 9.22, "grad_norm": 4.146552562713623, "learning_rate": 3.9687410011220675e-07, "loss": 0.0199, "step": 937525 }, { "epoch": 9.22, "grad_norm": 0.19093233346939087, "learning_rate": 3.967499776579583e-07, "loss": 0.0126, "step": 937550 }, { "epoch": 9.22, "grad_norm": 4.549788475036621, "learning_rate": 3.966258552037098e-07, "loss": 0.0319, "step": 937575 }, { "epoch": 9.22, "grad_norm": 9.78670597076416, "learning_rate": 3.9650173274946134e-07, "loss": 0.0104, "step": 937600 }, { "epoch": 9.22, "grad_norm": 0.020316706970334053, "learning_rate": 3.9637761029521286e-07, "loss": 0.0349, "step": 937625 }, { "epoch": 9.22, "grad_norm": 1.034724235534668, "learning_rate": 3.962534878409644e-07, "loss": 0.0135, "step": 937650 }, { "epoch": 9.22, "grad_norm": 0.38371047377586365, "learning_rate": 3.96129365386716e-07, "loss": 0.0315, "step": 937675 }, { "epoch": 9.22, "grad_norm": 1.5527032613754272, "learning_rate": 3.9600524293246745e-07, "loss": 0.016, "step": 937700 }, { "epoch": 9.22, "grad_norm": 0.14682376384735107, "learning_rate": 3.9588112047821903e-07, "loss": 0.0363, "step": 937725 }, { "epoch": 9.22, "grad_norm": 4.853964805603027, "learning_rate": 3.9575699802397056e-07, "loss": 0.0158, "step": 937750 }, { "epoch": 9.22, "grad_norm": 3.7381231784820557, "learning_rate": 3.956328755697221e-07, "loss": 0.0356, "step": 937775 }, { "epoch": 9.22, "grad_norm": 0.2553432285785675, "learning_rate": 3.955087531154737e-07, "loss": 0.0081, "step": 937800 }, { "epoch": 9.22, "grad_norm": 0.09007363021373749, "learning_rate": 3.9538463066122515e-07, "loss": 0.0323, "step": 937825 }, { "epoch": 9.22, "grad_norm": 2.2489070892333984, "learning_rate": 3.9526050820697673e-07, "loss": 0.016, "step": 937850 }, { "epoch": 9.22, "grad_norm": 0.022986453026533127, "learning_rate": 3.951363857527282e-07, "loss": 0.0398, "step": 937875 }, { "epoch": 9.22, "grad_norm": 0.9868889451026917, "learning_rate": 3.950122632984798e-07, "loss": 0.0126, "step": 937900 }, { "epoch": 9.22, "grad_norm": 0.007940806448459625, "learning_rate": 3.9488814084423127e-07, "loss": 0.0488, "step": 937925 }, { "epoch": 9.22, "grad_norm": 6.785215377807617, "learning_rate": 3.9476401838998285e-07, "loss": 0.0108, "step": 937950 }, { "epoch": 9.22, "grad_norm": 0.16568300127983093, "learning_rate": 3.9463989593573443e-07, "loss": 0.0322, "step": 937975 }, { "epoch": 9.22, "grad_norm": 8.676249504089355, "learning_rate": 3.945157734814859e-07, "loss": 0.0097, "step": 938000 }, { "epoch": 9.22, "grad_norm": 0.10986069589853287, "learning_rate": 3.943916510272375e-07, "loss": 0.028, "step": 938025 }, { "epoch": 9.22, "grad_norm": 13.548742294311523, "learning_rate": 3.9426752857298897e-07, "loss": 0.0143, "step": 938050 }, { "epoch": 9.22, "grad_norm": 0.07276448607444763, "learning_rate": 3.9414340611874055e-07, "loss": 0.0304, "step": 938075 }, { "epoch": 9.22, "grad_norm": 1.6305463314056396, "learning_rate": 3.940192836644921e-07, "loss": 0.0092, "step": 938100 }, { "epoch": 9.22, "grad_norm": 0.01726536452770233, "learning_rate": 3.938951612102436e-07, "loss": 0.0489, "step": 938125 }, { "epoch": 9.22, "grad_norm": 0.3369027376174927, "learning_rate": 3.937710387559952e-07, "loss": 0.0173, "step": 938150 }, { "epoch": 9.22, "grad_norm": 0.042609814554452896, "learning_rate": 3.9364691630174666e-07, "loss": 0.0405, "step": 938175 }, { "epoch": 9.22, "grad_norm": 0.023214953020215034, "learning_rate": 3.9352279384749825e-07, "loss": 0.0194, "step": 938200 }, { "epoch": 9.22, "grad_norm": 0.4141584634780884, "learning_rate": 3.933986713932498e-07, "loss": 0.0326, "step": 938225 }, { "epoch": 9.23, "grad_norm": 6.365622043609619, "learning_rate": 3.932745489390013e-07, "loss": 0.0109, "step": 938250 }, { "epoch": 9.23, "grad_norm": 1.1297625303268433, "learning_rate": 3.9315042648475283e-07, "loss": 0.0531, "step": 938275 }, { "epoch": 9.23, "grad_norm": 11.424023628234863, "learning_rate": 3.9302630403050436e-07, "loss": 0.0084, "step": 938300 }, { "epoch": 9.23, "grad_norm": 0.3363328278064728, "learning_rate": 3.929021815762559e-07, "loss": 0.0403, "step": 938325 }, { "epoch": 9.23, "grad_norm": 8.496368408203125, "learning_rate": 3.927780591220074e-07, "loss": 0.0241, "step": 938350 }, { "epoch": 9.23, "grad_norm": 4.4276580810546875, "learning_rate": 3.9265393666775895e-07, "loss": 0.0439, "step": 938375 }, { "epoch": 9.23, "grad_norm": 3.960361957550049, "learning_rate": 3.9252981421351053e-07, "loss": 0.0041, "step": 938400 }, { "epoch": 9.23, "grad_norm": 1.0360090732574463, "learning_rate": 3.92405691759262e-07, "loss": 0.0322, "step": 938425 }, { "epoch": 9.23, "grad_norm": 4.5454206466674805, "learning_rate": 3.922815693050136e-07, "loss": 0.0096, "step": 938450 }, { "epoch": 9.23, "grad_norm": 2.0332000255584717, "learning_rate": 3.921574468507651e-07, "loss": 0.031, "step": 938475 }, { "epoch": 9.23, "grad_norm": 0.2012377679347992, "learning_rate": 3.9203332439651665e-07, "loss": 0.0127, "step": 938500 }, { "epoch": 9.23, "grad_norm": 0.006857732310891151, "learning_rate": 3.9190920194226823e-07, "loss": 0.0245, "step": 938525 }, { "epoch": 9.23, "grad_norm": 6.012472629547119, "learning_rate": 3.917850794880197e-07, "loss": 0.0221, "step": 938550 }, { "epoch": 9.23, "grad_norm": 0.19374540448188782, "learning_rate": 3.916609570337713e-07, "loss": 0.0256, "step": 938575 }, { "epoch": 9.23, "grad_norm": 13.545767784118652, "learning_rate": 3.9153683457952276e-07, "loss": 0.0152, "step": 938600 }, { "epoch": 9.23, "grad_norm": 0.0690220296382904, "learning_rate": 3.9141271212527435e-07, "loss": 0.0559, "step": 938625 }, { "epoch": 9.23, "grad_norm": 6.371376991271973, "learning_rate": 3.9128858967102593e-07, "loss": 0.0148, "step": 938650 }, { "epoch": 9.23, "grad_norm": 0.3610171973705292, "learning_rate": 3.911644672167774e-07, "loss": 0.0466, "step": 938675 }, { "epoch": 9.23, "grad_norm": 9.52075481414795, "learning_rate": 3.91040344762529e-07, "loss": 0.0132, "step": 938700 }, { "epoch": 9.23, "grad_norm": 0.003907458391040564, "learning_rate": 3.9091622230828046e-07, "loss": 0.0225, "step": 938725 }, { "epoch": 9.23, "grad_norm": 1.4249719381332397, "learning_rate": 3.9079209985403204e-07, "loss": 0.0035, "step": 938750 }, { "epoch": 9.23, "grad_norm": 5.79374361038208, "learning_rate": 3.906679773997835e-07, "loss": 0.0413, "step": 938775 }, { "epoch": 9.23, "grad_norm": 0.28498804569244385, "learning_rate": 3.905438549455351e-07, "loss": 0.0093, "step": 938800 }, { "epoch": 9.23, "grad_norm": 0.040102701634168625, "learning_rate": 3.904197324912867e-07, "loss": 0.0315, "step": 938825 }, { "epoch": 9.23, "grad_norm": 14.808379173278809, "learning_rate": 3.9029561003703816e-07, "loss": 0.0195, "step": 938850 }, { "epoch": 9.23, "grad_norm": 3.3057949542999268, "learning_rate": 3.9017148758278974e-07, "loss": 0.0212, "step": 938875 }, { "epoch": 9.23, "grad_norm": 5.238733291625977, "learning_rate": 3.900473651285412e-07, "loss": 0.0098, "step": 938900 }, { "epoch": 9.23, "grad_norm": 0.03181331977248192, "learning_rate": 3.899232426742928e-07, "loss": 0.0407, "step": 938925 }, { "epoch": 9.23, "grad_norm": 3.924198865890503, "learning_rate": 3.8979912022004433e-07, "loss": 0.0205, "step": 938950 }, { "epoch": 9.23, "grad_norm": 0.03598535433411598, "learning_rate": 3.8967499776579586e-07, "loss": 0.0348, "step": 938975 }, { "epoch": 9.23, "grad_norm": 1.9897643327713013, "learning_rate": 3.895508753115474e-07, "loss": 0.0278, "step": 939000 }, { "epoch": 9.23, "grad_norm": 0.11852272599935532, "learning_rate": 3.894267528572989e-07, "loss": 0.0164, "step": 939025 }, { "epoch": 9.23, "grad_norm": 0.7229263782501221, "learning_rate": 3.8930263040305045e-07, "loss": 0.0213, "step": 939050 }, { "epoch": 9.23, "grad_norm": 4.465103626251221, "learning_rate": 3.8917850794880203e-07, "loss": 0.0481, "step": 939075 }, { "epoch": 9.23, "grad_norm": 0.7588120102882385, "learning_rate": 3.890543854945535e-07, "loss": 0.01, "step": 939100 }, { "epoch": 9.23, "grad_norm": 0.04938371852040291, "learning_rate": 3.889302630403051e-07, "loss": 0.0281, "step": 939125 }, { "epoch": 9.23, "grad_norm": 3.0165328979492188, "learning_rate": 3.888061405860566e-07, "loss": 0.026, "step": 939150 }, { "epoch": 9.23, "grad_norm": 0.22086748480796814, "learning_rate": 3.8868201813180815e-07, "loss": 0.0343, "step": 939175 }, { "epoch": 9.23, "grad_norm": 5.390534400939941, "learning_rate": 3.885578956775597e-07, "loss": 0.0105, "step": 939200 }, { "epoch": 9.23, "grad_norm": 0.05544812232255936, "learning_rate": 3.884337732233112e-07, "loss": 0.0217, "step": 939225 }, { "epoch": 9.23, "grad_norm": 7.1165056228637695, "learning_rate": 3.883096507690628e-07, "loss": 0.0129, "step": 939250 }, { "epoch": 9.24, "grad_norm": 1.941404938697815, "learning_rate": 3.8818552831481426e-07, "loss": 0.0315, "step": 939275 }, { "epoch": 9.24, "grad_norm": 2.263162136077881, "learning_rate": 3.8806140586056584e-07, "loss": 0.0159, "step": 939300 }, { "epoch": 9.24, "grad_norm": 0.06687265634536743, "learning_rate": 3.879372834063173e-07, "loss": 0.0298, "step": 939325 }, { "epoch": 9.24, "grad_norm": 3.8121180534362793, "learning_rate": 3.878131609520689e-07, "loss": 0.0065, "step": 939350 }, { "epoch": 9.24, "grad_norm": 0.02078518643975258, "learning_rate": 3.876890384978205e-07, "loss": 0.0429, "step": 939375 }, { "epoch": 9.24, "grad_norm": 0.24714693427085876, "learning_rate": 3.8756491604357196e-07, "loss": 0.02, "step": 939400 }, { "epoch": 9.24, "grad_norm": 0.09720838814973831, "learning_rate": 3.8744079358932354e-07, "loss": 0.0197, "step": 939425 }, { "epoch": 9.24, "grad_norm": 5.353551387786865, "learning_rate": 3.87316671135075e-07, "loss": 0.0136, "step": 939450 }, { "epoch": 9.24, "grad_norm": 0.5874468088150024, "learning_rate": 3.871925486808266e-07, "loss": 0.0282, "step": 939475 }, { "epoch": 9.24, "grad_norm": 0.10073191672563553, "learning_rate": 3.870684262265782e-07, "loss": 0.0226, "step": 939500 }, { "epoch": 9.24, "grad_norm": 0.05483347177505493, "learning_rate": 3.8694430377232966e-07, "loss": 0.027, "step": 939525 }, { "epoch": 9.24, "grad_norm": 4.603133678436279, "learning_rate": 3.8682018131808124e-07, "loss": 0.005, "step": 939550 }, { "epoch": 9.24, "grad_norm": 0.09247763454914093, "learning_rate": 3.866960588638327e-07, "loss": 0.0369, "step": 939575 }, { "epoch": 9.24, "grad_norm": 2.028473377227783, "learning_rate": 3.865719364095843e-07, "loss": 0.0261, "step": 939600 }, { "epoch": 9.24, "grad_norm": 1.2780663967132568, "learning_rate": 3.864478139553358e-07, "loss": 0.0421, "step": 939625 }, { "epoch": 9.24, "grad_norm": 6.847756862640381, "learning_rate": 3.8632369150108736e-07, "loss": 0.0166, "step": 939650 }, { "epoch": 9.24, "grad_norm": 0.1845228224992752, "learning_rate": 3.861995690468389e-07, "loss": 0.0334, "step": 939675 }, { "epoch": 9.24, "grad_norm": 0.18459929525852203, "learning_rate": 3.860754465925904e-07, "loss": 0.0167, "step": 939700 }, { "epoch": 9.24, "grad_norm": 0.007350764703005552, "learning_rate": 3.8595132413834194e-07, "loss": 0.0294, "step": 939725 }, { "epoch": 9.24, "grad_norm": 10.808289527893066, "learning_rate": 3.8582720168409347e-07, "loss": 0.0192, "step": 939750 }, { "epoch": 9.24, "grad_norm": 0.052860233932733536, "learning_rate": 3.85708044128015e-07, "loss": 0.0334, "step": 939775 }, { "epoch": 9.24, "grad_norm": 0.6046005487442017, "learning_rate": 3.8558392167376645e-07, "loss": 0.0095, "step": 939800 }, { "epoch": 9.24, "grad_norm": 5.87028694152832, "learning_rate": 3.8545979921951804e-07, "loss": 0.0657, "step": 939825 }, { "epoch": 9.24, "grad_norm": 0.47282785177230835, "learning_rate": 3.853356767652696e-07, "loss": 0.0045, "step": 939850 }, { "epoch": 9.24, "grad_norm": 1.143527865409851, "learning_rate": 3.852115543110211e-07, "loss": 0.0268, "step": 939875 }, { "epoch": 9.24, "grad_norm": 2.490950107574463, "learning_rate": 3.850874318567727e-07, "loss": 0.0196, "step": 939900 }, { "epoch": 9.24, "grad_norm": 9.870123863220215, "learning_rate": 3.8496330940252415e-07, "loss": 0.0382, "step": 939925 }, { "epoch": 9.24, "grad_norm": 0.12909288704395294, "learning_rate": 3.8483918694827573e-07, "loss": 0.018, "step": 939950 }, { "epoch": 9.24, "grad_norm": 0.013590006157755852, "learning_rate": 3.847150644940273e-07, "loss": 0.04, "step": 939975 }, { "epoch": 9.24, "grad_norm": 9.546037673950195, "learning_rate": 3.845909420397788e-07, "loss": 0.0166, "step": 940000 }, { "epoch": 9.24, "eval_loss": 0.9053454995155334, "eval_runtime": 6090.7659, "eval_samples_per_second": 1.554, "eval_steps_per_second": 0.194, "eval_wer": 0.11029743127534926, "step": 940000 }, { "epoch": 9.24, "grad_norm": 0.23257246613502502, "learning_rate": 3.844668195855304e-07, "loss": 0.0408, "step": 940025 }, { "epoch": 9.24, "grad_norm": 7.84941291809082, "learning_rate": 3.8434269713128185e-07, "loss": 0.0196, "step": 940050 }, { "epoch": 9.24, "grad_norm": 0.0845567062497139, "learning_rate": 3.8421857467703343e-07, "loss": 0.042, "step": 940075 }, { "epoch": 9.24, "grad_norm": 22.063692092895508, "learning_rate": 3.840944522227849e-07, "loss": 0.0111, "step": 940100 }, { "epoch": 9.24, "grad_norm": 0.2782747447490692, "learning_rate": 3.839703297685365e-07, "loss": 0.02, "step": 940125 }, { "epoch": 9.24, "grad_norm": 0.23276305198669434, "learning_rate": 3.83846207314288e-07, "loss": 0.0125, "step": 940150 }, { "epoch": 9.24, "grad_norm": 1.1419270038604736, "learning_rate": 3.8372208486003955e-07, "loss": 0.0431, "step": 940175 }, { "epoch": 9.24, "grad_norm": 0.09512748569250107, "learning_rate": 3.835979624057911e-07, "loss": 0.021, "step": 940200 }, { "epoch": 9.24, "grad_norm": 9.796502113342285, "learning_rate": 3.834738399515426e-07, "loss": 0.0398, "step": 940225 }, { "epoch": 9.24, "grad_norm": 0.2867334187030792, "learning_rate": 3.8334971749729414e-07, "loss": 0.0062, "step": 940250 }, { "epoch": 9.24, "grad_norm": 3.0330381393432617, "learning_rate": 3.832255950430457e-07, "loss": 0.0447, "step": 940275 }, { "epoch": 9.25, "grad_norm": 5.707541465759277, "learning_rate": 3.8310147258879725e-07, "loss": 0.007, "step": 940300 }, { "epoch": 9.25, "grad_norm": 1.4993810653686523, "learning_rate": 3.829773501345488e-07, "loss": 0.0461, "step": 940325 }, { "epoch": 9.25, "grad_norm": 1.5866332054138184, "learning_rate": 3.828532276803003e-07, "loss": 0.0215, "step": 940350 }, { "epoch": 9.25, "grad_norm": 0.16497257351875305, "learning_rate": 3.8272910522605183e-07, "loss": 0.0376, "step": 940375 }, { "epoch": 9.25, "grad_norm": 0.3939051032066345, "learning_rate": 3.826049827718034e-07, "loss": 0.0105, "step": 940400 }, { "epoch": 9.25, "grad_norm": 6.522799015045166, "learning_rate": 3.824808603175549e-07, "loss": 0.047, "step": 940425 }, { "epoch": 9.25, "grad_norm": 9.95474624633789, "learning_rate": 3.823567378633065e-07, "loss": 0.0244, "step": 940450 }, { "epoch": 9.25, "grad_norm": 0.5347980260848999, "learning_rate": 3.8223261540905795e-07, "loss": 0.039, "step": 940475 }, { "epoch": 9.25, "grad_norm": 4.963041305541992, "learning_rate": 3.8210849295480953e-07, "loss": 0.0112, "step": 940500 }, { "epoch": 9.25, "grad_norm": 0.08524113148450851, "learning_rate": 3.81984370500561e-07, "loss": 0.033, "step": 940525 }, { "epoch": 9.25, "grad_norm": 0.3194262385368347, "learning_rate": 3.818602480463126e-07, "loss": 0.0146, "step": 940550 }, { "epoch": 9.25, "grad_norm": 0.028128735721111298, "learning_rate": 3.8173612559206417e-07, "loss": 0.0248, "step": 940575 }, { "epoch": 9.25, "grad_norm": 0.44790300726890564, "learning_rate": 3.8161200313781565e-07, "loss": 0.0135, "step": 940600 }, { "epoch": 9.25, "grad_norm": 1.3234779834747314, "learning_rate": 3.8148788068356723e-07, "loss": 0.043, "step": 940625 }, { "epoch": 9.25, "grad_norm": 6.99839448928833, "learning_rate": 3.813637582293187e-07, "loss": 0.0304, "step": 940650 }, { "epoch": 9.25, "grad_norm": 8.516242027282715, "learning_rate": 3.812396357750703e-07, "loss": 0.0337, "step": 940675 }, { "epoch": 9.25, "grad_norm": 2.943643569946289, "learning_rate": 3.8111551332082187e-07, "loss": 0.0166, "step": 940700 }, { "epoch": 9.25, "grad_norm": 4.826661109924316, "learning_rate": 3.8099139086657335e-07, "loss": 0.0359, "step": 940725 }, { "epoch": 9.25, "grad_norm": 1.1322203874588013, "learning_rate": 3.8086726841232493e-07, "loss": 0.0214, "step": 940750 }, { "epoch": 9.25, "grad_norm": 0.014205658808350563, "learning_rate": 3.807431459580764e-07, "loss": 0.0285, "step": 940775 }, { "epoch": 9.25, "grad_norm": 0.8933281302452087, "learning_rate": 3.80619023503828e-07, "loss": 0.0093, "step": 940800 }, { "epoch": 9.25, "grad_norm": 0.015838230028748512, "learning_rate": 3.804949010495795e-07, "loss": 0.0478, "step": 940825 }, { "epoch": 9.25, "grad_norm": 6.710143566131592, "learning_rate": 3.8037077859533105e-07, "loss": 0.0091, "step": 940850 }, { "epoch": 9.25, "grad_norm": 0.720591127872467, "learning_rate": 3.802466561410826e-07, "loss": 0.027, "step": 940875 }, { "epoch": 9.25, "grad_norm": 12.259318351745605, "learning_rate": 3.801225336868341e-07, "loss": 0.0113, "step": 940900 }, { "epoch": 9.25, "grad_norm": 0.025802385061979294, "learning_rate": 3.7999841123258563e-07, "loss": 0.0461, "step": 940925 }, { "epoch": 9.25, "grad_norm": 0.7254810333251953, "learning_rate": 3.798742887783372e-07, "loss": 0.0083, "step": 940950 }, { "epoch": 9.25, "grad_norm": 0.031674835830926895, "learning_rate": 3.797501663240887e-07, "loss": 0.0401, "step": 940975 }, { "epoch": 9.25, "grad_norm": 10.774298667907715, "learning_rate": 3.7962604386984027e-07, "loss": 0.0064, "step": 941000 }, { "epoch": 9.25, "grad_norm": 8.846391677856445, "learning_rate": 3.795019214155918e-07, "loss": 0.0443, "step": 941025 }, { "epoch": 9.25, "grad_norm": 14.168177604675293, "learning_rate": 3.7937779896134333e-07, "loss": 0.0296, "step": 941050 }, { "epoch": 9.25, "grad_norm": 0.009594221599400043, "learning_rate": 3.7925367650709486e-07, "loss": 0.0387, "step": 941075 }, { "epoch": 9.25, "grad_norm": 2.513930320739746, "learning_rate": 3.791295540528464e-07, "loss": 0.0128, "step": 941100 }, { "epoch": 9.25, "grad_norm": 7.325976371765137, "learning_rate": 3.7900543159859797e-07, "loss": 0.0416, "step": 941125 }, { "epoch": 9.25, "grad_norm": 0.2470855414867401, "learning_rate": 3.7888130914434945e-07, "loss": 0.0189, "step": 941150 }, { "epoch": 9.25, "grad_norm": 0.012597648426890373, "learning_rate": 3.7875718669010103e-07, "loss": 0.0396, "step": 941175 }, { "epoch": 9.25, "grad_norm": 4.2969651222229, "learning_rate": 3.786330642358525e-07, "loss": 0.0136, "step": 941200 }, { "epoch": 9.25, "grad_norm": 0.22106723487377167, "learning_rate": 3.785089417816041e-07, "loss": 0.0362, "step": 941225 }, { "epoch": 9.25, "grad_norm": 6.203573226928711, "learning_rate": 3.7838481932735567e-07, "loss": 0.0154, "step": 941250 }, { "epoch": 9.25, "grad_norm": 4.239302158355713, "learning_rate": 3.7826069687310715e-07, "loss": 0.0435, "step": 941275 }, { "epoch": 9.26, "grad_norm": 12.139524459838867, "learning_rate": 3.7813657441885873e-07, "loss": 0.014, "step": 941300 }, { "epoch": 9.26, "grad_norm": 0.15759842097759247, "learning_rate": 3.780124519646102e-07, "loss": 0.0291, "step": 941325 }, { "epoch": 9.26, "grad_norm": 0.2869443893432617, "learning_rate": 3.778883295103618e-07, "loss": 0.0117, "step": 941350 }, { "epoch": 9.26, "grad_norm": 1.2308954000473022, "learning_rate": 3.7776420705611337e-07, "loss": 0.0298, "step": 941375 }, { "epoch": 9.26, "grad_norm": 8.494911193847656, "learning_rate": 3.7764008460186484e-07, "loss": 0.0232, "step": 941400 }, { "epoch": 9.26, "grad_norm": 0.2833919823169708, "learning_rate": 3.7751596214761643e-07, "loss": 0.0278, "step": 941425 }, { "epoch": 9.26, "grad_norm": 3.376152515411377, "learning_rate": 3.773918396933679e-07, "loss": 0.0103, "step": 941450 }, { "epoch": 9.26, "grad_norm": 1.106372356414795, "learning_rate": 3.772677172391195e-07, "loss": 0.0362, "step": 941475 }, { "epoch": 9.26, "grad_norm": 9.526920318603516, "learning_rate": 3.7714359478487096e-07, "loss": 0.0165, "step": 941500 }, { "epoch": 9.26, "grad_norm": 1.4238077402114868, "learning_rate": 3.7701947233062254e-07, "loss": 0.0365, "step": 941525 }, { "epoch": 9.26, "grad_norm": 11.408486366271973, "learning_rate": 3.7689534987637407e-07, "loss": 0.0077, "step": 941550 }, { "epoch": 9.26, "grad_norm": 4.015286922454834, "learning_rate": 3.767712274221256e-07, "loss": 0.0409, "step": 941575 }, { "epoch": 9.26, "grad_norm": 9.166516304016113, "learning_rate": 3.7664710496787713e-07, "loss": 0.0181, "step": 941600 }, { "epoch": 9.26, "grad_norm": 0.03315161168575287, "learning_rate": 3.7652298251362866e-07, "loss": 0.0336, "step": 941625 }, { "epoch": 9.26, "grad_norm": 12.629378318786621, "learning_rate": 3.763988600593802e-07, "loss": 0.0255, "step": 941650 }, { "epoch": 9.26, "grad_norm": 0.11230644583702087, "learning_rate": 3.7627473760513177e-07, "loss": 0.0276, "step": 941675 }, { "epoch": 9.26, "grad_norm": 2.391404390335083, "learning_rate": 3.761506151508833e-07, "loss": 0.018, "step": 941700 }, { "epoch": 9.26, "grad_norm": 0.003597278380766511, "learning_rate": 3.7602649269663483e-07, "loss": 0.0394, "step": 941725 }, { "epoch": 9.26, "grad_norm": 10.980165481567383, "learning_rate": 3.7590237024238636e-07, "loss": 0.0205, "step": 941750 }, { "epoch": 9.26, "grad_norm": 0.04796263948082924, "learning_rate": 3.757782477881379e-07, "loss": 0.0326, "step": 941775 }, { "epoch": 9.26, "grad_norm": 5.959763526916504, "learning_rate": 3.7565412533388947e-07, "loss": 0.0203, "step": 941800 }, { "epoch": 9.26, "grad_norm": 0.007733928505331278, "learning_rate": 3.7553000287964095e-07, "loss": 0.0374, "step": 941825 }, { "epoch": 9.26, "grad_norm": 6.690493106842041, "learning_rate": 3.7540588042539253e-07, "loss": 0.0165, "step": 941850 }, { "epoch": 9.26, "grad_norm": 0.2190883308649063, "learning_rate": 3.75281757971144e-07, "loss": 0.0308, "step": 941875 }, { "epoch": 9.26, "grad_norm": 5.6403374671936035, "learning_rate": 3.751576355168956e-07, "loss": 0.0059, "step": 941900 }, { "epoch": 9.26, "grad_norm": 6.165993690490723, "learning_rate": 3.7503351306264706e-07, "loss": 0.0405, "step": 941925 }, { "epoch": 9.26, "grad_norm": 0.1521294116973877, "learning_rate": 3.7490939060839864e-07, "loss": 0.0198, "step": 941950 }, { "epoch": 9.26, "grad_norm": 0.44177529215812683, "learning_rate": 3.747852681541502e-07, "loss": 0.0378, "step": 941975 }, { "epoch": 9.26, "grad_norm": 1.9014009237289429, "learning_rate": 3.746611456999017e-07, "loss": 0.0149, "step": 942000 }, { "epoch": 9.26, "grad_norm": 0.023463351652026176, "learning_rate": 3.745370232456533e-07, "loss": 0.0407, "step": 942025 }, { "epoch": 9.26, "grad_norm": 10.630576133728027, "learning_rate": 3.7441290079140476e-07, "loss": 0.0286, "step": 942050 }, { "epoch": 9.26, "grad_norm": 0.049086615443229675, "learning_rate": 3.7428877833715634e-07, "loss": 0.0231, "step": 942075 }, { "epoch": 9.26, "grad_norm": 0.10785023123025894, "learning_rate": 3.741646558829079e-07, "loss": 0.0182, "step": 942100 }, { "epoch": 9.26, "grad_norm": 0.6340417861938477, "learning_rate": 3.740405334286594e-07, "loss": 0.0193, "step": 942125 }, { "epoch": 9.26, "grad_norm": 1.9571912288665771, "learning_rate": 3.73916410974411e-07, "loss": 0.0079, "step": 942150 }, { "epoch": 9.26, "grad_norm": 1.9535112380981445, "learning_rate": 3.7379228852016246e-07, "loss": 0.0313, "step": 942175 }, { "epoch": 9.26, "grad_norm": 0.44616925716400146, "learning_rate": 3.7366816606591404e-07, "loss": 0.0193, "step": 942200 }, { "epoch": 9.26, "grad_norm": 0.09464041888713837, "learning_rate": 3.7354404361166557e-07, "loss": 0.0445, "step": 942225 }, { "epoch": 9.26, "grad_norm": 10.880223274230957, "learning_rate": 3.734199211574171e-07, "loss": 0.0076, "step": 942250 }, { "epoch": 9.26, "grad_norm": 1.1763296127319336, "learning_rate": 3.7329579870316863e-07, "loss": 0.0211, "step": 942275 }, { "epoch": 9.26, "grad_norm": 2.8943631649017334, "learning_rate": 3.7317167624892016e-07, "loss": 0.0069, "step": 942300 }, { "epoch": 9.27, "grad_norm": 0.04790792614221573, "learning_rate": 3.730475537946717e-07, "loss": 0.0297, "step": 942325 }, { "epoch": 9.27, "grad_norm": 16.620576858520508, "learning_rate": 3.729234313404232e-07, "loss": 0.0251, "step": 942350 }, { "epoch": 9.27, "grad_norm": 1.6109442710876465, "learning_rate": 3.727993088861748e-07, "loss": 0.0417, "step": 942375 }, { "epoch": 9.27, "grad_norm": 1.1001869440078735, "learning_rate": 3.726751864319263e-07, "loss": 0.0177, "step": 942400 }, { "epoch": 9.27, "grad_norm": 1.0698705911636353, "learning_rate": 3.7255106397767786e-07, "loss": 0.0355, "step": 942425 }, { "epoch": 9.27, "grad_norm": 11.391128540039062, "learning_rate": 3.724269415234294e-07, "loss": 0.0133, "step": 942450 }, { "epoch": 9.27, "grad_norm": 0.0020771771669387817, "learning_rate": 3.723028190691809e-07, "loss": 0.028, "step": 942475 }, { "epoch": 9.27, "grad_norm": 2.7158117294311523, "learning_rate": 3.7217869661493244e-07, "loss": 0.0088, "step": 942500 }, { "epoch": 9.27, "grad_norm": 0.2932700216770172, "learning_rate": 3.72054574160684e-07, "loss": 0.0344, "step": 942525 }, { "epoch": 9.27, "grad_norm": 9.173439025878906, "learning_rate": 3.719304517064355e-07, "loss": 0.0098, "step": 942550 }, { "epoch": 9.27, "grad_norm": 1.0119507312774658, "learning_rate": 3.718063292521871e-07, "loss": 0.0275, "step": 942575 }, { "epoch": 9.27, "grad_norm": 9.409342765808105, "learning_rate": 3.7168220679793856e-07, "loss": 0.013, "step": 942600 }, { "epoch": 9.27, "grad_norm": 0.009202280081808567, "learning_rate": 3.7155808434369014e-07, "loss": 0.0414, "step": 942625 }, { "epoch": 9.27, "grad_norm": 8.420291900634766, "learning_rate": 3.714339618894417e-07, "loss": 0.0202, "step": 942650 }, { "epoch": 9.27, "grad_norm": 0.006403371691703796, "learning_rate": 3.713098394351932e-07, "loss": 0.0391, "step": 942675 }, { "epoch": 9.27, "grad_norm": 0.40861180424690247, "learning_rate": 3.711857169809448e-07, "loss": 0.0088, "step": 942700 }, { "epoch": 9.27, "grad_norm": 0.041001297533512115, "learning_rate": 3.7106159452669626e-07, "loss": 0.0247, "step": 942725 }, { "epoch": 9.27, "grad_norm": 18.477046966552734, "learning_rate": 3.7093747207244784e-07, "loss": 0.0185, "step": 942750 }, { "epoch": 9.27, "grad_norm": 4.063650131225586, "learning_rate": 3.708133496181993e-07, "loss": 0.0353, "step": 942775 }, { "epoch": 9.27, "grad_norm": 0.4707919657230377, "learning_rate": 3.706892271639509e-07, "loss": 0.014, "step": 942800 }, { "epoch": 9.27, "grad_norm": 3.2930028438568115, "learning_rate": 3.705651047097025e-07, "loss": 0.0226, "step": 942825 }, { "epoch": 9.27, "grad_norm": 9.627608299255371, "learning_rate": 3.7044098225545396e-07, "loss": 0.0118, "step": 942850 }, { "epoch": 9.27, "grad_norm": 0.03344019502401352, "learning_rate": 3.7032182469937546e-07, "loss": 0.0544, "step": 942875 }, { "epoch": 9.27, "grad_norm": 8.402201652526855, "learning_rate": 3.70197702245127e-07, "loss": 0.0187, "step": 942900 }, { "epoch": 9.27, "grad_norm": 0.0407283678650856, "learning_rate": 3.700735797908785e-07, "loss": 0.0456, "step": 942925 }, { "epoch": 9.27, "grad_norm": 0.6825025081634521, "learning_rate": 3.6994945733663005e-07, "loss": 0.0107, "step": 942950 }, { "epoch": 9.27, "grad_norm": 0.08051614463329315, "learning_rate": 3.698253348823816e-07, "loss": 0.0402, "step": 942975 }, { "epoch": 9.27, "grad_norm": 4.988670349121094, "learning_rate": 3.6970121242813316e-07, "loss": 0.0158, "step": 943000 }, { "epoch": 9.27, "grad_norm": 1.3605715036392212, "learning_rate": 3.6957708997388463e-07, "loss": 0.0507, "step": 943025 }, { "epoch": 9.27, "grad_norm": 7.4938859939575195, "learning_rate": 3.694529675196362e-07, "loss": 0.0106, "step": 943050 }, { "epoch": 9.27, "grad_norm": 3.6280171871185303, "learning_rate": 3.693288450653877e-07, "loss": 0.0403, "step": 943075 }, { "epoch": 9.27, "grad_norm": 2.5493574142456055, "learning_rate": 3.692047226111393e-07, "loss": 0.0187, "step": 943100 }, { "epoch": 9.27, "grad_norm": 0.7943382859230042, "learning_rate": 3.6908060015689086e-07, "loss": 0.0404, "step": 943125 }, { "epoch": 9.27, "grad_norm": 10.43459415435791, "learning_rate": 3.6895647770264233e-07, "loss": 0.0093, "step": 943150 }, { "epoch": 9.27, "grad_norm": 0.009633355773985386, "learning_rate": 3.688323552483939e-07, "loss": 0.0265, "step": 943175 }, { "epoch": 9.27, "grad_norm": 1.9736136198043823, "learning_rate": 3.687082327941454e-07, "loss": 0.0254, "step": 943200 }, { "epoch": 9.27, "grad_norm": 1.649260401725769, "learning_rate": 3.6858411033989697e-07, "loss": 0.0269, "step": 943225 }, { "epoch": 9.27, "grad_norm": 1.3541816473007202, "learning_rate": 3.6845998788564845e-07, "loss": 0.0175, "step": 943250 }, { "epoch": 9.27, "grad_norm": 0.6752196550369263, "learning_rate": 3.6833586543140003e-07, "loss": 0.0388, "step": 943275 }, { "epoch": 9.27, "grad_norm": 3.508336305618286, "learning_rate": 3.682117429771516e-07, "loss": 0.0102, "step": 943300 }, { "epoch": 9.27, "grad_norm": 0.09730945527553558, "learning_rate": 3.680876205229031e-07, "loss": 0.0376, "step": 943325 }, { "epoch": 9.28, "grad_norm": 4.318304538726807, "learning_rate": 3.6796349806865467e-07, "loss": 0.0133, "step": 943350 }, { "epoch": 9.28, "grad_norm": 1.0818688869476318, "learning_rate": 3.6783937561440615e-07, "loss": 0.0347, "step": 943375 }, { "epoch": 9.28, "grad_norm": 0.18591055274009705, "learning_rate": 3.6771525316015773e-07, "loss": 0.0198, "step": 943400 }, { "epoch": 9.28, "grad_norm": 0.20134267210960388, "learning_rate": 3.6759113070590926e-07, "loss": 0.0465, "step": 943425 }, { "epoch": 9.28, "grad_norm": 1.038012146949768, "learning_rate": 3.674670082516608e-07, "loss": 0.0193, "step": 943450 }, { "epoch": 9.28, "grad_norm": 0.020251499488949776, "learning_rate": 3.673428857974123e-07, "loss": 0.0341, "step": 943475 }, { "epoch": 9.28, "grad_norm": 1.443695068359375, "learning_rate": 3.6721876334316385e-07, "loss": 0.0161, "step": 943500 }, { "epoch": 9.28, "grad_norm": 0.10916931927204132, "learning_rate": 3.6709464088891543e-07, "loss": 0.0253, "step": 943525 }, { "epoch": 9.28, "grad_norm": 0.7174234986305237, "learning_rate": 3.6697051843466696e-07, "loss": 0.0093, "step": 943550 }, { "epoch": 9.28, "grad_norm": 0.05419168621301651, "learning_rate": 3.668463959804185e-07, "loss": 0.0401, "step": 943575 }, { "epoch": 9.28, "grad_norm": 9.340703964233398, "learning_rate": 3.6672227352617e-07, "loss": 0.0092, "step": 943600 }, { "epoch": 9.28, "grad_norm": 0.11812394857406616, "learning_rate": 3.6659815107192154e-07, "loss": 0.0357, "step": 943625 }, { "epoch": 9.28, "grad_norm": 1.1071475744247437, "learning_rate": 3.6647402861767307e-07, "loss": 0.018, "step": 943650 }, { "epoch": 9.28, "grad_norm": 0.02674187906086445, "learning_rate": 3.663499061634246e-07, "loss": 0.0473, "step": 943675 }, { "epoch": 9.28, "grad_norm": 3.29719877243042, "learning_rate": 3.6622578370917613e-07, "loss": 0.0115, "step": 943700 }, { "epoch": 9.28, "grad_norm": 0.03559121862053871, "learning_rate": 3.661016612549277e-07, "loss": 0.0341, "step": 943725 }, { "epoch": 9.28, "grad_norm": 11.872779846191406, "learning_rate": 3.659775388006792e-07, "loss": 0.0182, "step": 943750 }, { "epoch": 9.28, "grad_norm": 0.6696932911872864, "learning_rate": 3.6585341634643077e-07, "loss": 0.0467, "step": 943775 }, { "epoch": 9.28, "grad_norm": 1.5125401020050049, "learning_rate": 3.6572929389218225e-07, "loss": 0.0063, "step": 943800 }, { "epoch": 9.28, "grad_norm": 0.3146505057811737, "learning_rate": 3.6560517143793383e-07, "loss": 0.0397, "step": 943825 }, { "epoch": 9.28, "grad_norm": 0.4275728464126587, "learning_rate": 3.654810489836854e-07, "loss": 0.0194, "step": 943850 }, { "epoch": 9.28, "grad_norm": 0.19533933699131012, "learning_rate": 3.653569265294369e-07, "loss": 0.0279, "step": 943875 }, { "epoch": 9.28, "grad_norm": 0.8438477516174316, "learning_rate": 3.6523280407518847e-07, "loss": 0.0165, "step": 943900 }, { "epoch": 9.28, "grad_norm": 1.231403112411499, "learning_rate": 3.6510868162093995e-07, "loss": 0.0377, "step": 943925 }, { "epoch": 9.28, "grad_norm": 20.135366439819336, "learning_rate": 3.6498455916669153e-07, "loss": 0.0211, "step": 943950 }, { "epoch": 9.28, "grad_norm": 1.833397388458252, "learning_rate": 3.648604367124431e-07, "loss": 0.0468, "step": 943975 }, { "epoch": 9.28, "grad_norm": 0.10475671291351318, "learning_rate": 3.647363142581946e-07, "loss": 0.0138, "step": 944000 }, { "epoch": 9.28, "grad_norm": 4.790850639343262, "learning_rate": 3.6461219180394617e-07, "loss": 0.0275, "step": 944025 }, { "epoch": 9.28, "grad_norm": 8.79763126373291, "learning_rate": 3.6448806934969764e-07, "loss": 0.0168, "step": 944050 }, { "epoch": 9.28, "grad_norm": 0.018798530101776123, "learning_rate": 3.6436394689544923e-07, "loss": 0.0425, "step": 944075 }, { "epoch": 9.28, "grad_norm": 4.262472629547119, "learning_rate": 3.642398244412007e-07, "loss": 0.0148, "step": 944100 }, { "epoch": 9.28, "grad_norm": 0.002383358310908079, "learning_rate": 3.641157019869523e-07, "loss": 0.0355, "step": 944125 }, { "epoch": 9.28, "grad_norm": 0.4366741180419922, "learning_rate": 3.639915795327038e-07, "loss": 0.0139, "step": 944150 }, { "epoch": 9.28, "grad_norm": 0.49084702134132385, "learning_rate": 3.6386745707845534e-07, "loss": 0.0292, "step": 944175 }, { "epoch": 9.28, "grad_norm": 8.861002922058105, "learning_rate": 3.6374333462420687e-07, "loss": 0.0187, "step": 944200 }, { "epoch": 9.28, "grad_norm": 0.027438439428806305, "learning_rate": 3.636192121699584e-07, "loss": 0.0458, "step": 944225 }, { "epoch": 9.28, "grad_norm": 1.5069588422775269, "learning_rate": 3.6349508971571e-07, "loss": 0.0107, "step": 944250 }, { "epoch": 9.28, "grad_norm": 1.308760404586792, "learning_rate": 3.633709672614615e-07, "loss": 0.0415, "step": 944275 }, { "epoch": 9.28, "grad_norm": 13.725738525390625, "learning_rate": 3.6324684480721304e-07, "loss": 0.0139, "step": 944300 }, { "epoch": 9.28, "grad_norm": 0.02418968826532364, "learning_rate": 3.6312272235296457e-07, "loss": 0.0381, "step": 944325 }, { "epoch": 9.29, "grad_norm": 3.728156328201294, "learning_rate": 3.629985998987161e-07, "loss": 0.013, "step": 944350 }, { "epoch": 9.29, "grad_norm": 0.045439306646585464, "learning_rate": 3.6287447744446763e-07, "loss": 0.0402, "step": 944375 }, { "epoch": 9.29, "grad_norm": 6.646708011627197, "learning_rate": 3.627503549902192e-07, "loss": 0.0226, "step": 944400 }, { "epoch": 9.29, "grad_norm": 0.0175466425716877, "learning_rate": 3.626262325359707e-07, "loss": 0.0283, "step": 944425 }, { "epoch": 9.29, "grad_norm": 5.768858432769775, "learning_rate": 3.6250211008172227e-07, "loss": 0.0235, "step": 944450 }, { "epoch": 9.29, "grad_norm": 0.019406866282224655, "learning_rate": 3.6237798762747375e-07, "loss": 0.0416, "step": 944475 }, { "epoch": 9.29, "grad_norm": 10.223541259765625, "learning_rate": 3.6225386517322533e-07, "loss": 0.0116, "step": 944500 }, { "epoch": 9.29, "grad_norm": 0.011332586407661438, "learning_rate": 3.621297427189768e-07, "loss": 0.0389, "step": 944525 }, { "epoch": 9.29, "grad_norm": 2.4070847034454346, "learning_rate": 3.620056202647284e-07, "loss": 0.0097, "step": 944550 }, { "epoch": 9.29, "grad_norm": 2.2135937213897705, "learning_rate": 3.6188149781047997e-07, "loss": 0.0465, "step": 944575 }, { "epoch": 9.29, "grad_norm": 7.534956455230713, "learning_rate": 3.6175737535623144e-07, "loss": 0.0182, "step": 944600 }, { "epoch": 9.29, "grad_norm": 0.03943866491317749, "learning_rate": 3.61633252901983e-07, "loss": 0.0604, "step": 944625 }, { "epoch": 9.29, "grad_norm": 12.904239654541016, "learning_rate": 3.615091304477345e-07, "loss": 0.0227, "step": 944650 }, { "epoch": 9.29, "grad_norm": 0.09922775626182556, "learning_rate": 3.613850079934861e-07, "loss": 0.0398, "step": 944675 }, { "epoch": 9.29, "grad_norm": 1.288384199142456, "learning_rate": 3.6126088553923767e-07, "loss": 0.014, "step": 944700 }, { "epoch": 9.29, "grad_norm": 0.24045656621456146, "learning_rate": 3.6113676308498914e-07, "loss": 0.042, "step": 944725 }, { "epoch": 9.29, "grad_norm": 8.944910049438477, "learning_rate": 3.610126406307407e-07, "loss": 0.0094, "step": 944750 }, { "epoch": 9.29, "grad_norm": 0.057007089257240295, "learning_rate": 3.608885181764922e-07, "loss": 0.0313, "step": 944775 }, { "epoch": 9.29, "grad_norm": 0.3723742961883545, "learning_rate": 3.607643957222438e-07, "loss": 0.0291, "step": 944800 }, { "epoch": 9.29, "grad_norm": 0.039829447865486145, "learning_rate": 3.606402732679953e-07, "loss": 0.0293, "step": 944825 }, { "epoch": 9.29, "grad_norm": 0.9685693383216858, "learning_rate": 3.6051615081374684e-07, "loss": 0.0072, "step": 944850 }, { "epoch": 9.29, "grad_norm": 1.1221016645431519, "learning_rate": 3.6039202835949837e-07, "loss": 0.0278, "step": 944875 }, { "epoch": 9.29, "grad_norm": 1.0423544645309448, "learning_rate": 3.602679059052499e-07, "loss": 0.0086, "step": 944900 }, { "epoch": 9.29, "grad_norm": 0.05129372701048851, "learning_rate": 3.601437834510015e-07, "loss": 0.0267, "step": 944925 }, { "epoch": 9.29, "grad_norm": 16.820175170898438, "learning_rate": 3.60019660996753e-07, "loss": 0.0106, "step": 944950 }, { "epoch": 9.29, "grad_norm": 0.034206755459308624, "learning_rate": 3.5989553854250454e-07, "loss": 0.0304, "step": 944975 }, { "epoch": 9.29, "grad_norm": 7.301421165466309, "learning_rate": 3.5977141608825607e-07, "loss": 0.0168, "step": 945000 }, { "epoch": 9.29, "grad_norm": 1.8316556215286255, "learning_rate": 3.596472936340076e-07, "loss": 0.0402, "step": 945025 }, { "epoch": 9.29, "grad_norm": 8.31715202331543, "learning_rate": 3.595231711797591e-07, "loss": 0.019, "step": 945050 }, { "epoch": 9.29, "grad_norm": 0.2584281265735626, "learning_rate": 3.5939904872551066e-07, "loss": 0.0253, "step": 945075 }, { "epoch": 9.29, "grad_norm": 6.882602214813232, "learning_rate": 3.592749262712622e-07, "loss": 0.0129, "step": 945100 }, { "epoch": 9.29, "grad_norm": 3.562089681625366, "learning_rate": 3.5915080381701377e-07, "loss": 0.0572, "step": 945125 }, { "epoch": 9.29, "grad_norm": 0.7510283589363098, "learning_rate": 3.5902668136276524e-07, "loss": 0.0213, "step": 945150 }, { "epoch": 9.29, "grad_norm": 0.0027735901530832052, "learning_rate": 3.589025589085168e-07, "loss": 0.0357, "step": 945175 }, { "epoch": 9.29, "grad_norm": 16.86767578125, "learning_rate": 3.587784364542683e-07, "loss": 0.0106, "step": 945200 }, { "epoch": 9.29, "grad_norm": 0.10212531685829163, "learning_rate": 3.586543140000199e-07, "loss": 0.0226, "step": 945225 }, { "epoch": 9.29, "grad_norm": 5.370250225067139, "learning_rate": 3.5853019154577146e-07, "loss": 0.0151, "step": 945250 }, { "epoch": 9.29, "grad_norm": 1.7237162590026855, "learning_rate": 3.5840606909152294e-07, "loss": 0.0313, "step": 945275 }, { "epoch": 9.29, "grad_norm": 31.86207389831543, "learning_rate": 3.582819466372745e-07, "loss": 0.0229, "step": 945300 }, { "epoch": 9.29, "grad_norm": 0.06910555064678192, "learning_rate": 3.58157824183026e-07, "loss": 0.0367, "step": 945325 }, { "epoch": 9.29, "grad_norm": 9.661274909973145, "learning_rate": 3.580337017287776e-07, "loss": 0.01, "step": 945350 }, { "epoch": 9.3, "grad_norm": 3.323833465576172, "learning_rate": 3.5790957927452916e-07, "loss": 0.0389, "step": 945375 }, { "epoch": 9.3, "grad_norm": 0.15994229912757874, "learning_rate": 3.5778545682028064e-07, "loss": 0.01, "step": 945400 }, { "epoch": 9.3, "grad_norm": 0.02380509302020073, "learning_rate": 3.576613343660322e-07, "loss": 0.0384, "step": 945425 }, { "epoch": 9.3, "grad_norm": 5.833061218261719, "learning_rate": 3.575372119117837e-07, "loss": 0.0058, "step": 945450 }, { "epoch": 9.3, "grad_norm": 1.2509167194366455, "learning_rate": 3.574130894575353e-07, "loss": 0.0364, "step": 945475 }, { "epoch": 9.3, "grad_norm": 4.201569080352783, "learning_rate": 3.5728896700328676e-07, "loss": 0.0113, "step": 945500 }, { "epoch": 9.3, "grad_norm": 0.986962080001831, "learning_rate": 3.5716484454903834e-07, "loss": 0.0281, "step": 945525 }, { "epoch": 9.3, "grad_norm": 0.06923723965883255, "learning_rate": 3.5704072209478987e-07, "loss": 0.0083, "step": 945550 }, { "epoch": 9.3, "grad_norm": 0.1059698686003685, "learning_rate": 3.569165996405414e-07, "loss": 0.0337, "step": 945575 }, { "epoch": 9.3, "grad_norm": 3.7710812091827393, "learning_rate": 3.567924771862929e-07, "loss": 0.0105, "step": 945600 }, { "epoch": 9.3, "grad_norm": 7.499840259552002, "learning_rate": 3.5666835473204445e-07, "loss": 0.0499, "step": 945625 }, { "epoch": 9.3, "grad_norm": 0.8984639048576355, "learning_rate": 3.5654423227779604e-07, "loss": 0.0171, "step": 945650 }, { "epoch": 9.3, "grad_norm": 0.5200117826461792, "learning_rate": 3.5642010982354757e-07, "loss": 0.0452, "step": 945675 }, { "epoch": 9.3, "grad_norm": 0.2222534418106079, "learning_rate": 3.562959873692991e-07, "loss": 0.0228, "step": 945700 }, { "epoch": 9.3, "grad_norm": 0.1776343435049057, "learning_rate": 3.561718649150506e-07, "loss": 0.0414, "step": 945725 }, { "epoch": 9.3, "grad_norm": 4.209135055541992, "learning_rate": 3.5604774246080215e-07, "loss": 0.019, "step": 945750 }, { "epoch": 9.3, "grad_norm": 8.4292631149292, "learning_rate": 3.559236200065537e-07, "loss": 0.0418, "step": 945775 }, { "epoch": 9.3, "grad_norm": 6.307013988494873, "learning_rate": 3.5579949755230526e-07, "loss": 0.0182, "step": 945800 }, { "epoch": 9.3, "grad_norm": 0.0652446374297142, "learning_rate": 3.5567537509805674e-07, "loss": 0.0372, "step": 945825 }, { "epoch": 9.3, "grad_norm": 0.5349738597869873, "learning_rate": 3.555512526438083e-07, "loss": 0.0043, "step": 945850 }, { "epoch": 9.3, "grad_norm": 4.718949794769287, "learning_rate": 3.554271301895598e-07, "loss": 0.0523, "step": 945875 }, { "epoch": 9.3, "grad_norm": 1.4851765632629395, "learning_rate": 3.553030077353114e-07, "loss": 0.0195, "step": 945900 }, { "epoch": 9.3, "grad_norm": 0.04249102994799614, "learning_rate": 3.5517888528106286e-07, "loss": 0.0371, "step": 945925 }, { "epoch": 9.3, "grad_norm": 8.306864738464355, "learning_rate": 3.5505476282681444e-07, "loss": 0.0115, "step": 945950 }, { "epoch": 9.3, "grad_norm": 0.0025618900544941425, "learning_rate": 3.54930640372566e-07, "loss": 0.0371, "step": 945975 }, { "epoch": 9.3, "grad_norm": 1.4143069982528687, "learning_rate": 3.548065179183175e-07, "loss": 0.0071, "step": 946000 }, { "epoch": 9.3, "grad_norm": 0.12913696467876434, "learning_rate": 3.546823954640691e-07, "loss": 0.041, "step": 946025 }, { "epoch": 9.3, "grad_norm": 2.5538337230682373, "learning_rate": 3.5455827300982055e-07, "loss": 0.0136, "step": 946050 }, { "epoch": 9.3, "grad_norm": 0.03407781571149826, "learning_rate": 3.5443415055557214e-07, "loss": 0.0475, "step": 946075 }, { "epoch": 9.3, "grad_norm": 0.1881335824728012, "learning_rate": 3.543100281013237e-07, "loss": 0.0079, "step": 946100 }, { "epoch": 9.3, "grad_norm": 0.07224695384502411, "learning_rate": 3.541859056470752e-07, "loss": 0.0426, "step": 946125 }, { "epoch": 9.3, "grad_norm": 0.47136303782463074, "learning_rate": 3.540617831928268e-07, "loss": 0.0124, "step": 946150 }, { "epoch": 9.3, "grad_norm": 0.003865655744448304, "learning_rate": 3.5393766073857825e-07, "loss": 0.0381, "step": 946175 }, { "epoch": 9.3, "grad_norm": 2.8862576484680176, "learning_rate": 3.5381353828432984e-07, "loss": 0.0081, "step": 946200 }, { "epoch": 9.3, "grad_norm": 3.3652286529541016, "learning_rate": 3.536943807282513e-07, "loss": 0.0383, "step": 946225 }, { "epoch": 9.3, "grad_norm": 7.941649913787842, "learning_rate": 3.535702582740028e-07, "loss": 0.0082, "step": 946250 }, { "epoch": 9.3, "grad_norm": 0.029517635703086853, "learning_rate": 3.534461358197544e-07, "loss": 0.0369, "step": 946275 }, { "epoch": 9.3, "grad_norm": 1.2687997817993164, "learning_rate": 3.533220133655059e-07, "loss": 0.0159, "step": 946300 }, { "epoch": 9.3, "grad_norm": 0.051271215081214905, "learning_rate": 3.5319789091125746e-07, "loss": 0.0391, "step": 946325 }, { "epoch": 9.3, "grad_norm": 4.21151065826416, "learning_rate": 3.5307376845700893e-07, "loss": 0.0087, "step": 946350 }, { "epoch": 9.3, "grad_norm": 0.028880266472697258, "learning_rate": 3.529496460027605e-07, "loss": 0.08, "step": 946375 }, { "epoch": 9.31, "grad_norm": 9.426558494567871, "learning_rate": 3.5282552354851204e-07, "loss": 0.0201, "step": 946400 }, { "epoch": 9.31, "grad_norm": 0.0907798781991005, "learning_rate": 3.5270140109426357e-07, "loss": 0.044, "step": 946425 }, { "epoch": 9.31, "grad_norm": 11.205710411071777, "learning_rate": 3.5257727864001515e-07, "loss": 0.0108, "step": 946450 }, { "epoch": 9.31, "grad_norm": 0.015836268663406372, "learning_rate": 3.5245315618576663e-07, "loss": 0.0448, "step": 946475 }, { "epoch": 9.31, "grad_norm": 7.006383419036865, "learning_rate": 3.523290337315182e-07, "loss": 0.0217, "step": 946500 }, { "epoch": 9.31, "grad_norm": 0.019422920420765877, "learning_rate": 3.522049112772697e-07, "loss": 0.0231, "step": 946525 }, { "epoch": 9.31, "grad_norm": 7.871044635772705, "learning_rate": 3.5208078882302127e-07, "loss": 0.0144, "step": 946550 }, { "epoch": 9.31, "grad_norm": 0.041073739528656006, "learning_rate": 3.5195666636877285e-07, "loss": 0.0223, "step": 946575 }, { "epoch": 9.31, "grad_norm": 1.89812171459198, "learning_rate": 3.5183254391452433e-07, "loss": 0.0137, "step": 946600 }, { "epoch": 9.31, "grad_norm": 0.005501275882124901, "learning_rate": 3.517084214602759e-07, "loss": 0.0355, "step": 946625 }, { "epoch": 9.31, "grad_norm": 0.20706401765346527, "learning_rate": 3.515842990060274e-07, "loss": 0.0092, "step": 946650 }, { "epoch": 9.31, "grad_norm": 0.033307306468486786, "learning_rate": 3.5146017655177897e-07, "loss": 0.0248, "step": 946675 }, { "epoch": 9.31, "grad_norm": 0.16506430506706238, "learning_rate": 3.513360540975305e-07, "loss": 0.0066, "step": 946700 }, { "epoch": 9.31, "grad_norm": 0.12794163823127747, "learning_rate": 3.5121193164328203e-07, "loss": 0.0332, "step": 946725 }, { "epoch": 9.31, "grad_norm": 4.642558574676514, "learning_rate": 3.5108780918903356e-07, "loss": 0.0107, "step": 946750 }, { "epoch": 9.31, "grad_norm": 2.5775718688964844, "learning_rate": 3.509636867347851e-07, "loss": 0.0247, "step": 946775 }, { "epoch": 9.31, "grad_norm": 4.2281107902526855, "learning_rate": 3.5083956428053667e-07, "loss": 0.0223, "step": 946800 }, { "epoch": 9.31, "grad_norm": 1.416611671447754, "learning_rate": 3.5071544182628814e-07, "loss": 0.05, "step": 946825 }, { "epoch": 9.31, "grad_norm": 8.294751167297363, "learning_rate": 3.505913193720397e-07, "loss": 0.0067, "step": 946850 }, { "epoch": 9.31, "grad_norm": 0.021877672523260117, "learning_rate": 3.5046719691779125e-07, "loss": 0.0355, "step": 946875 }, { "epoch": 9.31, "grad_norm": 11.609858512878418, "learning_rate": 3.503430744635428e-07, "loss": 0.0129, "step": 946900 }, { "epoch": 9.31, "grad_norm": 0.12452587485313416, "learning_rate": 3.502189520092943e-07, "loss": 0.0275, "step": 946925 }, { "epoch": 9.31, "grad_norm": 6.61193323135376, "learning_rate": 3.5009482955504584e-07, "loss": 0.0108, "step": 946950 }, { "epoch": 9.31, "grad_norm": 0.007266737520694733, "learning_rate": 3.4997070710079737e-07, "loss": 0.0362, "step": 946975 }, { "epoch": 9.31, "grad_norm": 0.21832741796970367, "learning_rate": 3.4984658464654895e-07, "loss": 0.0257, "step": 947000 }, { "epoch": 9.31, "grad_norm": 0.04899093508720398, "learning_rate": 3.4972246219230043e-07, "loss": 0.024, "step": 947025 }, { "epoch": 9.31, "grad_norm": 7.966212749481201, "learning_rate": 3.49598339738052e-07, "loss": 0.0066, "step": 947050 }, { "epoch": 9.31, "grad_norm": 1.4790245294570923, "learning_rate": 3.494742172838035e-07, "loss": 0.0287, "step": 947075 }, { "epoch": 9.31, "grad_norm": 14.178089141845703, "learning_rate": 3.4935009482955507e-07, "loss": 0.017, "step": 947100 }, { "epoch": 9.31, "grad_norm": 1.1843794584274292, "learning_rate": 3.4922597237530665e-07, "loss": 0.0391, "step": 947125 }, { "epoch": 9.31, "grad_norm": 2.51228404045105, "learning_rate": 3.4910184992105813e-07, "loss": 0.0181, "step": 947150 }, { "epoch": 9.31, "grad_norm": 0.17910701036453247, "learning_rate": 3.489777274668097e-07, "loss": 0.0405, "step": 947175 }, { "epoch": 9.31, "grad_norm": 1.0073378086090088, "learning_rate": 3.488536050125612e-07, "loss": 0.0112, "step": 947200 }, { "epoch": 9.31, "grad_norm": 0.18573789298534393, "learning_rate": 3.4872948255831277e-07, "loss": 0.0358, "step": 947225 }, { "epoch": 9.31, "grad_norm": 19.23812484741211, "learning_rate": 3.4860536010406424e-07, "loss": 0.0183, "step": 947250 }, { "epoch": 9.31, "grad_norm": 0.019529549404978752, "learning_rate": 3.484812376498158e-07, "loss": 0.0258, "step": 947275 }, { "epoch": 9.31, "grad_norm": 2.3753654956817627, "learning_rate": 3.483571151955674e-07, "loss": 0.0059, "step": 947300 }, { "epoch": 9.31, "grad_norm": 1.1840912103652954, "learning_rate": 3.482329927413189e-07, "loss": 0.0409, "step": 947325 }, { "epoch": 9.31, "grad_norm": 1.0181936025619507, "learning_rate": 3.4810887028707047e-07, "loss": 0.013, "step": 947350 }, { "epoch": 9.31, "grad_norm": 1.6102895736694336, "learning_rate": 3.4798474783282194e-07, "loss": 0.0344, "step": 947375 }, { "epoch": 9.31, "grad_norm": 10.651955604553223, "learning_rate": 3.478606253785735e-07, "loss": 0.0184, "step": 947400 }, { "epoch": 9.32, "grad_norm": 0.014841011725366116, "learning_rate": 3.4773650292432505e-07, "loss": 0.0368, "step": 947425 }, { "epoch": 9.32, "grad_norm": 0.21529728174209595, "learning_rate": 3.476123804700766e-07, "loss": 0.0201, "step": 947450 }, { "epoch": 9.32, "grad_norm": 0.1438254565000534, "learning_rate": 3.4748825801582816e-07, "loss": 0.019, "step": 947475 }, { "epoch": 9.32, "grad_norm": 0.10069172829389572, "learning_rate": 3.4736413556157964e-07, "loss": 0.0239, "step": 947500 }, { "epoch": 9.32, "grad_norm": 1.195846438407898, "learning_rate": 3.472400131073312e-07, "loss": 0.0614, "step": 947525 }, { "epoch": 9.32, "grad_norm": 6.487679958343506, "learning_rate": 3.4711589065308275e-07, "loss": 0.023, "step": 947550 }, { "epoch": 9.32, "grad_norm": 0.1851624995470047, "learning_rate": 3.469917681988343e-07, "loss": 0.0377, "step": 947575 }, { "epoch": 9.32, "grad_norm": 3.3292007446289062, "learning_rate": 3.468676457445858e-07, "loss": 0.0109, "step": 947600 }, { "epoch": 9.32, "grad_norm": 0.6874074339866638, "learning_rate": 3.4674352329033734e-07, "loss": 0.0332, "step": 947625 }, { "epoch": 9.32, "grad_norm": 14.7428560256958, "learning_rate": 3.4661940083608887e-07, "loss": 0.013, "step": 947650 }, { "epoch": 9.32, "grad_norm": 0.014935394749045372, "learning_rate": 3.464952783818404e-07, "loss": 0.0222, "step": 947675 }, { "epoch": 9.32, "grad_norm": 7.515717506408691, "learning_rate": 3.463711559275919e-07, "loss": 0.0131, "step": 947700 }, { "epoch": 9.32, "grad_norm": 0.03547711297869682, "learning_rate": 3.462470334733435e-07, "loss": 0.0297, "step": 947725 }, { "epoch": 9.32, "grad_norm": 6.371575355529785, "learning_rate": 3.46122911019095e-07, "loss": 0.0089, "step": 947750 }, { "epoch": 9.32, "grad_norm": 0.3557877838611603, "learning_rate": 3.4599878856484657e-07, "loss": 0.0502, "step": 947775 }, { "epoch": 9.32, "grad_norm": 8.986308097839355, "learning_rate": 3.458746661105981e-07, "loss": 0.0234, "step": 947800 }, { "epoch": 9.32, "grad_norm": 1.1513248682022095, "learning_rate": 3.457505436563496e-07, "loss": 0.0384, "step": 947825 }, { "epoch": 9.32, "grad_norm": 1.3845711946487427, "learning_rate": 3.456264212021012e-07, "loss": 0.0195, "step": 947850 }, { "epoch": 9.32, "grad_norm": 0.10070957243442535, "learning_rate": 3.455022987478527e-07, "loss": 0.0305, "step": 947875 }, { "epoch": 9.32, "grad_norm": 7.977591037750244, "learning_rate": 3.4537817629360427e-07, "loss": 0.0171, "step": 947900 }, { "epoch": 9.32, "grad_norm": 0.007835420779883862, "learning_rate": 3.4525405383935574e-07, "loss": 0.0198, "step": 947925 }, { "epoch": 9.32, "grad_norm": 1.299115777015686, "learning_rate": 3.451299313851073e-07, "loss": 0.0207, "step": 947950 }, { "epoch": 9.32, "grad_norm": 1.2176263332366943, "learning_rate": 3.450058089308589e-07, "loss": 0.0439, "step": 947975 }, { "epoch": 9.32, "grad_norm": 0.2776883542537689, "learning_rate": 3.448816864766104e-07, "loss": 0.0069, "step": 948000 }, { "epoch": 9.32, "grad_norm": 0.2704468369483948, "learning_rate": 3.4475756402236196e-07, "loss": 0.0475, "step": 948025 }, { "epoch": 9.32, "grad_norm": 0.9234769344329834, "learning_rate": 3.4463344156811344e-07, "loss": 0.011, "step": 948050 }, { "epoch": 9.32, "grad_norm": 0.2247745245695114, "learning_rate": 3.44509319113865e-07, "loss": 0.0526, "step": 948075 }, { "epoch": 9.32, "grad_norm": 9.329933166503906, "learning_rate": 3.443851966596165e-07, "loss": 0.0174, "step": 948100 }, { "epoch": 9.32, "grad_norm": 3.980992317199707, "learning_rate": 3.442610742053681e-07, "loss": 0.0434, "step": 948125 }, { "epoch": 9.32, "grad_norm": 0.06478340178728104, "learning_rate": 3.4413695175111966e-07, "loss": 0.0122, "step": 948150 }, { "epoch": 9.32, "grad_norm": 4.526786804199219, "learning_rate": 3.4401282929687114e-07, "loss": 0.0523, "step": 948175 }, { "epoch": 9.32, "grad_norm": 0.1165742352604866, "learning_rate": 3.438887068426227e-07, "loss": 0.0131, "step": 948200 }, { "epoch": 9.32, "grad_norm": 0.02485605515539646, "learning_rate": 3.437645843883742e-07, "loss": 0.0452, "step": 948225 }, { "epoch": 9.32, "grad_norm": 2.4372498989105225, "learning_rate": 3.436404619341258e-07, "loss": 0.0125, "step": 948250 }, { "epoch": 9.32, "grad_norm": 0.1104908436536789, "learning_rate": 3.435163394798773e-07, "loss": 0.0292, "step": 948275 }, { "epoch": 9.32, "grad_norm": 0.8731285929679871, "learning_rate": 3.4339221702562884e-07, "loss": 0.0091, "step": 948300 }, { "epoch": 9.32, "grad_norm": 0.07231444120407104, "learning_rate": 3.4326809457138037e-07, "loss": 0.0283, "step": 948325 }, { "epoch": 9.32, "grad_norm": 3.86788010597229, "learning_rate": 3.431439721171319e-07, "loss": 0.0115, "step": 948350 }, { "epoch": 9.32, "grad_norm": 1.5351284742355347, "learning_rate": 3.430198496628834e-07, "loss": 0.0385, "step": 948375 }, { "epoch": 9.32, "grad_norm": 0.9003145098686218, "learning_rate": 3.42895727208635e-07, "loss": 0.0173, "step": 948400 }, { "epoch": 9.33, "grad_norm": 0.27469921112060547, "learning_rate": 3.427716047543865e-07, "loss": 0.0547, "step": 948425 }, { "epoch": 9.33, "grad_norm": 5.049697399139404, "learning_rate": 3.4264748230013806e-07, "loss": 0.0172, "step": 948450 }, { "epoch": 9.33, "grad_norm": 0.012300119735300541, "learning_rate": 3.4252335984588954e-07, "loss": 0.0279, "step": 948475 }, { "epoch": 9.33, "grad_norm": 17.555313110351562, "learning_rate": 3.423992373916411e-07, "loss": 0.0103, "step": 948500 }, { "epoch": 9.33, "grad_norm": 0.01790679432451725, "learning_rate": 3.4227511493739265e-07, "loss": 0.029, "step": 948525 }, { "epoch": 9.33, "grad_norm": 0.33439216017723083, "learning_rate": 3.421509924831442e-07, "loss": 0.0096, "step": 948550 }, { "epoch": 9.33, "grad_norm": 0.0335770919919014, "learning_rate": 3.4202687002889576e-07, "loss": 0.037, "step": 948575 }, { "epoch": 9.33, "grad_norm": 17.087671279907227, "learning_rate": 3.4190274757464724e-07, "loss": 0.0246, "step": 948600 }, { "epoch": 9.33, "grad_norm": 4.071225166320801, "learning_rate": 3.417786251203988e-07, "loss": 0.028, "step": 948625 }, { "epoch": 9.33, "grad_norm": 0.0989886149764061, "learning_rate": 3.416545026661503e-07, "loss": 0.0146, "step": 948650 }, { "epoch": 9.33, "grad_norm": 4.17443323135376, "learning_rate": 3.415303802119019e-07, "loss": 0.0379, "step": 948675 }, { "epoch": 9.33, "grad_norm": 16.22010612487793, "learning_rate": 3.4140625775765346e-07, "loss": 0.0156, "step": 948700 }, { "epoch": 9.33, "grad_norm": 6.328703880310059, "learning_rate": 3.4128213530340494e-07, "loss": 0.0551, "step": 948725 }, { "epoch": 9.33, "grad_norm": 0.46145713329315186, "learning_rate": 3.411580128491565e-07, "loss": 0.0124, "step": 948750 }, { "epoch": 9.33, "grad_norm": 0.2551455795764923, "learning_rate": 3.41033890394908e-07, "loss": 0.0251, "step": 948775 }, { "epoch": 9.33, "grad_norm": 20.979400634765625, "learning_rate": 3.409097679406596e-07, "loss": 0.0364, "step": 948800 }, { "epoch": 9.33, "grad_norm": 0.08943548053503036, "learning_rate": 3.407856454864111e-07, "loss": 0.0353, "step": 948825 }, { "epoch": 9.33, "grad_norm": 2.462514877319336, "learning_rate": 3.4066152303216264e-07, "loss": 0.0041, "step": 948850 }, { "epoch": 9.33, "grad_norm": 3.3793318271636963, "learning_rate": 3.405374005779142e-07, "loss": 0.0387, "step": 948875 }, { "epoch": 9.33, "grad_norm": 6.516716003417969, "learning_rate": 3.404132781236657e-07, "loss": 0.0229, "step": 948900 }, { "epoch": 9.33, "grad_norm": 0.10572300851345062, "learning_rate": 3.402891556694173e-07, "loss": 0.0324, "step": 948925 }, { "epoch": 9.33, "grad_norm": 17.68952178955078, "learning_rate": 3.401650332151688e-07, "loss": 0.0174, "step": 948950 }, { "epoch": 9.33, "grad_norm": 0.06553412228822708, "learning_rate": 3.4004091076092033e-07, "loss": 0.031, "step": 948975 }, { "epoch": 9.33, "grad_norm": 1.7641160488128662, "learning_rate": 3.3991678830667186e-07, "loss": 0.0083, "step": 949000 }, { "epoch": 9.33, "grad_norm": 0.3413848280906677, "learning_rate": 3.397926658524234e-07, "loss": 0.0435, "step": 949025 }, { "epoch": 9.33, "grad_norm": 1.3836101293563843, "learning_rate": 3.396685433981749e-07, "loss": 0.0108, "step": 949050 }, { "epoch": 9.33, "grad_norm": 1.616761326789856, "learning_rate": 3.3954442094392645e-07, "loss": 0.0369, "step": 949075 }, { "epoch": 9.33, "grad_norm": 8.559211730957031, "learning_rate": 3.39420298489678e-07, "loss": 0.0191, "step": 949100 }, { "epoch": 9.33, "grad_norm": 5.148550510406494, "learning_rate": 3.3929617603542956e-07, "loss": 0.0326, "step": 949125 }, { "epoch": 9.33, "grad_norm": 12.608073234558105, "learning_rate": 3.3917205358118104e-07, "loss": 0.0156, "step": 949150 }, { "epoch": 9.33, "grad_norm": 0.21670283377170563, "learning_rate": 3.390479311269326e-07, "loss": 0.0408, "step": 949175 }, { "epoch": 9.33, "grad_norm": 8.113886833190918, "learning_rate": 3.3892380867268415e-07, "loss": 0.0216, "step": 949200 }, { "epoch": 9.33, "grad_norm": 0.08476845175027847, "learning_rate": 3.387996862184357e-07, "loss": 0.0366, "step": 949225 }, { "epoch": 9.33, "grad_norm": 6.499586582183838, "learning_rate": 3.3867556376418726e-07, "loss": 0.017, "step": 949250 }, { "epoch": 9.33, "grad_norm": 1.127946376800537, "learning_rate": 3.3855144130993874e-07, "loss": 0.0288, "step": 949275 }, { "epoch": 9.33, "grad_norm": 4.674213886260986, "learning_rate": 3.384273188556903e-07, "loss": 0.0076, "step": 949300 }, { "epoch": 9.33, "grad_norm": 0.5641337633132935, "learning_rate": 3.383031964014418e-07, "loss": 0.0345, "step": 949325 }, { "epoch": 9.33, "grad_norm": 8.33480453491211, "learning_rate": 3.381790739471934e-07, "loss": 0.014, "step": 949350 }, { "epoch": 9.33, "grad_norm": 0.063972607254982, "learning_rate": 3.3805495149294496e-07, "loss": 0.0487, "step": 949375 }, { "epoch": 9.33, "grad_norm": 6.526846885681152, "learning_rate": 3.3793082903869643e-07, "loss": 0.0207, "step": 949400 }, { "epoch": 9.33, "grad_norm": 4.517490863800049, "learning_rate": 3.37806706584448e-07, "loss": 0.0342, "step": 949425 }, { "epoch": 9.34, "grad_norm": 16.6046085357666, "learning_rate": 3.376825841301995e-07, "loss": 0.007, "step": 949450 }, { "epoch": 9.34, "grad_norm": 0.20351852476596832, "learning_rate": 3.375584616759511e-07, "loss": 0.0234, "step": 949475 }, { "epoch": 9.34, "grad_norm": 17.64059066772461, "learning_rate": 3.3743433922170255e-07, "loss": 0.0265, "step": 949500 }, { "epoch": 9.34, "grad_norm": 0.002711232751607895, "learning_rate": 3.3731518166562405e-07, "loss": 0.0533, "step": 949525 }, { "epoch": 9.34, "grad_norm": 0.15497566759586334, "learning_rate": 3.371910592113756e-07, "loss": 0.0157, "step": 949550 }, { "epoch": 9.34, "grad_norm": 0.07526522874832153, "learning_rate": 3.370669367571271e-07, "loss": 0.0339, "step": 949575 }, { "epoch": 9.34, "grad_norm": 18.910444259643555, "learning_rate": 3.369428143028787e-07, "loss": 0.0321, "step": 949600 }, { "epoch": 9.34, "grad_norm": 0.6885420680046082, "learning_rate": 3.3681869184863017e-07, "loss": 0.0417, "step": 949625 }, { "epoch": 9.34, "grad_norm": 12.631180763244629, "learning_rate": 3.3669456939438175e-07, "loss": 0.0155, "step": 949650 }, { "epoch": 9.34, "grad_norm": 1.5031111240386963, "learning_rate": 3.365704469401333e-07, "loss": 0.0295, "step": 949675 }, { "epoch": 9.34, "grad_norm": 0.54571533203125, "learning_rate": 3.364463244858848e-07, "loss": 0.0216, "step": 949700 }, { "epoch": 9.34, "grad_norm": 0.010600754991173744, "learning_rate": 3.363222020316364e-07, "loss": 0.0232, "step": 949725 }, { "epoch": 9.34, "grad_norm": 10.690478324890137, "learning_rate": 3.3619807957738787e-07, "loss": 0.0133, "step": 949750 }, { "epoch": 9.34, "grad_norm": 0.17176954448223114, "learning_rate": 3.3607395712313945e-07, "loss": 0.0538, "step": 949775 }, { "epoch": 9.34, "grad_norm": 1.0001225471496582, "learning_rate": 3.3594983466889093e-07, "loss": 0.0147, "step": 949800 }, { "epoch": 9.34, "grad_norm": 5.584761619567871, "learning_rate": 3.358257122146425e-07, "loss": 0.034, "step": 949825 }, { "epoch": 9.34, "grad_norm": 5.769382476806641, "learning_rate": 3.357015897603941e-07, "loss": 0.0145, "step": 949850 }, { "epoch": 9.34, "grad_norm": 2.1634042263031006, "learning_rate": 3.3557746730614557e-07, "loss": 0.0442, "step": 949875 }, { "epoch": 9.34, "grad_norm": 14.905183792114258, "learning_rate": 3.3545334485189715e-07, "loss": 0.0115, "step": 949900 }, { "epoch": 9.34, "grad_norm": 2.2228081226348877, "learning_rate": 3.353292223976486e-07, "loss": 0.036, "step": 949925 }, { "epoch": 9.34, "grad_norm": 5.153228282928467, "learning_rate": 3.352050999434002e-07, "loss": 0.0152, "step": 949950 }, { "epoch": 9.34, "grad_norm": 0.4258977472782135, "learning_rate": 3.350809774891517e-07, "loss": 0.0525, "step": 949975 }, { "epoch": 9.34, "grad_norm": 1.419783115386963, "learning_rate": 3.3495685503490327e-07, "loss": 0.0162, "step": 950000 }, { "epoch": 9.34, "grad_norm": 2.166273593902588, "learning_rate": 3.3483273258065485e-07, "loss": 0.0207, "step": 950025 }, { "epoch": 9.34, "grad_norm": 1.0813536643981934, "learning_rate": 3.347086101264063e-07, "loss": 0.0115, "step": 950050 }, { "epoch": 9.34, "grad_norm": 0.06715525686740875, "learning_rate": 3.345844876721579e-07, "loss": 0.0511, "step": 950075 }, { "epoch": 9.34, "grad_norm": 0.5732385516166687, "learning_rate": 3.344603652179094e-07, "loss": 0.0088, "step": 950100 }, { "epoch": 9.34, "grad_norm": 0.11040227115154266, "learning_rate": 3.3433624276366096e-07, "loss": 0.0308, "step": 950125 }, { "epoch": 9.34, "grad_norm": 7.960848331451416, "learning_rate": 3.342121203094125e-07, "loss": 0.0055, "step": 950150 }, { "epoch": 9.34, "grad_norm": 1.987928032875061, "learning_rate": 3.34087997855164e-07, "loss": 0.0369, "step": 950175 }, { "epoch": 9.34, "grad_norm": 12.98668384552002, "learning_rate": 3.3396387540091555e-07, "loss": 0.0102, "step": 950200 }, { "epoch": 9.34, "grad_norm": 0.03796623274683952, "learning_rate": 3.338397529466671e-07, "loss": 0.0377, "step": 950225 }, { "epoch": 9.34, "grad_norm": 16.57004165649414, "learning_rate": 3.337156304924186e-07, "loss": 0.0248, "step": 950250 }, { "epoch": 9.34, "grad_norm": 0.3177982270717621, "learning_rate": 3.335915080381702e-07, "loss": 0.035, "step": 950275 }, { "epoch": 9.34, "grad_norm": 15.713542938232422, "learning_rate": 3.3346738558392167e-07, "loss": 0.0141, "step": 950300 }, { "epoch": 9.34, "grad_norm": 0.17152298986911774, "learning_rate": 3.3334326312967325e-07, "loss": 0.0349, "step": 950325 }, { "epoch": 9.34, "grad_norm": 8.484130859375, "learning_rate": 3.332191406754248e-07, "loss": 0.0143, "step": 950350 }, { "epoch": 9.34, "grad_norm": 1.3222324848175049, "learning_rate": 3.330950182211763e-07, "loss": 0.0347, "step": 950375 }, { "epoch": 9.34, "grad_norm": 8.671682357788086, "learning_rate": 3.3297089576692784e-07, "loss": 0.0144, "step": 950400 }, { "epoch": 9.34, "grad_norm": 0.021321313455700874, "learning_rate": 3.3284677331267937e-07, "loss": 0.0488, "step": 950425 }, { "epoch": 9.34, "grad_norm": 2.3016788959503174, "learning_rate": 3.3272265085843095e-07, "loss": 0.0091, "step": 950450 }, { "epoch": 9.35, "grad_norm": 0.05810113251209259, "learning_rate": 3.325985284041824e-07, "loss": 0.044, "step": 950475 }, { "epoch": 9.35, "grad_norm": 0.2600933909416199, "learning_rate": 3.32474405949934e-07, "loss": 0.0178, "step": 950500 }, { "epoch": 9.35, "grad_norm": 0.047561440616846085, "learning_rate": 3.323502834956855e-07, "loss": 0.0477, "step": 950525 }, { "epoch": 9.35, "grad_norm": 0.28085049986839294, "learning_rate": 3.3222616104143707e-07, "loss": 0.0132, "step": 950550 }, { "epoch": 9.35, "grad_norm": 1.6664857864379883, "learning_rate": 3.3210203858718865e-07, "loss": 0.0535, "step": 950575 }, { "epoch": 9.35, "grad_norm": 1.769177794456482, "learning_rate": 3.319779161329401e-07, "loss": 0.0169, "step": 950600 }, { "epoch": 9.35, "grad_norm": 1.7925794124603271, "learning_rate": 3.318537936786917e-07, "loss": 0.0322, "step": 950625 }, { "epoch": 9.35, "grad_norm": 2.0145719051361084, "learning_rate": 3.317296712244432e-07, "loss": 0.0157, "step": 950650 }, { "epoch": 9.35, "grad_norm": 3.1945109367370605, "learning_rate": 3.3160554877019476e-07, "loss": 0.0502, "step": 950675 }, { "epoch": 9.35, "grad_norm": 0.4805380702018738, "learning_rate": 3.3148142631594635e-07, "loss": 0.0108, "step": 950700 }, { "epoch": 9.35, "grad_norm": 0.004662730265408754, "learning_rate": 3.313573038616978e-07, "loss": 0.0241, "step": 950725 }, { "epoch": 9.35, "grad_norm": 3.8775665760040283, "learning_rate": 3.312331814074494e-07, "loss": 0.0192, "step": 950750 }, { "epoch": 9.35, "grad_norm": 1.1864522695541382, "learning_rate": 3.311090589532009e-07, "loss": 0.0396, "step": 950775 }, { "epoch": 9.35, "grad_norm": 6.32108736038208, "learning_rate": 3.3098493649895246e-07, "loss": 0.017, "step": 950800 }, { "epoch": 9.35, "grad_norm": 0.10057439655065536, "learning_rate": 3.3086081404470394e-07, "loss": 0.035, "step": 950825 }, { "epoch": 9.35, "grad_norm": 3.5165293216705322, "learning_rate": 3.307366915904555e-07, "loss": 0.0165, "step": 950850 }, { "epoch": 9.35, "grad_norm": 1.3744752407073975, "learning_rate": 3.3061256913620705e-07, "loss": 0.0326, "step": 950875 }, { "epoch": 9.35, "grad_norm": 7.627086162567139, "learning_rate": 3.304884466819586e-07, "loss": 0.0161, "step": 950900 }, { "epoch": 9.35, "grad_norm": 1.480269193649292, "learning_rate": 3.303643242277101e-07, "loss": 0.0311, "step": 950925 }, { "epoch": 9.35, "grad_norm": 0.5193156003952026, "learning_rate": 3.3024020177346164e-07, "loss": 0.0084, "step": 950950 }, { "epoch": 9.35, "grad_norm": 0.03421088680624962, "learning_rate": 3.3011607931921317e-07, "loss": 0.0511, "step": 950975 }, { "epoch": 9.35, "grad_norm": 2.5101819038391113, "learning_rate": 3.2999195686496475e-07, "loss": 0.0125, "step": 951000 }, { "epoch": 9.35, "grad_norm": 10.900116920471191, "learning_rate": 3.298678344107163e-07, "loss": 0.0422, "step": 951025 }, { "epoch": 9.35, "grad_norm": 2.3494186401367188, "learning_rate": 3.297437119564678e-07, "loss": 0.0252, "step": 951050 }, { "epoch": 9.35, "grad_norm": 1.5182827711105347, "learning_rate": 3.2961958950221933e-07, "loss": 0.037, "step": 951075 }, { "epoch": 9.35, "grad_norm": 7.7571234703063965, "learning_rate": 3.2949546704797086e-07, "loss": 0.0137, "step": 951100 }, { "epoch": 9.35, "grad_norm": 0.1507510542869568, "learning_rate": 3.2937134459372245e-07, "loss": 0.048, "step": 951125 }, { "epoch": 9.35, "grad_norm": 2.0248072147369385, "learning_rate": 3.292472221394739e-07, "loss": 0.0188, "step": 951150 }, { "epoch": 9.35, "grad_norm": 0.05587217956781387, "learning_rate": 3.291230996852255e-07, "loss": 0.0541, "step": 951175 }, { "epoch": 9.35, "grad_norm": 1.711217999458313, "learning_rate": 3.28998977230977e-07, "loss": 0.0211, "step": 951200 }, { "epoch": 9.35, "grad_norm": 0.04793395847082138, "learning_rate": 3.2887485477672856e-07, "loss": 0.0265, "step": 951225 }, { "epoch": 9.35, "grad_norm": 0.26400861144065857, "learning_rate": 3.2875073232248004e-07, "loss": 0.0098, "step": 951250 }, { "epoch": 9.35, "grad_norm": 2.7007596492767334, "learning_rate": 3.286266098682316e-07, "loss": 0.0258, "step": 951275 }, { "epoch": 9.35, "grad_norm": 11.407846450805664, "learning_rate": 3.285024874139832e-07, "loss": 0.0148, "step": 951300 }, { "epoch": 9.35, "grad_norm": 0.006989671383053064, "learning_rate": 3.283783649597347e-07, "loss": 0.057, "step": 951325 }, { "epoch": 9.35, "grad_norm": 7.759479522705078, "learning_rate": 3.2825424250548626e-07, "loss": 0.0151, "step": 951350 }, { "epoch": 9.35, "grad_norm": 4.089055061340332, "learning_rate": 3.2813012005123774e-07, "loss": 0.0273, "step": 951375 }, { "epoch": 9.35, "grad_norm": 0.48027944564819336, "learning_rate": 3.280059975969893e-07, "loss": 0.0055, "step": 951400 }, { "epoch": 9.35, "grad_norm": 0.08888696879148483, "learning_rate": 3.278818751427409e-07, "loss": 0.0448, "step": 951425 }, { "epoch": 9.35, "grad_norm": 0.19265896081924438, "learning_rate": 3.277577526884924e-07, "loss": 0.0105, "step": 951450 }, { "epoch": 9.36, "grad_norm": 0.4614463150501251, "learning_rate": 3.2763363023424396e-07, "loss": 0.0532, "step": 951475 }, { "epoch": 9.36, "grad_norm": 12.98231029510498, "learning_rate": 3.2750950777999544e-07, "loss": 0.0215, "step": 951500 }, { "epoch": 9.36, "grad_norm": 1.2709232568740845, "learning_rate": 3.27385385325747e-07, "loss": 0.0484, "step": 951525 }, { "epoch": 9.36, "grad_norm": 14.90278148651123, "learning_rate": 3.2726126287149855e-07, "loss": 0.0139, "step": 951550 }, { "epoch": 9.36, "grad_norm": 0.33566686511039734, "learning_rate": 3.271371404172501e-07, "loss": 0.0358, "step": 951575 }, { "epoch": 9.36, "grad_norm": 9.87586498260498, "learning_rate": 3.270130179630016e-07, "loss": 0.0212, "step": 951600 }, { "epoch": 9.36, "grad_norm": 0.10316359251737595, "learning_rate": 3.2688889550875313e-07, "loss": 0.0443, "step": 951625 }, { "epoch": 9.36, "grad_norm": 12.852981567382812, "learning_rate": 3.2676477305450466e-07, "loss": 0.0209, "step": 951650 }, { "epoch": 9.36, "grad_norm": 1.8745579719543457, "learning_rate": 3.266406506002562e-07, "loss": 0.0364, "step": 951675 }, { "epoch": 9.36, "grad_norm": 0.8782221674919128, "learning_rate": 3.265165281460077e-07, "loss": 0.0124, "step": 951700 }, { "epoch": 9.36, "grad_norm": 0.10509326308965683, "learning_rate": 3.263924056917593e-07, "loss": 0.0322, "step": 951725 }, { "epoch": 9.36, "grad_norm": 6.583446025848389, "learning_rate": 3.2626828323751083e-07, "loss": 0.0261, "step": 951750 }, { "epoch": 9.36, "grad_norm": 0.039475858211517334, "learning_rate": 3.2614416078326236e-07, "loss": 0.0359, "step": 951775 }, { "epoch": 9.36, "grad_norm": 0.08323590457439423, "learning_rate": 3.260200383290139e-07, "loss": 0.0062, "step": 951800 }, { "epoch": 9.36, "grad_norm": 1.2919729948043823, "learning_rate": 3.258959158747654e-07, "loss": 0.0437, "step": 951825 }, { "epoch": 9.36, "grad_norm": 8.726645469665527, "learning_rate": 3.25771793420517e-07, "loss": 0.0202, "step": 951850 }, { "epoch": 9.36, "grad_norm": 0.11961238086223602, "learning_rate": 3.256476709662685e-07, "loss": 0.0375, "step": 951875 }, { "epoch": 9.36, "grad_norm": 8.395672798156738, "learning_rate": 3.2552354851202006e-07, "loss": 0.0166, "step": 951900 }, { "epoch": 9.36, "grad_norm": 0.17577990889549255, "learning_rate": 3.2539942605777154e-07, "loss": 0.0262, "step": 951925 }, { "epoch": 9.36, "grad_norm": 6.863895893096924, "learning_rate": 3.252753036035231e-07, "loss": 0.0243, "step": 951950 }, { "epoch": 9.36, "grad_norm": 0.06414125859737396, "learning_rate": 3.251511811492747e-07, "loss": 0.0288, "step": 951975 }, { "epoch": 9.36, "grad_norm": 16.852142333984375, "learning_rate": 3.250270586950262e-07, "loss": 0.0265, "step": 952000 }, { "epoch": 9.36, "grad_norm": 4.240401268005371, "learning_rate": 3.2490293624077776e-07, "loss": 0.0312, "step": 952025 }, { "epoch": 9.36, "grad_norm": 7.6132988929748535, "learning_rate": 3.2477881378652923e-07, "loss": 0.0053, "step": 952050 }, { "epoch": 9.36, "grad_norm": 0.0032931999303400517, "learning_rate": 3.246546913322808e-07, "loss": 0.0392, "step": 952075 }, { "epoch": 9.36, "grad_norm": 5.581097602844238, "learning_rate": 3.245305688780323e-07, "loss": 0.0257, "step": 952100 }, { "epoch": 9.36, "grad_norm": 0.15549850463867188, "learning_rate": 3.244064464237839e-07, "loss": 0.0401, "step": 952125 }, { "epoch": 9.36, "grad_norm": 4.180016040802002, "learning_rate": 3.2428232396953546e-07, "loss": 0.0107, "step": 952150 }, { "epoch": 9.36, "grad_norm": 0.005909629166126251, "learning_rate": 3.241631664134569e-07, "loss": 0.0335, "step": 952175 }, { "epoch": 9.36, "grad_norm": 0.5286657810211182, "learning_rate": 3.2403904395920844e-07, "loss": 0.0065, "step": 952200 }, { "epoch": 9.36, "grad_norm": 0.010584503412246704, "learning_rate": 3.2391492150495997e-07, "loss": 0.0344, "step": 952225 }, { "epoch": 9.36, "grad_norm": 8.211276054382324, "learning_rate": 3.237907990507115e-07, "loss": 0.0219, "step": 952250 }, { "epoch": 9.36, "grad_norm": 6.887116432189941, "learning_rate": 3.23666676596463e-07, "loss": 0.0374, "step": 952275 }, { "epoch": 9.36, "grad_norm": 17.061147689819336, "learning_rate": 3.2354255414221455e-07, "loss": 0.0153, "step": 952300 }, { "epoch": 9.36, "grad_norm": 0.02186417579650879, "learning_rate": 3.2341843168796614e-07, "loss": 0.0308, "step": 952325 }, { "epoch": 9.36, "grad_norm": 4.657742500305176, "learning_rate": 3.232943092337176e-07, "loss": 0.0121, "step": 952350 }, { "epoch": 9.36, "grad_norm": 0.04692733287811279, "learning_rate": 3.231701867794692e-07, "loss": 0.0349, "step": 952375 }, { "epoch": 9.36, "grad_norm": 14.378432273864746, "learning_rate": 3.2304606432522067e-07, "loss": 0.0136, "step": 952400 }, { "epoch": 9.36, "grad_norm": 0.16143864393234253, "learning_rate": 3.2292194187097225e-07, "loss": 0.0281, "step": 952425 }, { "epoch": 9.36, "grad_norm": 1.771703839302063, "learning_rate": 3.2279781941672383e-07, "loss": 0.0223, "step": 952450 }, { "epoch": 9.36, "grad_norm": 0.052859701216220856, "learning_rate": 3.226736969624753e-07, "loss": 0.0334, "step": 952475 }, { "epoch": 9.37, "grad_norm": 10.400193214416504, "learning_rate": 3.225495745082269e-07, "loss": 0.0134, "step": 952500 }, { "epoch": 9.37, "grad_norm": 0.024968702346086502, "learning_rate": 3.2242545205397837e-07, "loss": 0.0332, "step": 952525 }, { "epoch": 9.37, "grad_norm": 7.543726444244385, "learning_rate": 3.2230132959972995e-07, "loss": 0.0202, "step": 952550 }, { "epoch": 9.37, "grad_norm": 0.5319291353225708, "learning_rate": 3.221772071454814e-07, "loss": 0.0388, "step": 952575 }, { "epoch": 9.37, "grad_norm": 17.87544059753418, "learning_rate": 3.22053084691233e-07, "loss": 0.0297, "step": 952600 }, { "epoch": 9.37, "grad_norm": 0.021523961797356606, "learning_rate": 3.219289622369846e-07, "loss": 0.0546, "step": 952625 }, { "epoch": 9.37, "grad_norm": 0.07852939516305923, "learning_rate": 3.2180483978273607e-07, "loss": 0.0146, "step": 952650 }, { "epoch": 9.37, "grad_norm": 10.00819206237793, "learning_rate": 3.2168071732848765e-07, "loss": 0.0247, "step": 952675 }, { "epoch": 9.37, "grad_norm": 3.436008930206299, "learning_rate": 3.215565948742391e-07, "loss": 0.0188, "step": 952700 }, { "epoch": 9.37, "grad_norm": 6.39424467086792, "learning_rate": 3.214324724199907e-07, "loss": 0.0294, "step": 952725 }, { "epoch": 9.37, "grad_norm": 10.58802604675293, "learning_rate": 3.2130834996574224e-07, "loss": 0.0138, "step": 952750 }, { "epoch": 9.37, "grad_norm": 0.03969541937112808, "learning_rate": 3.2118422751149376e-07, "loss": 0.0366, "step": 952775 }, { "epoch": 9.37, "grad_norm": 2.4761760234832764, "learning_rate": 3.210601050572453e-07, "loss": 0.0089, "step": 952800 }, { "epoch": 9.37, "grad_norm": 0.1263415515422821, "learning_rate": 3.209359826029968e-07, "loss": 0.0224, "step": 952825 }, { "epoch": 9.37, "grad_norm": 5.14990234375, "learning_rate": 3.2081186014874835e-07, "loss": 0.009, "step": 952850 }, { "epoch": 9.37, "grad_norm": 0.25781381130218506, "learning_rate": 3.2068773769449993e-07, "loss": 0.0392, "step": 952875 }, { "epoch": 9.37, "grad_norm": 12.510557174682617, "learning_rate": 3.2056361524025146e-07, "loss": 0.0122, "step": 952900 }, { "epoch": 9.37, "grad_norm": 0.053670767694711685, "learning_rate": 3.20439492786003e-07, "loss": 0.0335, "step": 952925 }, { "epoch": 9.37, "grad_norm": 6.804921627044678, "learning_rate": 3.203153703317545e-07, "loss": 0.0181, "step": 952950 }, { "epoch": 9.37, "grad_norm": 0.059883613139390945, "learning_rate": 3.2019124787750605e-07, "loss": 0.0346, "step": 952975 }, { "epoch": 9.37, "grad_norm": 12.84303092956543, "learning_rate": 3.2006712542325763e-07, "loss": 0.018, "step": 953000 }, { "epoch": 9.37, "grad_norm": 7.493513584136963, "learning_rate": 3.199430029690091e-07, "loss": 0.0196, "step": 953025 }, { "epoch": 9.37, "grad_norm": 4.898589134216309, "learning_rate": 3.198188805147607e-07, "loss": 0.0164, "step": 953050 }, { "epoch": 9.37, "grad_norm": 4.729598522186279, "learning_rate": 3.1969475806051217e-07, "loss": 0.0433, "step": 953075 }, { "epoch": 9.37, "grad_norm": 0.037194907665252686, "learning_rate": 3.1957063560626375e-07, "loss": 0.0129, "step": 953100 }, { "epoch": 9.37, "grad_norm": 0.017614394426345825, "learning_rate": 3.194465131520152e-07, "loss": 0.0286, "step": 953125 }, { "epoch": 9.37, "grad_norm": 0.5890962481498718, "learning_rate": 3.193223906977668e-07, "loss": 0.003, "step": 953150 }, { "epoch": 9.37, "grad_norm": 1.2416807413101196, "learning_rate": 3.191982682435184e-07, "loss": 0.0489, "step": 953175 }, { "epoch": 9.37, "grad_norm": 2.923750877380371, "learning_rate": 3.1907414578926987e-07, "loss": 0.0231, "step": 953200 }, { "epoch": 9.37, "grad_norm": 0.5333374738693237, "learning_rate": 3.1895002333502145e-07, "loss": 0.0425, "step": 953225 }, { "epoch": 9.37, "grad_norm": 13.560260772705078, "learning_rate": 3.188259008807729e-07, "loss": 0.0133, "step": 953250 }, { "epoch": 9.37, "grad_norm": 0.08490759879350662, "learning_rate": 3.187017784265245e-07, "loss": 0.0313, "step": 953275 }, { "epoch": 9.37, "grad_norm": 0.8905537128448486, "learning_rate": 3.185776559722761e-07, "loss": 0.0138, "step": 953300 }, { "epoch": 9.37, "grad_norm": 0.02687707729637623, "learning_rate": 3.1845353351802756e-07, "loss": 0.0359, "step": 953325 }, { "epoch": 9.37, "grad_norm": 0.5153416991233826, "learning_rate": 3.1832941106377915e-07, "loss": 0.0145, "step": 953350 }, { "epoch": 9.37, "grad_norm": 0.21585501730442047, "learning_rate": 3.182052886095306e-07, "loss": 0.0492, "step": 953375 }, { "epoch": 9.37, "grad_norm": 2.7571909427642822, "learning_rate": 3.180811661552822e-07, "loss": 0.0162, "step": 953400 }, { "epoch": 9.37, "grad_norm": 1.363558292388916, "learning_rate": 3.1795704370103373e-07, "loss": 0.0402, "step": 953425 }, { "epoch": 9.37, "grad_norm": 6.71474552154541, "learning_rate": 3.1783292124678526e-07, "loss": 0.0125, "step": 953450 }, { "epoch": 9.37, "grad_norm": 0.22021783888339996, "learning_rate": 3.177087987925368e-07, "loss": 0.0279, "step": 953475 }, { "epoch": 9.37, "grad_norm": 4.233968257904053, "learning_rate": 3.175846763382883e-07, "loss": 0.0117, "step": 953500 }, { "epoch": 9.38, "grad_norm": 0.04409031942486763, "learning_rate": 3.1746055388403985e-07, "loss": 0.0393, "step": 953525 }, { "epoch": 9.38, "grad_norm": 13.71911334991455, "learning_rate": 3.173364314297914e-07, "loss": 0.0099, "step": 953550 }, { "epoch": 9.38, "grad_norm": 0.11613289266824722, "learning_rate": 3.1721230897554296e-07, "loss": 0.0419, "step": 953575 }, { "epoch": 9.38, "grad_norm": 3.5899226665496826, "learning_rate": 3.170881865212945e-07, "loss": 0.0101, "step": 953600 }, { "epoch": 9.38, "grad_norm": 7.445803165435791, "learning_rate": 3.16964064067046e-07, "loss": 0.0336, "step": 953625 }, { "epoch": 9.38, "grad_norm": 23.11989974975586, "learning_rate": 3.1683994161279755e-07, "loss": 0.0234, "step": 953650 }, { "epoch": 9.38, "grad_norm": 1.4963420629501343, "learning_rate": 3.167158191585491e-07, "loss": 0.0345, "step": 953675 }, { "epoch": 9.38, "grad_norm": 5.900609016418457, "learning_rate": 3.165916967043006e-07, "loss": 0.0159, "step": 953700 }, { "epoch": 9.38, "grad_norm": 1.8632153272628784, "learning_rate": 3.164675742500522e-07, "loss": 0.0224, "step": 953725 }, { "epoch": 9.38, "grad_norm": 4.2041754722595215, "learning_rate": 3.1634345179580366e-07, "loss": 0.0219, "step": 953750 }, { "epoch": 9.38, "grad_norm": 0.026655681431293488, "learning_rate": 3.1621932934155525e-07, "loss": 0.0318, "step": 953775 }, { "epoch": 9.38, "grad_norm": 7.934545516967773, "learning_rate": 3.160952068873067e-07, "loss": 0.0169, "step": 953800 }, { "epoch": 9.38, "grad_norm": 0.009535581804811954, "learning_rate": 3.159710844330583e-07, "loss": 0.0252, "step": 953825 }, { "epoch": 9.38, "grad_norm": 7.313134670257568, "learning_rate": 3.158469619788099e-07, "loss": 0.0094, "step": 953850 }, { "epoch": 9.38, "grad_norm": 0.20444072782993317, "learning_rate": 3.1572283952456136e-07, "loss": 0.0264, "step": 953875 }, { "epoch": 9.38, "grad_norm": 0.5971460342407227, "learning_rate": 3.1559871707031294e-07, "loss": 0.0056, "step": 953900 }, { "epoch": 9.38, "grad_norm": 0.368028461933136, "learning_rate": 3.154745946160644e-07, "loss": 0.0389, "step": 953925 }, { "epoch": 9.38, "grad_norm": 8.007135391235352, "learning_rate": 3.15350472161816e-07, "loss": 0.0187, "step": 953950 }, { "epoch": 9.38, "grad_norm": 0.45928701758384705, "learning_rate": 3.152263497075675e-07, "loss": 0.041, "step": 953975 }, { "epoch": 9.38, "grad_norm": 0.7838749289512634, "learning_rate": 3.1510222725331906e-07, "loss": 0.0045, "step": 954000 }, { "epoch": 9.38, "grad_norm": 1.6638528108596802, "learning_rate": 3.1497810479907064e-07, "loss": 0.0423, "step": 954025 }, { "epoch": 9.38, "grad_norm": 0.15967737138271332, "learning_rate": 3.148539823448221e-07, "loss": 0.0134, "step": 954050 }, { "epoch": 9.38, "grad_norm": 2.2959506511688232, "learning_rate": 3.147298598905737e-07, "loss": 0.0445, "step": 954075 }, { "epoch": 9.38, "grad_norm": 4.475374698638916, "learning_rate": 3.146057374363252e-07, "loss": 0.0109, "step": 954100 }, { "epoch": 9.38, "grad_norm": 0.18995679914951324, "learning_rate": 3.1448161498207676e-07, "loss": 0.0477, "step": 954125 }, { "epoch": 9.38, "grad_norm": 17.678607940673828, "learning_rate": 3.143574925278283e-07, "loss": 0.0136, "step": 954150 }, { "epoch": 9.38, "grad_norm": 0.018169941380620003, "learning_rate": 3.142333700735798e-07, "loss": 0.0352, "step": 954175 }, { "epoch": 9.38, "grad_norm": 3.32997989654541, "learning_rate": 3.1410924761933135e-07, "loss": 0.0127, "step": 954200 }, { "epoch": 9.38, "grad_norm": 0.143058180809021, "learning_rate": 3.139900900632528e-07, "loss": 0.0317, "step": 954225 }, { "epoch": 9.38, "grad_norm": 0.07696878910064697, "learning_rate": 3.138659676090044e-07, "loss": 0.0121, "step": 954250 }, { "epoch": 9.38, "grad_norm": 2.0155277252197266, "learning_rate": 3.1374184515475586e-07, "loss": 0.0345, "step": 954275 }, { "epoch": 9.38, "grad_norm": 0.22888684272766113, "learning_rate": 3.1361772270050744e-07, "loss": 0.0124, "step": 954300 }, { "epoch": 9.38, "grad_norm": 1.0381745100021362, "learning_rate": 3.13493600246259e-07, "loss": 0.0212, "step": 954325 }, { "epoch": 9.38, "grad_norm": 6.717156887054443, "learning_rate": 3.133694777920105e-07, "loss": 0.013, "step": 954350 }, { "epoch": 9.38, "grad_norm": 0.004173118621110916, "learning_rate": 3.132453553377621e-07, "loss": 0.027, "step": 954375 }, { "epoch": 9.38, "grad_norm": 1.351952314376831, "learning_rate": 3.1312123288351355e-07, "loss": 0.0111, "step": 954400 }, { "epoch": 9.38, "grad_norm": 0.23321828246116638, "learning_rate": 3.1299711042926514e-07, "loss": 0.0319, "step": 954425 }, { "epoch": 9.38, "grad_norm": 15.30553150177002, "learning_rate": 3.128729879750166e-07, "loss": 0.014, "step": 954450 }, { "epoch": 9.38, "grad_norm": 0.0027712222654372454, "learning_rate": 3.127488655207682e-07, "loss": 0.0346, "step": 954475 }, { "epoch": 9.38, "grad_norm": 0.9685354828834534, "learning_rate": 3.126247430665198e-07, "loss": 0.0162, "step": 954500 }, { "epoch": 9.39, "grad_norm": 0.05466447398066521, "learning_rate": 3.1250062061227125e-07, "loss": 0.0474, "step": 954525 }, { "epoch": 9.39, "grad_norm": 0.25499168038368225, "learning_rate": 3.1237649815802283e-07, "loss": 0.0131, "step": 954550 }, { "epoch": 9.39, "grad_norm": 0.3058929443359375, "learning_rate": 3.1225237570377436e-07, "loss": 0.0162, "step": 954575 }, { "epoch": 9.39, "grad_norm": 0.2668909728527069, "learning_rate": 3.121282532495259e-07, "loss": 0.0172, "step": 954600 }, { "epoch": 9.39, "grad_norm": 0.019077396020293236, "learning_rate": 3.120041307952774e-07, "loss": 0.0362, "step": 954625 }, { "epoch": 9.39, "grad_norm": 5.401650428771973, "learning_rate": 3.1188000834102895e-07, "loss": 0.02, "step": 954650 }, { "epoch": 9.39, "grad_norm": 0.03524632379412651, "learning_rate": 3.117558858867805e-07, "loss": 0.0337, "step": 954675 }, { "epoch": 9.39, "grad_norm": 4.222387790679932, "learning_rate": 3.11631763432532e-07, "loss": 0.0144, "step": 954700 }, { "epoch": 9.39, "grad_norm": 0.13855984807014465, "learning_rate": 3.115076409782836e-07, "loss": 0.0361, "step": 954725 }, { "epoch": 9.39, "grad_norm": 4.475503444671631, "learning_rate": 3.113835185240351e-07, "loss": 0.0191, "step": 954750 }, { "epoch": 9.39, "grad_norm": 0.029923945665359497, "learning_rate": 3.1125939606978665e-07, "loss": 0.0257, "step": 954775 }, { "epoch": 9.39, "grad_norm": 9.914495468139648, "learning_rate": 3.111352736155382e-07, "loss": 0.0157, "step": 954800 }, { "epoch": 9.39, "grad_norm": 0.23657919466495514, "learning_rate": 3.110111511612897e-07, "loss": 0.0333, "step": 954825 }, { "epoch": 9.39, "grad_norm": 0.6961385011672974, "learning_rate": 3.1088702870704124e-07, "loss": 0.0071, "step": 954850 }, { "epoch": 9.39, "grad_norm": 1.0386948585510254, "learning_rate": 3.1076290625279277e-07, "loss": 0.0444, "step": 954875 }, { "epoch": 9.39, "grad_norm": 2.8368425369262695, "learning_rate": 3.106387837985443e-07, "loss": 0.0081, "step": 954900 }, { "epoch": 9.39, "grad_norm": 1.2628997564315796, "learning_rate": 3.105146613442958e-07, "loss": 0.0498, "step": 954925 }, { "epoch": 9.39, "grad_norm": 0.9117976427078247, "learning_rate": 3.1039053889004735e-07, "loss": 0.0076, "step": 954950 }, { "epoch": 9.39, "grad_norm": 0.047620922327041626, "learning_rate": 3.1026641643579894e-07, "loss": 0.05, "step": 954975 }, { "epoch": 9.39, "grad_norm": 0.7768259644508362, "learning_rate": 3.1014229398155046e-07, "loss": 0.0182, "step": 955000 }, { "epoch": 9.39, "grad_norm": 4.067888259887695, "learning_rate": 3.10018171527302e-07, "loss": 0.0395, "step": 955025 }, { "epoch": 9.39, "grad_norm": 5.388175010681152, "learning_rate": 3.098940490730535e-07, "loss": 0.0121, "step": 955050 }, { "epoch": 9.39, "grad_norm": 0.11614956706762314, "learning_rate": 3.0976992661880505e-07, "loss": 0.033, "step": 955075 }, { "epoch": 9.39, "grad_norm": 0.10840992629528046, "learning_rate": 3.0964580416455663e-07, "loss": 0.0067, "step": 955100 }, { "epoch": 9.39, "grad_norm": 0.23176880180835724, "learning_rate": 3.0952168171030816e-07, "loss": 0.0435, "step": 955125 }, { "epoch": 9.39, "grad_norm": 2.0635337829589844, "learning_rate": 3.093975592560597e-07, "loss": 0.0224, "step": 955150 }, { "epoch": 9.39, "grad_norm": 0.16774365305900574, "learning_rate": 3.092734368018112e-07, "loss": 0.0549, "step": 955175 }, { "epoch": 9.39, "grad_norm": 10.620437622070312, "learning_rate": 3.0914931434756275e-07, "loss": 0.0238, "step": 955200 }, { "epoch": 9.39, "grad_norm": 0.026389354839920998, "learning_rate": 3.090251918933143e-07, "loss": 0.0491, "step": 955225 }, { "epoch": 9.39, "grad_norm": 13.002337455749512, "learning_rate": 3.0890106943906586e-07, "loss": 0.0229, "step": 955250 }, { "epoch": 9.39, "grad_norm": 0.07290463149547577, "learning_rate": 3.087769469848174e-07, "loss": 0.0267, "step": 955275 }, { "epoch": 9.39, "grad_norm": 12.50668716430664, "learning_rate": 3.086528245305689e-07, "loss": 0.0174, "step": 955300 }, { "epoch": 9.39, "grad_norm": 0.008874636143445969, "learning_rate": 3.0852870207632045e-07, "loss": 0.0283, "step": 955325 }, { "epoch": 9.39, "grad_norm": 9.114786148071289, "learning_rate": 3.08404579622072e-07, "loss": 0.0231, "step": 955350 }, { "epoch": 9.39, "grad_norm": 0.025807633996009827, "learning_rate": 3.082804571678235e-07, "loss": 0.0191, "step": 955375 }, { "epoch": 9.39, "grad_norm": 4.776920795440674, "learning_rate": 3.0815633471357504e-07, "loss": 0.0097, "step": 955400 }, { "epoch": 9.39, "grad_norm": 0.6419570446014404, "learning_rate": 3.080322122593266e-07, "loss": 0.0464, "step": 955425 }, { "epoch": 9.39, "grad_norm": 3.2400403022766113, "learning_rate": 3.0790808980507815e-07, "loss": 0.0114, "step": 955450 }, { "epoch": 9.39, "grad_norm": 0.21900388598442078, "learning_rate": 3.077839673508297e-07, "loss": 0.0215, "step": 955475 }, { "epoch": 9.39, "grad_norm": 9.546359062194824, "learning_rate": 3.076598448965812e-07, "loss": 0.0136, "step": 955500 }, { "epoch": 9.39, "grad_norm": 0.09044366329908371, "learning_rate": 3.0753572244233273e-07, "loss": 0.046, "step": 955525 }, { "epoch": 9.4, "grad_norm": 14.196621894836426, "learning_rate": 3.0741159998808426e-07, "loss": 0.0215, "step": 955550 }, { "epoch": 9.4, "grad_norm": 0.59256511926651, "learning_rate": 3.072874775338358e-07, "loss": 0.0382, "step": 955575 }, { "epoch": 9.4, "grad_norm": 1.017160415649414, "learning_rate": 3.071633550795873e-07, "loss": 0.0129, "step": 955600 }, { "epoch": 9.4, "grad_norm": 0.006037218030542135, "learning_rate": 3.0703923262533885e-07, "loss": 0.0336, "step": 955625 }, { "epoch": 9.4, "grad_norm": 4.443064212799072, "learning_rate": 3.069151101710904e-07, "loss": 0.0121, "step": 955650 }, { "epoch": 9.4, "grad_norm": 0.4328444004058838, "learning_rate": 3.0679098771684196e-07, "loss": 0.0283, "step": 955675 }, { "epoch": 9.4, "grad_norm": 5.543866157531738, "learning_rate": 3.066668652625935e-07, "loss": 0.0095, "step": 955700 }, { "epoch": 9.4, "grad_norm": 0.03964169695973396, "learning_rate": 3.06542742808345e-07, "loss": 0.0395, "step": 955725 }, { "epoch": 9.4, "grad_norm": 12.617700576782227, "learning_rate": 3.0641862035409655e-07, "loss": 0.0345, "step": 955750 }, { "epoch": 9.4, "grad_norm": 1.0080585479736328, "learning_rate": 3.062944978998481e-07, "loss": 0.0292, "step": 955775 }, { "epoch": 9.4, "grad_norm": 3.4052748680114746, "learning_rate": 3.061703754455996e-07, "loss": 0.0115, "step": 955800 }, { "epoch": 9.4, "grad_norm": 0.0030566181521862745, "learning_rate": 3.060462529913512e-07, "loss": 0.0374, "step": 955825 }, { "epoch": 9.4, "grad_norm": 0.2235918641090393, "learning_rate": 3.059221305371027e-07, "loss": 0.0145, "step": 955850 }, { "epoch": 9.4, "grad_norm": 0.019286513328552246, "learning_rate": 3.0579800808285425e-07, "loss": 0.0269, "step": 955875 }, { "epoch": 9.4, "grad_norm": 0.5792203545570374, "learning_rate": 3.056738856286058e-07, "loss": 0.0092, "step": 955900 }, { "epoch": 9.4, "grad_norm": 0.8230485320091248, "learning_rate": 3.055497631743573e-07, "loss": 0.0278, "step": 955925 }, { "epoch": 9.4, "grad_norm": 9.234188079833984, "learning_rate": 3.054256407201089e-07, "loss": 0.0129, "step": 955950 }, { "epoch": 9.4, "grad_norm": 1.7499101161956787, "learning_rate": 3.053015182658604e-07, "loss": 0.048, "step": 955975 }, { "epoch": 9.4, "grad_norm": 1.1791918277740479, "learning_rate": 3.0517739581161195e-07, "loss": 0.0117, "step": 956000 }, { "epoch": 9.4, "grad_norm": 1.2826817035675049, "learning_rate": 3.050532733573635e-07, "loss": 0.0537, "step": 956025 }, { "epoch": 9.4, "grad_norm": 7.799938678741455, "learning_rate": 3.04929150903115e-07, "loss": 0.0228, "step": 956050 }, { "epoch": 9.4, "grad_norm": 1.3474235534667969, "learning_rate": 3.0480502844886653e-07, "loss": 0.0322, "step": 956075 }, { "epoch": 9.4, "grad_norm": 0.21408464014530182, "learning_rate": 3.0468090599461806e-07, "loss": 0.0121, "step": 956100 }, { "epoch": 9.4, "grad_norm": 0.7626672387123108, "learning_rate": 3.0455678354036964e-07, "loss": 0.0434, "step": 956125 }, { "epoch": 9.4, "grad_norm": 6.8184003829956055, "learning_rate": 3.0443266108612117e-07, "loss": 0.0114, "step": 956150 }, { "epoch": 9.4, "grad_norm": 0.017319021746516228, "learning_rate": 3.043085386318727e-07, "loss": 0.0422, "step": 956175 }, { "epoch": 9.4, "grad_norm": 9.515054702758789, "learning_rate": 3.0418441617762423e-07, "loss": 0.0089, "step": 956200 }, { "epoch": 9.4, "grad_norm": 0.028133446350693703, "learning_rate": 3.0406029372337576e-07, "loss": 0.031, "step": 956225 }, { "epoch": 9.4, "grad_norm": 1.2690836191177368, "learning_rate": 3.039361712691273e-07, "loss": 0.0129, "step": 956250 }, { "epoch": 9.4, "grad_norm": 0.09019230306148529, "learning_rate": 3.038120488148788e-07, "loss": 0.0289, "step": 956275 }, { "epoch": 9.4, "grad_norm": 0.2641116678714752, "learning_rate": 3.0368792636063035e-07, "loss": 0.0118, "step": 956300 }, { "epoch": 9.4, "grad_norm": 0.1378825455904007, "learning_rate": 3.035638039063819e-07, "loss": 0.0285, "step": 956325 }, { "epoch": 9.4, "grad_norm": 3.7821199893951416, "learning_rate": 3.034396814521334e-07, "loss": 0.0097, "step": 956350 }, { "epoch": 9.4, "grad_norm": 0.9928525686264038, "learning_rate": 3.03315558997885e-07, "loss": 0.0474, "step": 956375 }, { "epoch": 9.4, "grad_norm": 0.6565828919410706, "learning_rate": 3.031914365436365e-07, "loss": 0.0231, "step": 956400 }, { "epoch": 9.4, "grad_norm": 0.19808126986026764, "learning_rate": 3.0306731408938805e-07, "loss": 0.0462, "step": 956425 }, { "epoch": 9.4, "grad_norm": 26.356895446777344, "learning_rate": 3.029431916351396e-07, "loss": 0.0336, "step": 956450 }, { "epoch": 9.4, "grad_norm": 0.08249049633741379, "learning_rate": 3.028190691808911e-07, "loss": 0.0147, "step": 956475 }, { "epoch": 9.4, "grad_norm": 4.226415634155273, "learning_rate": 3.0269494672664263e-07, "loss": 0.0074, "step": 956500 }, { "epoch": 9.4, "grad_norm": 1.627602458000183, "learning_rate": 3.025708242723942e-07, "loss": 0.0313, "step": 956525 }, { "epoch": 9.4, "grad_norm": 0.19311052560806274, "learning_rate": 3.0244670181814574e-07, "loss": 0.0071, "step": 956550 }, { "epoch": 9.41, "grad_norm": 0.04298614710569382, "learning_rate": 3.023225793638973e-07, "loss": 0.0353, "step": 956575 }, { "epoch": 9.41, "grad_norm": 1.5474876165390015, "learning_rate": 3.021984569096488e-07, "loss": 0.0148, "step": 956600 }, { "epoch": 9.41, "grad_norm": 0.11912810802459717, "learning_rate": 3.0207433445540033e-07, "loss": 0.0264, "step": 956625 }, { "epoch": 9.41, "grad_norm": 1.7746700048446655, "learning_rate": 3.019502120011519e-07, "loss": 0.0163, "step": 956650 }, { "epoch": 9.41, "grad_norm": 0.17858292162418365, "learning_rate": 3.0182608954690344e-07, "loss": 0.0453, "step": 956675 }, { "epoch": 9.41, "grad_norm": 7.348031997680664, "learning_rate": 3.0170196709265497e-07, "loss": 0.0073, "step": 956700 }, { "epoch": 9.41, "grad_norm": 0.5206039547920227, "learning_rate": 3.015778446384065e-07, "loss": 0.0243, "step": 956725 }, { "epoch": 9.41, "grad_norm": 11.757801055908203, "learning_rate": 3.0145372218415803e-07, "loss": 0.0113, "step": 956750 }, { "epoch": 9.41, "grad_norm": 0.0909140408039093, "learning_rate": 3.0132959972990956e-07, "loss": 0.0277, "step": 956775 }, { "epoch": 9.41, "grad_norm": 12.884042739868164, "learning_rate": 3.0120547727566114e-07, "loss": 0.0109, "step": 956800 }, { "epoch": 9.41, "grad_norm": 0.00842992216348648, "learning_rate": 3.0108135482141267e-07, "loss": 0.0566, "step": 956825 }, { "epoch": 9.41, "grad_norm": 10.6412992477417, "learning_rate": 3.009572323671642e-07, "loss": 0.0149, "step": 956850 }, { "epoch": 9.41, "grad_norm": 0.10553106665611267, "learning_rate": 3.0083310991291573e-07, "loss": 0.0582, "step": 956875 }, { "epoch": 9.41, "grad_norm": 1.6615673303604126, "learning_rate": 3.0070898745866726e-07, "loss": 0.0188, "step": 956900 }, { "epoch": 9.41, "grad_norm": 1.5073918104171753, "learning_rate": 3.005848650044188e-07, "loss": 0.0374, "step": 956925 }, { "epoch": 9.41, "grad_norm": 0.3233679533004761, "learning_rate": 3.004607425501703e-07, "loss": 0.0093, "step": 956950 }, { "epoch": 9.41, "grad_norm": 1.344009518623352, "learning_rate": 3.0033662009592185e-07, "loss": 0.0338, "step": 956975 }, { "epoch": 9.41, "grad_norm": 5.049992084503174, "learning_rate": 3.002124976416734e-07, "loss": 0.0059, "step": 957000 }, { "epoch": 9.41, "grad_norm": 0.04377035051584244, "learning_rate": 3.000883751874249e-07, "loss": 0.0316, "step": 957025 }, { "epoch": 9.41, "grad_norm": 0.03721374273300171, "learning_rate": 2.9996425273317643e-07, "loss": 0.0263, "step": 957050 }, { "epoch": 9.41, "grad_norm": 1.475028395652771, "learning_rate": 2.99840130278928e-07, "loss": 0.0435, "step": 957075 }, { "epoch": 9.41, "grad_norm": 1.2352207899093628, "learning_rate": 2.9971600782467954e-07, "loss": 0.0216, "step": 957100 }, { "epoch": 9.41, "grad_norm": 15.331318855285645, "learning_rate": 2.9959685026860105e-07, "loss": 0.0242, "step": 957125 }, { "epoch": 9.41, "grad_norm": 0.11749432235956192, "learning_rate": 2.994727278143526e-07, "loss": 0.0123, "step": 957150 }, { "epoch": 9.41, "grad_norm": 0.16956306993961334, "learning_rate": 2.993486053601041e-07, "loss": 0.0286, "step": 957175 }, { "epoch": 9.41, "grad_norm": 1.138597011566162, "learning_rate": 2.9922448290585563e-07, "loss": 0.0084, "step": 957200 }, { "epoch": 9.41, "grad_norm": 1.6908302307128906, "learning_rate": 2.9910036045160716e-07, "loss": 0.0448, "step": 957225 }, { "epoch": 9.41, "grad_norm": 0.214660182595253, "learning_rate": 2.989762379973587e-07, "loss": 0.0085, "step": 957250 }, { "epoch": 9.41, "grad_norm": 0.28679969906806946, "learning_rate": 2.988521155431103e-07, "loss": 0.0223, "step": 957275 }, { "epoch": 9.41, "grad_norm": 6.225523948669434, "learning_rate": 2.987279930888618e-07, "loss": 0.0127, "step": 957300 }, { "epoch": 9.41, "grad_norm": 2.954655408859253, "learning_rate": 2.9860387063461333e-07, "loss": 0.0574, "step": 957325 }, { "epoch": 9.41, "grad_norm": 9.841652870178223, "learning_rate": 2.9847974818036486e-07, "loss": 0.0196, "step": 957350 }, { "epoch": 9.41, "grad_norm": 0.047407086938619614, "learning_rate": 2.983556257261164e-07, "loss": 0.0396, "step": 957375 }, { "epoch": 9.41, "grad_norm": 4.0276031494140625, "learning_rate": 2.982315032718679e-07, "loss": 0.0048, "step": 957400 }, { "epoch": 9.41, "grad_norm": 0.8457573652267456, "learning_rate": 2.9810738081761945e-07, "loss": 0.0291, "step": 957425 }, { "epoch": 9.41, "grad_norm": 10.66606616973877, "learning_rate": 2.97983258363371e-07, "loss": 0.0146, "step": 957450 }, { "epoch": 9.41, "grad_norm": 0.09697559475898743, "learning_rate": 2.978591359091225e-07, "loss": 0.0441, "step": 957475 }, { "epoch": 9.41, "grad_norm": 0.6219040751457214, "learning_rate": 2.9773501345487404e-07, "loss": 0.0094, "step": 957500 }, { "epoch": 9.41, "grad_norm": 0.11948429048061371, "learning_rate": 2.9761089100062557e-07, "loss": 0.0286, "step": 957525 }, { "epoch": 9.41, "grad_norm": 7.934668064117432, "learning_rate": 2.9748676854637715e-07, "loss": 0.0076, "step": 957550 }, { "epoch": 9.42, "grad_norm": 0.05559762939810753, "learning_rate": 2.973626460921287e-07, "loss": 0.0367, "step": 957575 }, { "epoch": 9.42, "grad_norm": 0.5725009441375732, "learning_rate": 2.972385236378802e-07, "loss": 0.0115, "step": 957600 }, { "epoch": 9.42, "grad_norm": 0.01652444712817669, "learning_rate": 2.9711440118363174e-07, "loss": 0.0342, "step": 957625 }, { "epoch": 9.42, "grad_norm": 7.980188846588135, "learning_rate": 2.9699027872938326e-07, "loss": 0.0128, "step": 957650 }, { "epoch": 9.42, "grad_norm": 0.8345174193382263, "learning_rate": 2.968661562751348e-07, "loss": 0.0444, "step": 957675 }, { "epoch": 9.42, "grad_norm": 0.844771146774292, "learning_rate": 2.967420338208864e-07, "loss": 0.0158, "step": 957700 }, { "epoch": 9.42, "grad_norm": 0.12024911493062973, "learning_rate": 2.966179113666379e-07, "loss": 0.0366, "step": 957725 }, { "epoch": 9.42, "grad_norm": 1.105852484703064, "learning_rate": 2.9649378891238943e-07, "loss": 0.0075, "step": 957750 }, { "epoch": 9.42, "grad_norm": 0.38834309577941895, "learning_rate": 2.9636966645814096e-07, "loss": 0.0321, "step": 957775 }, { "epoch": 9.42, "grad_norm": 15.405080795288086, "learning_rate": 2.962455440038925e-07, "loss": 0.0139, "step": 957800 }, { "epoch": 9.42, "grad_norm": 1.6989792585372925, "learning_rate": 2.96121421549644e-07, "loss": 0.0375, "step": 957825 }, { "epoch": 9.42, "grad_norm": 7.765219688415527, "learning_rate": 2.959972990953956e-07, "loss": 0.006, "step": 957850 }, { "epoch": 9.42, "grad_norm": 1.2526068687438965, "learning_rate": 2.9587317664114713e-07, "loss": 0.0468, "step": 957875 }, { "epoch": 9.42, "grad_norm": 8.956790924072266, "learning_rate": 2.9574905418689866e-07, "loss": 0.0145, "step": 957900 }, { "epoch": 9.42, "grad_norm": 1.1858254671096802, "learning_rate": 2.956249317326502e-07, "loss": 0.0528, "step": 957925 }, { "epoch": 9.42, "grad_norm": 0.8651218414306641, "learning_rate": 2.955008092784017e-07, "loss": 0.0141, "step": 957950 }, { "epoch": 9.42, "grad_norm": 5.3938446044921875, "learning_rate": 2.953766868241533e-07, "loss": 0.0354, "step": 957975 }, { "epoch": 9.42, "grad_norm": 0.32103121280670166, "learning_rate": 2.9525256436990483e-07, "loss": 0.0149, "step": 958000 }, { "epoch": 9.42, "grad_norm": 0.021267149597406387, "learning_rate": 2.9512844191565636e-07, "loss": 0.0289, "step": 958025 }, { "epoch": 9.42, "grad_norm": 7.954353332519531, "learning_rate": 2.950043194614079e-07, "loss": 0.0091, "step": 958050 }, { "epoch": 9.42, "grad_norm": 0.17484821379184723, "learning_rate": 2.948801970071594e-07, "loss": 0.0218, "step": 958075 }, { "epoch": 9.42, "grad_norm": 2.6743812561035156, "learning_rate": 2.9475607455291095e-07, "loss": 0.017, "step": 958100 }, { "epoch": 9.42, "grad_norm": 0.014203120954334736, "learning_rate": 2.946319520986625e-07, "loss": 0.0384, "step": 958125 }, { "epoch": 9.42, "grad_norm": 0.7481526136398315, "learning_rate": 2.94507829644414e-07, "loss": 0.0168, "step": 958150 }, { "epoch": 9.42, "grad_norm": 0.0020671102683991194, "learning_rate": 2.9438370719016553e-07, "loss": 0.0369, "step": 958175 }, { "epoch": 9.42, "grad_norm": 4.086062431335449, "learning_rate": 2.9425958473591706e-07, "loss": 0.0133, "step": 958200 }, { "epoch": 9.42, "grad_norm": 0.006892126053571701, "learning_rate": 2.941354622816686e-07, "loss": 0.0425, "step": 958225 }, { "epoch": 9.42, "grad_norm": 16.270414352416992, "learning_rate": 2.940113398274202e-07, "loss": 0.0122, "step": 958250 }, { "epoch": 9.42, "grad_norm": 0.011495258659124374, "learning_rate": 2.938872173731717e-07, "loss": 0.0405, "step": 958275 }, { "epoch": 9.42, "grad_norm": 12.939800262451172, "learning_rate": 2.9376309491892323e-07, "loss": 0.0186, "step": 958300 }, { "epoch": 9.42, "grad_norm": 0.029237277805805206, "learning_rate": 2.9363897246467476e-07, "loss": 0.034, "step": 958325 }, { "epoch": 9.42, "grad_norm": 0.18150806427001953, "learning_rate": 2.935148500104263e-07, "loss": 0.0148, "step": 958350 }, { "epoch": 9.42, "grad_norm": 1.507672905921936, "learning_rate": 2.933907275561778e-07, "loss": 0.0366, "step": 958375 }, { "epoch": 9.42, "grad_norm": 8.410821914672852, "learning_rate": 2.932666051019294e-07, "loss": 0.0248, "step": 958400 }, { "epoch": 9.42, "grad_norm": 0.02637970633804798, "learning_rate": 2.9314248264768093e-07, "loss": 0.049, "step": 958425 }, { "epoch": 9.42, "grad_norm": 1.9500186443328857, "learning_rate": 2.9301836019343246e-07, "loss": 0.01, "step": 958450 }, { "epoch": 9.42, "grad_norm": 0.016215350478887558, "learning_rate": 2.92894237739184e-07, "loss": 0.0362, "step": 958475 }, { "epoch": 9.42, "grad_norm": 3.2113616466522217, "learning_rate": 2.927701152849355e-07, "loss": 0.0222, "step": 958500 }, { "epoch": 9.42, "grad_norm": 0.03951490297913551, "learning_rate": 2.9264599283068705e-07, "loss": 0.0398, "step": 958525 }, { "epoch": 9.42, "grad_norm": 13.874244689941406, "learning_rate": 2.9252187037643863e-07, "loss": 0.0167, "step": 958550 }, { "epoch": 9.42, "grad_norm": 3.278963327407837, "learning_rate": 2.9239774792219016e-07, "loss": 0.0376, "step": 958575 }, { "epoch": 9.43, "grad_norm": 2.3477487564086914, "learning_rate": 2.922736254679417e-07, "loss": 0.0124, "step": 958600 }, { "epoch": 9.43, "grad_norm": 5.803061485290527, "learning_rate": 2.921495030136932e-07, "loss": 0.0358, "step": 958625 }, { "epoch": 9.43, "grad_norm": 1.3977712392807007, "learning_rate": 2.9202538055944475e-07, "loss": 0.012, "step": 958650 }, { "epoch": 9.43, "grad_norm": 0.014128729701042175, "learning_rate": 2.9190125810519633e-07, "loss": 0.0511, "step": 958675 }, { "epoch": 9.43, "grad_norm": 1.4696698188781738, "learning_rate": 2.9177713565094786e-07, "loss": 0.0084, "step": 958700 }, { "epoch": 9.43, "grad_norm": 3.683591842651367, "learning_rate": 2.916530131966994e-07, "loss": 0.0318, "step": 958725 }, { "epoch": 9.43, "grad_norm": 7.4423723220825195, "learning_rate": 2.915288907424509e-07, "loss": 0.0145, "step": 958750 }, { "epoch": 9.43, "grad_norm": 0.1778317391872406, "learning_rate": 2.9140476828820244e-07, "loss": 0.0347, "step": 958775 }, { "epoch": 9.43, "grad_norm": 10.520795822143555, "learning_rate": 2.9128064583395397e-07, "loss": 0.0125, "step": 958800 }, { "epoch": 9.43, "grad_norm": 0.011812608689069748, "learning_rate": 2.911565233797055e-07, "loss": 0.0418, "step": 958825 }, { "epoch": 9.43, "grad_norm": 0.5775943398475647, "learning_rate": 2.9103240092545703e-07, "loss": 0.0171, "step": 958850 }, { "epoch": 9.43, "grad_norm": 0.015906479209661484, "learning_rate": 2.9090827847120856e-07, "loss": 0.0299, "step": 958875 }, { "epoch": 9.43, "grad_norm": 14.72159194946289, "learning_rate": 2.907841560169601e-07, "loss": 0.0199, "step": 958900 }, { "epoch": 9.43, "grad_norm": 0.18586666882038116, "learning_rate": 2.906600335627116e-07, "loss": 0.0405, "step": 958925 }, { "epoch": 9.43, "grad_norm": 0.035240087658166885, "learning_rate": 2.9053591110846315e-07, "loss": 0.0134, "step": 958950 }, { "epoch": 9.43, "grad_norm": 8.794370651245117, "learning_rate": 2.9041178865421473e-07, "loss": 0.0276, "step": 958975 }, { "epoch": 9.43, "grad_norm": 12.950468063354492, "learning_rate": 2.9028766619996626e-07, "loss": 0.015, "step": 959000 }, { "epoch": 9.43, "grad_norm": 0.07621926814317703, "learning_rate": 2.901635437457178e-07, "loss": 0.0519, "step": 959025 }, { "epoch": 9.43, "grad_norm": 1.749876856803894, "learning_rate": 2.900394212914693e-07, "loss": 0.0032, "step": 959050 }, { "epoch": 9.43, "grad_norm": 5.848194599151611, "learning_rate": 2.8991529883722085e-07, "loss": 0.0375, "step": 959075 }, { "epoch": 9.43, "grad_norm": 4.815779209136963, "learning_rate": 2.8979117638297243e-07, "loss": 0.012, "step": 959100 }, { "epoch": 9.43, "grad_norm": 0.014363212510943413, "learning_rate": 2.8966705392872396e-07, "loss": 0.0283, "step": 959125 }, { "epoch": 9.43, "grad_norm": 5.70617151260376, "learning_rate": 2.895429314744755e-07, "loss": 0.0152, "step": 959150 }, { "epoch": 9.43, "grad_norm": 2.2932016849517822, "learning_rate": 2.89418809020227e-07, "loss": 0.0295, "step": 959175 }, { "epoch": 9.43, "grad_norm": 0.3097531497478485, "learning_rate": 2.8929468656597854e-07, "loss": 0.0152, "step": 959200 }, { "epoch": 9.43, "grad_norm": 0.009752373211085796, "learning_rate": 2.891705641117301e-07, "loss": 0.0542, "step": 959225 }, { "epoch": 9.43, "grad_norm": 1.1240040063858032, "learning_rate": 2.8904644165748166e-07, "loss": 0.0059, "step": 959250 }, { "epoch": 9.43, "grad_norm": 0.4649278521537781, "learning_rate": 2.889223192032332e-07, "loss": 0.0332, "step": 959275 }, { "epoch": 9.43, "grad_norm": 7.903885364532471, "learning_rate": 2.887981967489847e-07, "loss": 0.0097, "step": 959300 }, { "epoch": 9.43, "grad_norm": 3.4379570484161377, "learning_rate": 2.8867407429473624e-07, "loss": 0.0518, "step": 959325 }, { "epoch": 9.43, "grad_norm": 3.2911128997802734, "learning_rate": 2.8854995184048777e-07, "loss": 0.0127, "step": 959350 }, { "epoch": 9.43, "grad_norm": 0.031329382210969925, "learning_rate": 2.884258293862393e-07, "loss": 0.0168, "step": 959375 }, { "epoch": 9.43, "grad_norm": 2.9717416763305664, "learning_rate": 2.883017069319909e-07, "loss": 0.01, "step": 959400 }, { "epoch": 9.43, "grad_norm": 0.07355920225381851, "learning_rate": 2.881825493759123e-07, "loss": 0.0284, "step": 959425 }, { "epoch": 9.43, "grad_norm": 1.17366623878479, "learning_rate": 2.8805842692166386e-07, "loss": 0.0325, "step": 959450 }, { "epoch": 9.43, "grad_norm": 0.055700164288282394, "learning_rate": 2.879343044674154e-07, "loss": 0.0581, "step": 959475 }, { "epoch": 9.43, "grad_norm": 0.4142114222049713, "learning_rate": 2.878101820131669e-07, "loss": 0.0133, "step": 959500 }, { "epoch": 9.43, "grad_norm": 4.1658034324646, "learning_rate": 2.8768605955891845e-07, "loss": 0.0404, "step": 959525 }, { "epoch": 9.43, "grad_norm": 4.291421413421631, "learning_rate": 2.8756193710467e-07, "loss": 0.013, "step": 959550 }, { "epoch": 9.43, "grad_norm": 0.1637166291475296, "learning_rate": 2.8743781465042156e-07, "loss": 0.0333, "step": 959575 }, { "epoch": 9.43, "grad_norm": 6.37988805770874, "learning_rate": 2.873136921961731e-07, "loss": 0.0112, "step": 959600 }, { "epoch": 9.44, "grad_norm": 0.029660452157258987, "learning_rate": 2.871895697419246e-07, "loss": 0.0277, "step": 959625 }, { "epoch": 9.44, "grad_norm": 0.18298126757144928, "learning_rate": 2.8706544728767615e-07, "loss": 0.0248, "step": 959650 }, { "epoch": 9.44, "grad_norm": 1.5737266540527344, "learning_rate": 2.869413248334277e-07, "loss": 0.0336, "step": 959675 }, { "epoch": 9.44, "grad_norm": 3.9205684661865234, "learning_rate": 2.868172023791792e-07, "loss": 0.0269, "step": 959700 }, { "epoch": 9.44, "grad_norm": 6.378884792327881, "learning_rate": 2.866930799249308e-07, "loss": 0.0342, "step": 959725 }, { "epoch": 9.44, "grad_norm": 0.06728371977806091, "learning_rate": 2.865689574706823e-07, "loss": 0.0131, "step": 959750 }, { "epoch": 9.44, "grad_norm": 0.0062944176606833935, "learning_rate": 2.8644483501643385e-07, "loss": 0.0409, "step": 959775 }, { "epoch": 9.44, "grad_norm": 3.035922050476074, "learning_rate": 2.863207125621854e-07, "loss": 0.0161, "step": 959800 }, { "epoch": 9.44, "grad_norm": 0.20792478322982788, "learning_rate": 2.861965901079369e-07, "loss": 0.0316, "step": 959825 }, { "epoch": 9.44, "grad_norm": 25.182783126831055, "learning_rate": 2.8607246765368843e-07, "loss": 0.0079, "step": 959850 }, { "epoch": 9.44, "grad_norm": 0.23362460732460022, "learning_rate": 2.8594834519944e-07, "loss": 0.0204, "step": 959875 }, { "epoch": 9.44, "grad_norm": 5.675498962402344, "learning_rate": 2.8582422274519155e-07, "loss": 0.0159, "step": 959900 }, { "epoch": 9.44, "grad_norm": 1.2266263961791992, "learning_rate": 2.857001002909431e-07, "loss": 0.0125, "step": 959925 }, { "epoch": 9.44, "grad_norm": 2.5171499252319336, "learning_rate": 2.855759778366946e-07, "loss": 0.0101, "step": 959950 }, { "epoch": 9.44, "grad_norm": 0.012676992453634739, "learning_rate": 2.8545185538244613e-07, "loss": 0.025, "step": 959975 }, { "epoch": 9.44, "grad_norm": 9.530343055725098, "learning_rate": 2.8532773292819766e-07, "loss": 0.0177, "step": 960000 }, { "epoch": 9.44, "eval_loss": 0.9193033576011658, "eval_runtime": 6079.4698, "eval_samples_per_second": 1.557, "eval_steps_per_second": 0.195, "eval_wer": 0.10997553595570721, "step": 960000 }, { "epoch": 9.44, "grad_norm": 0.051895465701818466, "learning_rate": 2.852036104739492e-07, "loss": 0.0266, "step": 960025 }, { "epoch": 9.44, "grad_norm": 7.675142288208008, "learning_rate": 2.850794880197007e-07, "loss": 0.0128, "step": 960050 }, { "epoch": 9.44, "grad_norm": 0.19277550280094147, "learning_rate": 2.8495536556545225e-07, "loss": 0.0339, "step": 960075 }, { "epoch": 9.44, "grad_norm": 2.484936237335205, "learning_rate": 2.848312431112038e-07, "loss": 0.0266, "step": 960100 }, { "epoch": 9.44, "grad_norm": 0.0027053311932832003, "learning_rate": 2.847071206569553e-07, "loss": 0.0331, "step": 960125 }, { "epoch": 9.44, "grad_norm": 0.35569337010383606, "learning_rate": 2.845829982027069e-07, "loss": 0.0064, "step": 960150 }, { "epoch": 9.44, "grad_norm": 28.650827407836914, "learning_rate": 2.844588757484584e-07, "loss": 0.0547, "step": 960175 }, { "epoch": 9.44, "grad_norm": 0.265796035528183, "learning_rate": 2.8433475329420995e-07, "loss": 0.019, "step": 960200 }, { "epoch": 9.44, "grad_norm": 0.04895348846912384, "learning_rate": 2.842106308399615e-07, "loss": 0.0388, "step": 960225 }, { "epoch": 9.44, "grad_norm": 0.38744667172431946, "learning_rate": 2.84086508385713e-07, "loss": 0.013, "step": 960250 }, { "epoch": 9.44, "grad_norm": 0.0067055183462798595, "learning_rate": 2.839623859314646e-07, "loss": 0.0433, "step": 960275 }, { "epoch": 9.44, "grad_norm": 2.7974374294281006, "learning_rate": 2.838382634772161e-07, "loss": 0.0079, "step": 960300 }, { "epoch": 9.44, "grad_norm": 0.19299262762069702, "learning_rate": 2.8371414102296765e-07, "loss": 0.0364, "step": 960325 }, { "epoch": 9.44, "grad_norm": 0.4153870642185211, "learning_rate": 2.835900185687192e-07, "loss": 0.0229, "step": 960350 }, { "epoch": 9.44, "grad_norm": 1.06610107421875, "learning_rate": 2.834658961144707e-07, "loss": 0.0373, "step": 960375 }, { "epoch": 9.44, "grad_norm": 2.527674913406372, "learning_rate": 2.8334177366022223e-07, "loss": 0.0098, "step": 960400 }, { "epoch": 9.44, "grad_norm": 0.5798089504241943, "learning_rate": 2.832176512059738e-07, "loss": 0.0302, "step": 960425 }, { "epoch": 9.44, "grad_norm": 5.7980637550354, "learning_rate": 2.8309352875172534e-07, "loss": 0.017, "step": 960450 }, { "epoch": 9.44, "grad_norm": 0.15013785660266876, "learning_rate": 2.829694062974769e-07, "loss": 0.044, "step": 960475 }, { "epoch": 9.44, "grad_norm": 0.21700479090213776, "learning_rate": 2.828452838432284e-07, "loss": 0.0128, "step": 960500 }, { "epoch": 9.44, "grad_norm": 1.5320738554000854, "learning_rate": 2.8272116138897993e-07, "loss": 0.0428, "step": 960525 }, { "epoch": 9.44, "grad_norm": 0.4377392530441284, "learning_rate": 2.8259703893473146e-07, "loss": 0.0047, "step": 960550 }, { "epoch": 9.44, "grad_norm": 1.8876309394836426, "learning_rate": 2.8247291648048304e-07, "loss": 0.041, "step": 960575 }, { "epoch": 9.44, "grad_norm": 0.06512221693992615, "learning_rate": 2.8234879402623457e-07, "loss": 0.0232, "step": 960600 }, { "epoch": 9.45, "grad_norm": 0.020119983702898026, "learning_rate": 2.822246715719861e-07, "loss": 0.0258, "step": 960625 }, { "epoch": 9.45, "grad_norm": 0.8772450089454651, "learning_rate": 2.8210054911773763e-07, "loss": 0.0139, "step": 960650 }, { "epoch": 9.45, "grad_norm": 1.9407141208648682, "learning_rate": 2.8197642666348916e-07, "loss": 0.033, "step": 960675 }, { "epoch": 9.45, "grad_norm": 9.358431816101074, "learning_rate": 2.818523042092407e-07, "loss": 0.0315, "step": 960700 }, { "epoch": 9.45, "grad_norm": 0.07671339809894562, "learning_rate": 2.817281817549922e-07, "loss": 0.0502, "step": 960725 }, { "epoch": 9.45, "grad_norm": 8.662591934204102, "learning_rate": 2.8160405930074375e-07, "loss": 0.0121, "step": 960750 }, { "epoch": 9.45, "grad_norm": 0.021545808762311935, "learning_rate": 2.814799368464953e-07, "loss": 0.0375, "step": 960775 }, { "epoch": 9.45, "grad_norm": 1.8543909788131714, "learning_rate": 2.813558143922468e-07, "loss": 0.0242, "step": 960800 }, { "epoch": 9.45, "grad_norm": 0.041913289576768875, "learning_rate": 2.812316919379984e-07, "loss": 0.0206, "step": 960825 }, { "epoch": 9.45, "grad_norm": 0.1407485157251358, "learning_rate": 2.811075694837499e-07, "loss": 0.0087, "step": 960850 }, { "epoch": 9.45, "grad_norm": 0.13597139716148376, "learning_rate": 2.8098344702950145e-07, "loss": 0.0204, "step": 960875 }, { "epoch": 9.45, "grad_norm": 2.857957124710083, "learning_rate": 2.80859324575253e-07, "loss": 0.0216, "step": 960900 }, { "epoch": 9.45, "grad_norm": 2.5328502655029297, "learning_rate": 2.807352021210045e-07, "loss": 0.0259, "step": 960925 }, { "epoch": 9.45, "grad_norm": 0.31781259179115295, "learning_rate": 2.8061107966675603e-07, "loss": 0.0131, "step": 960950 }, { "epoch": 9.45, "grad_norm": 0.1335155963897705, "learning_rate": 2.8048695721250756e-07, "loss": 0.0249, "step": 960975 }, { "epoch": 9.45, "grad_norm": 5.012853145599365, "learning_rate": 2.8036283475825914e-07, "loss": 0.0132, "step": 961000 }, { "epoch": 9.45, "grad_norm": 0.042018625885248184, "learning_rate": 2.8023871230401067e-07, "loss": 0.0298, "step": 961025 }, { "epoch": 9.45, "grad_norm": 16.359960556030273, "learning_rate": 2.801145898497622e-07, "loss": 0.0256, "step": 961050 }, { "epoch": 9.45, "grad_norm": 1.2150921821594238, "learning_rate": 2.7999046739551373e-07, "loss": 0.0238, "step": 961075 }, { "epoch": 9.45, "grad_norm": 8.648064613342285, "learning_rate": 2.7986634494126526e-07, "loss": 0.016, "step": 961100 }, { "epoch": 9.45, "grad_norm": 0.005951727740466595, "learning_rate": 2.7974222248701684e-07, "loss": 0.0189, "step": 961125 }, { "epoch": 9.45, "grad_norm": 9.120759010314941, "learning_rate": 2.7961810003276837e-07, "loss": 0.0138, "step": 961150 }, { "epoch": 9.45, "grad_norm": 0.38462913036346436, "learning_rate": 2.794939775785199e-07, "loss": 0.0319, "step": 961175 }, { "epoch": 9.45, "grad_norm": 0.7165436148643494, "learning_rate": 2.7936985512427143e-07, "loss": 0.0033, "step": 961200 }, { "epoch": 9.45, "grad_norm": 0.03317315876483917, "learning_rate": 2.7924573267002296e-07, "loss": 0.0544, "step": 961225 }, { "epoch": 9.45, "grad_norm": 6.494649410247803, "learning_rate": 2.791216102157745e-07, "loss": 0.0234, "step": 961250 }, { "epoch": 9.45, "grad_norm": 0.012875108048319817, "learning_rate": 2.7899748776152607e-07, "loss": 0.0443, "step": 961275 }, { "epoch": 9.45, "grad_norm": 16.61625862121582, "learning_rate": 2.788733653072776e-07, "loss": 0.0099, "step": 961300 }, { "epoch": 9.45, "grad_norm": 0.15918606519699097, "learning_rate": 2.7874924285302913e-07, "loss": 0.0279, "step": 961325 }, { "epoch": 9.45, "grad_norm": 0.1245725080370903, "learning_rate": 2.7862512039878066e-07, "loss": 0.0087, "step": 961350 }, { "epoch": 9.45, "grad_norm": 17.94076156616211, "learning_rate": 2.785009979445322e-07, "loss": 0.0415, "step": 961375 }, { "epoch": 9.45, "grad_norm": 8.851072311401367, "learning_rate": 2.783768754902837e-07, "loss": 0.0258, "step": 961400 }, { "epoch": 9.45, "grad_norm": 0.14369995892047882, "learning_rate": 2.7825275303603524e-07, "loss": 0.0309, "step": 961425 }, { "epoch": 9.45, "grad_norm": 1.3428230285644531, "learning_rate": 2.7812863058178677e-07, "loss": 0.0212, "step": 961450 }, { "epoch": 9.45, "grad_norm": 0.3465293049812317, "learning_rate": 2.780045081275383e-07, "loss": 0.0522, "step": 961475 }, { "epoch": 9.45, "grad_norm": 0.42236557602882385, "learning_rate": 2.7788038567328983e-07, "loss": 0.0158, "step": 961500 }, { "epoch": 9.45, "grad_norm": 0.004108196124434471, "learning_rate": 2.777562632190414e-07, "loss": 0.0404, "step": 961525 }, { "epoch": 9.45, "grad_norm": 7.1783294677734375, "learning_rate": 2.7763214076479294e-07, "loss": 0.0262, "step": 961550 }, { "epoch": 9.45, "grad_norm": 1.2902486324310303, "learning_rate": 2.7750801831054447e-07, "loss": 0.0288, "step": 961575 }, { "epoch": 9.45, "grad_norm": 5.873608589172363, "learning_rate": 2.77383895856296e-07, "loss": 0.0197, "step": 961600 }, { "epoch": 9.45, "grad_norm": 0.005729170981794596, "learning_rate": 2.7725977340204753e-07, "loss": 0.0291, "step": 961625 }, { "epoch": 9.46, "grad_norm": 5.935834884643555, "learning_rate": 2.7713565094779906e-07, "loss": 0.0118, "step": 961650 }, { "epoch": 9.46, "grad_norm": 0.253415048122406, "learning_rate": 2.770115284935506e-07, "loss": 0.0352, "step": 961675 }, { "epoch": 9.46, "grad_norm": 0.03104841150343418, "learning_rate": 2.7688740603930217e-07, "loss": 0.0087, "step": 961700 }, { "epoch": 9.46, "grad_norm": 1.1335829496383667, "learning_rate": 2.767632835850537e-07, "loss": 0.0345, "step": 961725 }, { "epoch": 9.46, "grad_norm": 5.859738349914551, "learning_rate": 2.7663916113080523e-07, "loss": 0.0166, "step": 961750 }, { "epoch": 9.46, "grad_norm": 0.36184489727020264, "learning_rate": 2.7651503867655676e-07, "loss": 0.0589, "step": 961775 }, { "epoch": 9.46, "grad_norm": 4.320834636688232, "learning_rate": 2.763909162223083e-07, "loss": 0.0292, "step": 961800 }, { "epoch": 9.46, "grad_norm": 1.56934654712677, "learning_rate": 2.762667937680598e-07, "loss": 0.0322, "step": 961825 }, { "epoch": 9.46, "grad_norm": 1.2319029569625854, "learning_rate": 2.761426713138114e-07, "loss": 0.0069, "step": 961850 }, { "epoch": 9.46, "grad_norm": 0.03963048383593559, "learning_rate": 2.7602351375773285e-07, "loss": 0.051, "step": 961875 }, { "epoch": 9.46, "grad_norm": 1.5519335269927979, "learning_rate": 2.758993913034844e-07, "loss": 0.0216, "step": 961900 }, { "epoch": 9.46, "grad_norm": 0.06230510026216507, "learning_rate": 2.757752688492359e-07, "loss": 0.0313, "step": 961925 }, { "epoch": 9.46, "grad_norm": 2.3919286727905273, "learning_rate": 2.7565114639498744e-07, "loss": 0.0145, "step": 961950 }, { "epoch": 9.46, "grad_norm": 0.02170697972178459, "learning_rate": 2.7552702394073896e-07, "loss": 0.0269, "step": 961975 }, { "epoch": 9.46, "grad_norm": 0.6029036641120911, "learning_rate": 2.7540290148649055e-07, "loss": 0.0201, "step": 962000 }, { "epoch": 9.46, "grad_norm": 0.08722201734781265, "learning_rate": 2.752787790322421e-07, "loss": 0.0448, "step": 962025 }, { "epoch": 9.46, "grad_norm": 0.11911357939243317, "learning_rate": 2.751546565779936e-07, "loss": 0.0196, "step": 962050 }, { "epoch": 9.46, "grad_norm": 0.09256869554519653, "learning_rate": 2.7503053412374513e-07, "loss": 0.0429, "step": 962075 }, { "epoch": 9.46, "grad_norm": 13.018295288085938, "learning_rate": 2.7490641166949666e-07, "loss": 0.0168, "step": 962100 }, { "epoch": 9.46, "grad_norm": 0.010393338277935982, "learning_rate": 2.747822892152482e-07, "loss": 0.048, "step": 962125 }, { "epoch": 9.46, "grad_norm": 4.187787055969238, "learning_rate": 2.746581667609997e-07, "loss": 0.0148, "step": 962150 }, { "epoch": 9.46, "grad_norm": 0.8535890579223633, "learning_rate": 2.745340443067513e-07, "loss": 0.0416, "step": 962175 }, { "epoch": 9.46, "grad_norm": 2.6286559104919434, "learning_rate": 2.7440992185250283e-07, "loss": 0.0073, "step": 962200 }, { "epoch": 9.46, "grad_norm": 0.04169599711894989, "learning_rate": 2.7428579939825436e-07, "loss": 0.0326, "step": 962225 }, { "epoch": 9.46, "grad_norm": 0.4477292001247406, "learning_rate": 2.741616769440059e-07, "loss": 0.0099, "step": 962250 }, { "epoch": 9.46, "grad_norm": 0.12153717875480652, "learning_rate": 2.740375544897574e-07, "loss": 0.019, "step": 962275 }, { "epoch": 9.46, "grad_norm": 2.0809686183929443, "learning_rate": 2.7391343203550895e-07, "loss": 0.01, "step": 962300 }, { "epoch": 9.46, "grad_norm": 0.16066981852054596, "learning_rate": 2.7378930958126053e-07, "loss": 0.0285, "step": 962325 }, { "epoch": 9.46, "grad_norm": 0.3108617663383484, "learning_rate": 2.7366518712701206e-07, "loss": 0.0271, "step": 962350 }, { "epoch": 9.46, "grad_norm": 1.2371447086334229, "learning_rate": 2.735410646727636e-07, "loss": 0.0497, "step": 962375 }, { "epoch": 9.46, "grad_norm": 0.4321540892124176, "learning_rate": 2.734169422185151e-07, "loss": 0.0125, "step": 962400 }, { "epoch": 9.46, "grad_norm": 3.6049981117248535, "learning_rate": 2.7329281976426665e-07, "loss": 0.054, "step": 962425 }, { "epoch": 9.46, "grad_norm": 0.17089922726154327, "learning_rate": 2.7316869731001823e-07, "loss": 0.0144, "step": 962450 }, { "epoch": 9.46, "grad_norm": 0.026874352246522903, "learning_rate": 2.7304457485576976e-07, "loss": 0.028, "step": 962475 }, { "epoch": 9.46, "grad_norm": 0.015806570649147034, "learning_rate": 2.729204524015213e-07, "loss": 0.0073, "step": 962500 }, { "epoch": 9.46, "grad_norm": 0.17082010209560394, "learning_rate": 2.727963299472728e-07, "loss": 0.0341, "step": 962525 }, { "epoch": 9.46, "grad_norm": 6.309224605560303, "learning_rate": 2.7267220749302435e-07, "loss": 0.0196, "step": 962550 }, { "epoch": 9.46, "grad_norm": 0.08564393222332001, "learning_rate": 2.725480850387759e-07, "loss": 0.0395, "step": 962575 }, { "epoch": 9.46, "grad_norm": 5.273634433746338, "learning_rate": 2.724239625845274e-07, "loss": 0.0142, "step": 962600 }, { "epoch": 9.46, "grad_norm": 0.031099217012524605, "learning_rate": 2.7229984013027893e-07, "loss": 0.0381, "step": 962625 }, { "epoch": 9.46, "grad_norm": 1.6191790103912354, "learning_rate": 2.7217571767603046e-07, "loss": 0.0146, "step": 962650 }, { "epoch": 9.47, "grad_norm": 1.2788889408111572, "learning_rate": 2.7205159522178204e-07, "loss": 0.0468, "step": 962675 }, { "epoch": 9.47, "grad_norm": 0.47605645656585693, "learning_rate": 2.7192747276753357e-07, "loss": 0.0125, "step": 962700 }, { "epoch": 9.47, "grad_norm": 3.127192735671997, "learning_rate": 2.718033503132851e-07, "loss": 0.0367, "step": 962725 }, { "epoch": 9.47, "grad_norm": 0.9831407070159912, "learning_rate": 2.7167922785903663e-07, "loss": 0.0132, "step": 962750 }, { "epoch": 9.47, "grad_norm": 17.670774459838867, "learning_rate": 2.7155510540478816e-07, "loss": 0.0253, "step": 962775 }, { "epoch": 9.47, "grad_norm": 10.59614372253418, "learning_rate": 2.714309829505397e-07, "loss": 0.015, "step": 962800 }, { "epoch": 9.47, "grad_norm": 1.2128791809082031, "learning_rate": 2.713068604962912e-07, "loss": 0.0299, "step": 962825 }, { "epoch": 9.47, "grad_norm": 0.39259782433509827, "learning_rate": 2.7118273804204275e-07, "loss": 0.0142, "step": 962850 }, { "epoch": 9.47, "grad_norm": 0.05124971643090248, "learning_rate": 2.7105861558779433e-07, "loss": 0.0477, "step": 962875 }, { "epoch": 9.47, "grad_norm": 0.9948617219924927, "learning_rate": 2.7093449313354586e-07, "loss": 0.0142, "step": 962900 }, { "epoch": 9.47, "grad_norm": 2.1550393104553223, "learning_rate": 2.708103706792974e-07, "loss": 0.0308, "step": 962925 }, { "epoch": 9.47, "grad_norm": 9.24984073638916, "learning_rate": 2.706862482250489e-07, "loss": 0.0127, "step": 962950 }, { "epoch": 9.47, "grad_norm": 0.04861679673194885, "learning_rate": 2.7056212577080045e-07, "loss": 0.0164, "step": 962975 }, { "epoch": 9.47, "grad_norm": 3.940753936767578, "learning_rate": 2.70438003316552e-07, "loss": 0.0153, "step": 963000 }, { "epoch": 9.47, "grad_norm": 12.554147720336914, "learning_rate": 2.7031388086230356e-07, "loss": 0.0364, "step": 963025 }, { "epoch": 9.47, "grad_norm": 9.525437355041504, "learning_rate": 2.701897584080551e-07, "loss": 0.0182, "step": 963050 }, { "epoch": 9.47, "grad_norm": 1.3444920778274536, "learning_rate": 2.700656359538066e-07, "loss": 0.0317, "step": 963075 }, { "epoch": 9.47, "grad_norm": 0.3759630620479584, "learning_rate": 2.6994151349955814e-07, "loss": 0.0138, "step": 963100 }, { "epoch": 9.47, "grad_norm": 0.14699798822402954, "learning_rate": 2.698173910453097e-07, "loss": 0.0316, "step": 963125 }, { "epoch": 9.47, "grad_norm": 10.43281078338623, "learning_rate": 2.6969326859106126e-07, "loss": 0.0272, "step": 963150 }, { "epoch": 9.47, "grad_norm": 1.0408573150634766, "learning_rate": 2.695691461368128e-07, "loss": 0.0365, "step": 963175 }, { "epoch": 9.47, "grad_norm": 0.1926925778388977, "learning_rate": 2.694450236825643e-07, "loss": 0.0148, "step": 963200 }, { "epoch": 9.47, "grad_norm": 0.10017946362495422, "learning_rate": 2.6932090122831584e-07, "loss": 0.0362, "step": 963225 }, { "epoch": 9.47, "grad_norm": 3.045938014984131, "learning_rate": 2.6919677877406737e-07, "loss": 0.0243, "step": 963250 }, { "epoch": 9.47, "grad_norm": 11.117877960205078, "learning_rate": 2.690726563198189e-07, "loss": 0.0371, "step": 963275 }, { "epoch": 9.47, "grad_norm": 7.136343955993652, "learning_rate": 2.6894853386557043e-07, "loss": 0.0271, "step": 963300 }, { "epoch": 9.47, "grad_norm": 1.702500343322754, "learning_rate": 2.6882441141132196e-07, "loss": 0.0332, "step": 963325 }, { "epoch": 9.47, "grad_norm": 2.231311559677124, "learning_rate": 2.687002889570735e-07, "loss": 0.0073, "step": 963350 }, { "epoch": 9.47, "grad_norm": 8.495361328125, "learning_rate": 2.6857616650282507e-07, "loss": 0.0328, "step": 963375 }, { "epoch": 9.47, "grad_norm": 5.004312038421631, "learning_rate": 2.684520440485766e-07, "loss": 0.0057, "step": 963400 }, { "epoch": 9.47, "grad_norm": 0.060135237872600555, "learning_rate": 2.6832792159432813e-07, "loss": 0.0568, "step": 963425 }, { "epoch": 9.47, "grad_norm": 12.109265327453613, "learning_rate": 2.6820379914007966e-07, "loss": 0.011, "step": 963450 }, { "epoch": 9.47, "grad_norm": 0.35445496439933777, "learning_rate": 2.680796766858312e-07, "loss": 0.0375, "step": 963475 }, { "epoch": 9.47, "grad_norm": 0.23614032566547394, "learning_rate": 2.679555542315827e-07, "loss": 0.0078, "step": 963500 }, { "epoch": 9.47, "grad_norm": 15.681682586669922, "learning_rate": 2.6783143177733425e-07, "loss": 0.017, "step": 963525 }, { "epoch": 9.47, "grad_norm": 8.069317817687988, "learning_rate": 2.677073093230858e-07, "loss": 0.0178, "step": 963550 }, { "epoch": 9.47, "grad_norm": 0.938036322593689, "learning_rate": 2.6758318686883736e-07, "loss": 0.0344, "step": 963575 }, { "epoch": 9.47, "grad_norm": 7.485848426818848, "learning_rate": 2.674590644145889e-07, "loss": 0.0052, "step": 963600 }, { "epoch": 9.47, "grad_norm": 0.08508527278900146, "learning_rate": 2.673349419603404e-07, "loss": 0.0297, "step": 963625 }, { "epoch": 9.47, "grad_norm": 0.6723185777664185, "learning_rate": 2.6721081950609194e-07, "loss": 0.0119, "step": 963650 }, { "epoch": 9.48, "grad_norm": 0.02991749346256256, "learning_rate": 2.6708669705184347e-07, "loss": 0.0305, "step": 963675 }, { "epoch": 9.48, "grad_norm": 0.900649905204773, "learning_rate": 2.66962574597595e-07, "loss": 0.0116, "step": 963700 }, { "epoch": 9.48, "grad_norm": 0.01115771010518074, "learning_rate": 2.668384521433466e-07, "loss": 0.0433, "step": 963725 }, { "epoch": 9.48, "grad_norm": 3.561720132827759, "learning_rate": 2.667143296890981e-07, "loss": 0.0155, "step": 963750 }, { "epoch": 9.48, "grad_norm": 3.655174732208252, "learning_rate": 2.6659020723484964e-07, "loss": 0.0446, "step": 963775 }, { "epoch": 9.48, "grad_norm": 4.1539411544799805, "learning_rate": 2.6646608478060117e-07, "loss": 0.0194, "step": 963800 }, { "epoch": 9.48, "grad_norm": 11.18898868560791, "learning_rate": 2.663419623263527e-07, "loss": 0.0218, "step": 963825 }, { "epoch": 9.48, "grad_norm": 7.349545478820801, "learning_rate": 2.6621783987210423e-07, "loss": 0.0164, "step": 963850 }, { "epoch": 9.48, "grad_norm": 0.030700618401169777, "learning_rate": 2.660937174178558e-07, "loss": 0.0346, "step": 963875 }, { "epoch": 9.48, "grad_norm": 5.62591028213501, "learning_rate": 2.6596959496360734e-07, "loss": 0.0074, "step": 963900 }, { "epoch": 9.48, "grad_norm": 0.05152685567736626, "learning_rate": 2.6584547250935887e-07, "loss": 0.036, "step": 963925 }, { "epoch": 9.48, "grad_norm": 3.4011762142181396, "learning_rate": 2.657213500551104e-07, "loss": 0.0194, "step": 963950 }, { "epoch": 9.48, "grad_norm": 3.0502896308898926, "learning_rate": 2.6559722760086193e-07, "loss": 0.0284, "step": 963975 }, { "epoch": 9.48, "grad_norm": 2.3082120418548584, "learning_rate": 2.6547310514661346e-07, "loss": 0.0243, "step": 964000 }, { "epoch": 9.48, "grad_norm": 1.2917052507400513, "learning_rate": 2.65348982692365e-07, "loss": 0.0196, "step": 964025 }, { "epoch": 9.48, "grad_norm": 9.298259735107422, "learning_rate": 2.652248602381165e-07, "loss": 0.0086, "step": 964050 }, { "epoch": 9.48, "grad_norm": 1.6679472923278809, "learning_rate": 2.651007377838681e-07, "loss": 0.0499, "step": 964075 }, { "epoch": 9.48, "grad_norm": 11.410063743591309, "learning_rate": 2.649766153296196e-07, "loss": 0.0226, "step": 964100 }, { "epoch": 9.48, "grad_norm": 0.07578682154417038, "learning_rate": 2.6485249287537116e-07, "loss": 0.041, "step": 964125 }, { "epoch": 9.48, "grad_norm": 4.323683261871338, "learning_rate": 2.647283704211227e-07, "loss": 0.0097, "step": 964150 }, { "epoch": 9.48, "grad_norm": 0.05170261487364769, "learning_rate": 2.646042479668742e-07, "loss": 0.0239, "step": 964175 }, { "epoch": 9.48, "grad_norm": 1.874188780784607, "learning_rate": 2.6448012551262574e-07, "loss": 0.0174, "step": 964200 }, { "epoch": 9.48, "grad_norm": 0.16794070601463318, "learning_rate": 2.6435600305837727e-07, "loss": 0.0445, "step": 964225 }, { "epoch": 9.48, "grad_norm": 10.165868759155273, "learning_rate": 2.642318806041288e-07, "loss": 0.0208, "step": 964250 }, { "epoch": 9.48, "grad_norm": 0.0526108518242836, "learning_rate": 2.641077581498804e-07, "loss": 0.0282, "step": 964275 }, { "epoch": 9.48, "grad_norm": 2.4421558380126953, "learning_rate": 2.639836356956319e-07, "loss": 0.0183, "step": 964300 }, { "epoch": 9.48, "grad_norm": 0.7048917412757874, "learning_rate": 2.6385951324138344e-07, "loss": 0.0282, "step": 964325 }, { "epoch": 9.48, "grad_norm": 7.454432487487793, "learning_rate": 2.6373539078713497e-07, "loss": 0.0089, "step": 964350 }, { "epoch": 9.48, "grad_norm": 0.004141403827816248, "learning_rate": 2.636112683328865e-07, "loss": 0.04, "step": 964375 }, { "epoch": 9.48, "grad_norm": 0.07435307651758194, "learning_rate": 2.6348714587863803e-07, "loss": 0.0272, "step": 964400 }, { "epoch": 9.48, "grad_norm": 0.056459710001945496, "learning_rate": 2.6336798832255953e-07, "loss": 0.0473, "step": 964425 }, { "epoch": 9.48, "grad_norm": 0.7368431687355042, "learning_rate": 2.6324386586831106e-07, "loss": 0.0186, "step": 964450 }, { "epoch": 9.48, "grad_norm": 1.7585006952285767, "learning_rate": 2.631197434140626e-07, "loss": 0.036, "step": 964475 }, { "epoch": 9.48, "grad_norm": 1.0726745128631592, "learning_rate": 2.629956209598141e-07, "loss": 0.0097, "step": 964500 }, { "epoch": 9.48, "grad_norm": 0.06370259076356888, "learning_rate": 2.628714985055657e-07, "loss": 0.0395, "step": 964525 }, { "epoch": 9.48, "grad_norm": 7.447238445281982, "learning_rate": 2.6274737605131723e-07, "loss": 0.0213, "step": 964550 }, { "epoch": 9.48, "grad_norm": 0.0871925950050354, "learning_rate": 2.6262325359706876e-07, "loss": 0.024, "step": 964575 }, { "epoch": 9.48, "grad_norm": 0.9977402091026306, "learning_rate": 2.624991311428203e-07, "loss": 0.0102, "step": 964600 }, { "epoch": 9.48, "grad_norm": 0.1520831435918808, "learning_rate": 2.623750086885718e-07, "loss": 0.0428, "step": 964625 }, { "epoch": 9.48, "grad_norm": 6.73971700668335, "learning_rate": 2.6225088623432335e-07, "loss": 0.0135, "step": 964650 }, { "epoch": 9.48, "grad_norm": 0.3281862735748291, "learning_rate": 2.621267637800749e-07, "loss": 0.0429, "step": 964675 }, { "epoch": 9.49, "grad_norm": 15.244391441345215, "learning_rate": 2.620026413258264e-07, "loss": 0.0097, "step": 964700 }, { "epoch": 9.49, "grad_norm": 0.16924980282783508, "learning_rate": 2.6187851887157793e-07, "loss": 0.0221, "step": 964725 }, { "epoch": 9.49, "grad_norm": 1.0490844249725342, "learning_rate": 2.617543964173295e-07, "loss": 0.0094, "step": 964750 }, { "epoch": 9.49, "grad_norm": 4.113550662994385, "learning_rate": 2.6163027396308105e-07, "loss": 0.0399, "step": 964775 }, { "epoch": 9.49, "grad_norm": 2.753976821899414, "learning_rate": 2.615061515088326e-07, "loss": 0.0099, "step": 964800 }, { "epoch": 9.49, "grad_norm": 2.912424087524414, "learning_rate": 2.613820290545841e-07, "loss": 0.0692, "step": 964825 }, { "epoch": 9.49, "grad_norm": 5.263787269592285, "learning_rate": 2.6125790660033563e-07, "loss": 0.0107, "step": 964850 }, { "epoch": 9.49, "grad_norm": 0.3502976894378662, "learning_rate": 2.6113378414608716e-07, "loss": 0.0418, "step": 964875 }, { "epoch": 9.49, "grad_norm": 1.3957306146621704, "learning_rate": 2.6100966169183874e-07, "loss": 0.006, "step": 964900 }, { "epoch": 9.49, "grad_norm": 1.3876690864562988, "learning_rate": 2.6088553923759027e-07, "loss": 0.0289, "step": 964925 }, { "epoch": 9.49, "grad_norm": 1.6626830101013184, "learning_rate": 2.607614167833418e-07, "loss": 0.0205, "step": 964950 }, { "epoch": 9.49, "grad_norm": 0.36570367217063904, "learning_rate": 2.6063729432909333e-07, "loss": 0.0263, "step": 964975 }, { "epoch": 9.49, "grad_norm": 9.211922645568848, "learning_rate": 2.6051317187484486e-07, "loss": 0.0157, "step": 965000 }, { "epoch": 9.49, "grad_norm": 0.18699602782726288, "learning_rate": 2.603890494205964e-07, "loss": 0.0308, "step": 965025 }, { "epoch": 9.49, "grad_norm": 2.803394317626953, "learning_rate": 2.6026492696634797e-07, "loss": 0.0145, "step": 965050 }, { "epoch": 9.49, "grad_norm": 0.02150217443704605, "learning_rate": 2.601408045120995e-07, "loss": 0.0502, "step": 965075 }, { "epoch": 9.49, "grad_norm": 1.5963636636734009, "learning_rate": 2.6001668205785103e-07, "loss": 0.0067, "step": 965100 }, { "epoch": 9.49, "grad_norm": 2.1068007946014404, "learning_rate": 2.5989255960360256e-07, "loss": 0.0446, "step": 965125 }, { "epoch": 9.49, "grad_norm": 3.31601881980896, "learning_rate": 2.597684371493541e-07, "loss": 0.0129, "step": 965150 }, { "epoch": 9.49, "grad_norm": 0.013011914677917957, "learning_rate": 2.596443146951056e-07, "loss": 0.0408, "step": 965175 }, { "epoch": 9.49, "grad_norm": 0.5759641528129578, "learning_rate": 2.5952019224085715e-07, "loss": 0.0114, "step": 965200 }, { "epoch": 9.49, "grad_norm": 0.10025477409362793, "learning_rate": 2.5939606978660873e-07, "loss": 0.0148, "step": 965225 }, { "epoch": 9.49, "grad_norm": 11.763481140136719, "learning_rate": 2.5927194733236026e-07, "loss": 0.023, "step": 965250 }, { "epoch": 9.49, "grad_norm": 0.11486819386482239, "learning_rate": 2.591478248781118e-07, "loss": 0.0319, "step": 965275 }, { "epoch": 9.49, "grad_norm": 5.300703525543213, "learning_rate": 2.590237024238633e-07, "loss": 0.01, "step": 965300 }, { "epoch": 9.49, "grad_norm": 0.6341438293457031, "learning_rate": 2.5889957996961484e-07, "loss": 0.035, "step": 965325 }, { "epoch": 9.49, "grad_norm": 12.417943954467773, "learning_rate": 2.5877545751536637e-07, "loss": 0.0194, "step": 965350 }, { "epoch": 9.49, "grad_norm": 0.01520483661442995, "learning_rate": 2.586513350611179e-07, "loss": 0.0213, "step": 965375 }, { "epoch": 9.49, "grad_norm": 1.011568307876587, "learning_rate": 2.5852721260686943e-07, "loss": 0.0022, "step": 965400 }, { "epoch": 9.49, "grad_norm": 1.1003776788711548, "learning_rate": 2.5840309015262096e-07, "loss": 0.0325, "step": 965425 }, { "epoch": 9.49, "grad_norm": 0.6912355422973633, "learning_rate": 2.582789676983725e-07, "loss": 0.0107, "step": 965450 }, { "epoch": 9.49, "grad_norm": 0.8980022668838501, "learning_rate": 2.5815484524412407e-07, "loss": 0.024, "step": 965475 }, { "epoch": 9.49, "grad_norm": 7.4200544357299805, "learning_rate": 2.580307227898756e-07, "loss": 0.0038, "step": 965500 }, { "epoch": 9.49, "grad_norm": 1.359203577041626, "learning_rate": 2.5790660033562713e-07, "loss": 0.0389, "step": 965525 }, { "epoch": 9.49, "grad_norm": 8.78989315032959, "learning_rate": 2.5778247788137866e-07, "loss": 0.0139, "step": 965550 }, { "epoch": 9.49, "grad_norm": 0.14963692426681519, "learning_rate": 2.576583554271302e-07, "loss": 0.0314, "step": 965575 }, { "epoch": 9.49, "grad_norm": 9.965466499328613, "learning_rate": 2.5753423297288177e-07, "loss": 0.0099, "step": 965600 }, { "epoch": 9.49, "grad_norm": 0.033100347965955734, "learning_rate": 2.574101105186333e-07, "loss": 0.0456, "step": 965625 }, { "epoch": 9.49, "grad_norm": 16.982011795043945, "learning_rate": 2.5728598806438483e-07, "loss": 0.0188, "step": 965650 }, { "epoch": 9.49, "grad_norm": 0.6968051195144653, "learning_rate": 2.5716186561013636e-07, "loss": 0.049, "step": 965675 }, { "epoch": 9.49, "grad_norm": 8.907755851745605, "learning_rate": 2.570377431558879e-07, "loss": 0.0348, "step": 965700 }, { "epoch": 9.5, "grad_norm": 1.480342149734497, "learning_rate": 2.569136207016394e-07, "loss": 0.0333, "step": 965725 }, { "epoch": 9.5, "grad_norm": 17.42441749572754, "learning_rate": 2.56789498247391e-07, "loss": 0.0178, "step": 965750 }, { "epoch": 9.5, "grad_norm": 3.847675085067749, "learning_rate": 2.5666537579314253e-07, "loss": 0.0406, "step": 965775 }, { "epoch": 9.5, "grad_norm": 10.206896781921387, "learning_rate": 2.5654125333889406e-07, "loss": 0.0082, "step": 965800 }, { "epoch": 9.5, "grad_norm": 0.024193620309233665, "learning_rate": 2.564171308846456e-07, "loss": 0.0287, "step": 965825 }, { "epoch": 9.5, "grad_norm": 3.6215078830718994, "learning_rate": 2.562930084303971e-07, "loss": 0.0148, "step": 965850 }, { "epoch": 9.5, "grad_norm": 0.7114834189414978, "learning_rate": 2.5616888597614864e-07, "loss": 0.0432, "step": 965875 }, { "epoch": 9.5, "grad_norm": 8.243295669555664, "learning_rate": 2.5604476352190017e-07, "loss": 0.0266, "step": 965900 }, { "epoch": 9.5, "grad_norm": 0.5504003763198853, "learning_rate": 2.5592064106765175e-07, "loss": 0.0284, "step": 965925 }, { "epoch": 9.5, "grad_norm": 11.323022842407227, "learning_rate": 2.557965186134033e-07, "loss": 0.0136, "step": 965950 }, { "epoch": 9.5, "grad_norm": 0.6369190812110901, "learning_rate": 2.556723961591548e-07, "loss": 0.0236, "step": 965975 }, { "epoch": 9.5, "grad_norm": 10.0177583694458, "learning_rate": 2.5554827370490634e-07, "loss": 0.0217, "step": 966000 }, { "epoch": 9.5, "grad_norm": 0.18041296303272247, "learning_rate": 2.5542415125065787e-07, "loss": 0.0373, "step": 966025 }, { "epoch": 9.5, "grad_norm": 6.376460075378418, "learning_rate": 2.553000287964094e-07, "loss": 0.0153, "step": 966050 }, { "epoch": 9.5, "grad_norm": 0.018807657063007355, "learning_rate": 2.5517590634216093e-07, "loss": 0.0765, "step": 966075 }, { "epoch": 9.5, "grad_norm": 0.5037594437599182, "learning_rate": 2.5505178388791246e-07, "loss": 0.0157, "step": 966100 }, { "epoch": 9.5, "grad_norm": 0.0015655334573239088, "learning_rate": 2.54927661433664e-07, "loss": 0.0505, "step": 966125 }, { "epoch": 9.5, "grad_norm": 5.397114276885986, "learning_rate": 2.548035389794155e-07, "loss": 0.0158, "step": 966150 }, { "epoch": 9.5, "grad_norm": 0.002817769767716527, "learning_rate": 2.546794165251671e-07, "loss": 0.0286, "step": 966175 }, { "epoch": 9.5, "grad_norm": 0.4579051733016968, "learning_rate": 2.5455529407091863e-07, "loss": 0.0228, "step": 966200 }, { "epoch": 9.5, "grad_norm": 1.1865004301071167, "learning_rate": 2.5443117161667016e-07, "loss": 0.0383, "step": 966225 }, { "epoch": 9.5, "grad_norm": 1.1065564155578613, "learning_rate": 2.543070491624217e-07, "loss": 0.0169, "step": 966250 }, { "epoch": 9.5, "grad_norm": 0.012135909870266914, "learning_rate": 2.541829267081732e-07, "loss": 0.0494, "step": 966275 }, { "epoch": 9.5, "grad_norm": 10.23420238494873, "learning_rate": 2.540588042539248e-07, "loss": 0.0209, "step": 966300 }, { "epoch": 9.5, "grad_norm": 2.0342562198638916, "learning_rate": 2.539346817996763e-07, "loss": 0.0454, "step": 966325 }, { "epoch": 9.5, "grad_norm": 3.8246216773986816, "learning_rate": 2.5381055934542785e-07, "loss": 0.0075, "step": 966350 }, { "epoch": 9.5, "grad_norm": 0.02130410075187683, "learning_rate": 2.536864368911794e-07, "loss": 0.0287, "step": 966375 }, { "epoch": 9.5, "grad_norm": 7.682073593139648, "learning_rate": 2.535623144369309e-07, "loss": 0.0146, "step": 966400 }, { "epoch": 9.5, "grad_norm": 0.0033976619597524405, "learning_rate": 2.5343819198268244e-07, "loss": 0.0241, "step": 966425 }, { "epoch": 9.5, "grad_norm": 0.42686301469802856, "learning_rate": 2.53314069528434e-07, "loss": 0.0125, "step": 966450 }, { "epoch": 9.5, "grad_norm": 2.1664302349090576, "learning_rate": 2.5318994707418555e-07, "loss": 0.0355, "step": 966475 }, { "epoch": 9.5, "grad_norm": 3.125638008117676, "learning_rate": 2.530658246199371e-07, "loss": 0.0056, "step": 966500 }, { "epoch": 9.5, "grad_norm": 0.35593825578689575, "learning_rate": 2.529417021656886e-07, "loss": 0.0443, "step": 966525 }, { "epoch": 9.5, "grad_norm": 1.2089097499847412, "learning_rate": 2.5281757971144014e-07, "loss": 0.0083, "step": 966550 }, { "epoch": 9.5, "grad_norm": 0.01930662803351879, "learning_rate": 2.5269345725719167e-07, "loss": 0.025, "step": 966575 }, { "epoch": 9.5, "grad_norm": 1.6318963766098022, "learning_rate": 2.525693348029432e-07, "loss": 0.0239, "step": 966600 }, { "epoch": 9.5, "grad_norm": 0.0029326146468520164, "learning_rate": 2.524452123486948e-07, "loss": 0.0502, "step": 966625 }, { "epoch": 9.5, "grad_norm": 0.5048936009407043, "learning_rate": 2.523210898944463e-07, "loss": 0.0209, "step": 966650 }, { "epoch": 9.5, "grad_norm": 0.037537749856710434, "learning_rate": 2.5219696744019784e-07, "loss": 0.0412, "step": 966675 }, { "epoch": 9.5, "grad_norm": 15.935345649719238, "learning_rate": 2.5207284498594937e-07, "loss": 0.0156, "step": 966700 }, { "epoch": 9.5, "grad_norm": 0.9867718815803528, "learning_rate": 2.519487225317009e-07, "loss": 0.0592, "step": 966725 }, { "epoch": 9.51, "grad_norm": 17.34587860107422, "learning_rate": 2.518246000774524e-07, "loss": 0.0128, "step": 966750 }, { "epoch": 9.51, "grad_norm": 0.05315737798810005, "learning_rate": 2.5170047762320396e-07, "loss": 0.0383, "step": 966775 }, { "epoch": 9.51, "grad_norm": 1.4560915231704712, "learning_rate": 2.515763551689555e-07, "loss": 0.023, "step": 966800 }, { "epoch": 9.51, "grad_norm": 0.08146395534276962, "learning_rate": 2.51452232714707e-07, "loss": 0.0416, "step": 966825 }, { "epoch": 9.51, "grad_norm": 6.807024955749512, "learning_rate": 2.5132811026045854e-07, "loss": 0.0084, "step": 966850 }, { "epoch": 9.51, "grad_norm": 0.5532087087631226, "learning_rate": 2.512039878062101e-07, "loss": 0.0317, "step": 966875 }, { "epoch": 9.51, "grad_norm": 0.06277067959308624, "learning_rate": 2.5107986535196165e-07, "loss": 0.0178, "step": 966900 }, { "epoch": 9.51, "grad_norm": 1.3001253604888916, "learning_rate": 2.509557428977132e-07, "loss": 0.0216, "step": 966925 }, { "epoch": 9.51, "grad_norm": 8.892690658569336, "learning_rate": 2.508316204434647e-07, "loss": 0.0182, "step": 966950 }, { "epoch": 9.51, "grad_norm": 0.03180401772260666, "learning_rate": 2.5070749798921624e-07, "loss": 0.0503, "step": 966975 }, { "epoch": 9.51, "grad_norm": 0.8602986931800842, "learning_rate": 2.5058337553496777e-07, "loss": 0.0115, "step": 967000 }, { "epoch": 9.51, "grad_norm": 0.006702135782688856, "learning_rate": 2.5045925308071935e-07, "loss": 0.0393, "step": 967025 }, { "epoch": 9.51, "grad_norm": 11.935375213623047, "learning_rate": 2.503351306264709e-07, "loss": 0.0144, "step": 967050 }, { "epoch": 9.51, "grad_norm": 0.025369739159941673, "learning_rate": 2.502110081722224e-07, "loss": 0.0248, "step": 967075 }, { "epoch": 9.51, "grad_norm": 12.184596061706543, "learning_rate": 2.5008688571797394e-07, "loss": 0.0143, "step": 967100 }, { "epoch": 9.51, "grad_norm": 0.5399293899536133, "learning_rate": 2.4996276326372547e-07, "loss": 0.021, "step": 967125 }, { "epoch": 9.51, "grad_norm": 0.5436526536941528, "learning_rate": 2.4983864080947705e-07, "loss": 0.0106, "step": 967150 }, { "epoch": 9.51, "grad_norm": 0.0546601377427578, "learning_rate": 2.497145183552286e-07, "loss": 0.0427, "step": 967175 }, { "epoch": 9.51, "grad_norm": 7.185077667236328, "learning_rate": 2.495903959009801e-07, "loss": 0.0136, "step": 967200 }, { "epoch": 9.51, "grad_norm": 1.1848022937774658, "learning_rate": 2.4946627344673164e-07, "loss": 0.0368, "step": 967225 }, { "epoch": 9.51, "grad_norm": 9.154365539550781, "learning_rate": 2.4934215099248317e-07, "loss": 0.0129, "step": 967250 }, { "epoch": 9.51, "grad_norm": 1.1762099266052246, "learning_rate": 2.492180285382347e-07, "loss": 0.0271, "step": 967275 }, { "epoch": 9.51, "grad_norm": 3.959289073944092, "learning_rate": 2.490939060839863e-07, "loss": 0.0189, "step": 967300 }, { "epoch": 9.51, "grad_norm": 18.008991241455078, "learning_rate": 2.489697836297378e-07, "loss": 0.0382, "step": 967325 }, { "epoch": 9.51, "grad_norm": 3.7366855144500732, "learning_rate": 2.4884566117548934e-07, "loss": 0.0093, "step": 967350 }, { "epoch": 9.51, "grad_norm": 4.971316814422607, "learning_rate": 2.4872153872124087e-07, "loss": 0.0409, "step": 967375 }, { "epoch": 9.51, "grad_norm": 0.4592166244983673, "learning_rate": 2.485974162669924e-07, "loss": 0.0063, "step": 967400 }, { "epoch": 9.51, "grad_norm": 1.358150839805603, "learning_rate": 2.484732938127439e-07, "loss": 0.0318, "step": 967425 }, { "epoch": 9.51, "grad_norm": 26.277576446533203, "learning_rate": 2.4834917135849545e-07, "loss": 0.0086, "step": 967450 }, { "epoch": 9.51, "grad_norm": 0.07418584078550339, "learning_rate": 2.48225048904247e-07, "loss": 0.0414, "step": 967475 }, { "epoch": 9.51, "grad_norm": 2.9237875938415527, "learning_rate": 2.481009264499985e-07, "loss": 0.0095, "step": 967500 }, { "epoch": 9.51, "grad_norm": 3.274712324142456, "learning_rate": 2.4797680399575004e-07, "loss": 0.0563, "step": 967525 }, { "epoch": 9.51, "grad_norm": 9.207869529724121, "learning_rate": 2.4785268154150157e-07, "loss": 0.0122, "step": 967550 }, { "epoch": 9.51, "grad_norm": 0.19662800431251526, "learning_rate": 2.4772855908725315e-07, "loss": 0.0582, "step": 967575 }, { "epoch": 9.51, "grad_norm": 11.016534805297852, "learning_rate": 2.476044366330047e-07, "loss": 0.0111, "step": 967600 }, { "epoch": 9.51, "grad_norm": 0.01126345619559288, "learning_rate": 2.474803141787562e-07, "loss": 0.0293, "step": 967625 }, { "epoch": 9.51, "grad_norm": 5.485821723937988, "learning_rate": 2.4735619172450774e-07, "loss": 0.0281, "step": 967650 }, { "epoch": 9.51, "grad_norm": 0.017246386036276817, "learning_rate": 2.4723206927025927e-07, "loss": 0.0324, "step": 967675 }, { "epoch": 9.51, "grad_norm": 0.13775929808616638, "learning_rate": 2.471079468160108e-07, "loss": 0.0133, "step": 967700 }, { "epoch": 9.51, "grad_norm": 0.05801993981003761, "learning_rate": 2.469838243617624e-07, "loss": 0.0296, "step": 967725 }, { "epoch": 9.52, "grad_norm": 7.765800476074219, "learning_rate": 2.468597019075139e-07, "loss": 0.0138, "step": 967750 }, { "epoch": 9.52, "grad_norm": 1.4454079866409302, "learning_rate": 2.4673557945326544e-07, "loss": 0.0385, "step": 967775 }, { "epoch": 9.52, "grad_norm": 0.4498841166496277, "learning_rate": 2.4661145699901697e-07, "loss": 0.0124, "step": 967800 }, { "epoch": 9.52, "grad_norm": 1.0664753913879395, "learning_rate": 2.464873345447685e-07, "loss": 0.0534, "step": 967825 }, { "epoch": 9.52, "grad_norm": 1.4329417943954468, "learning_rate": 2.4636321209052e-07, "loss": 0.0115, "step": 967850 }, { "epoch": 9.52, "grad_norm": 0.1310070902109146, "learning_rate": 2.462390896362716e-07, "loss": 0.019, "step": 967875 }, { "epoch": 9.52, "grad_norm": 17.496681213378906, "learning_rate": 2.4611496718202314e-07, "loss": 0.0128, "step": 967900 }, { "epoch": 9.52, "grad_norm": 0.03645811229944229, "learning_rate": 2.4599084472777466e-07, "loss": 0.0256, "step": 967925 }, { "epoch": 9.52, "grad_norm": 0.23286311328411102, "learning_rate": 2.458667222735262e-07, "loss": 0.0103, "step": 967950 }, { "epoch": 9.52, "grad_norm": 0.038765836507081985, "learning_rate": 2.457425998192777e-07, "loss": 0.0356, "step": 967975 }, { "epoch": 9.52, "grad_norm": 2.992678165435791, "learning_rate": 2.456184773650293e-07, "loss": 0.0139, "step": 968000 }, { "epoch": 9.52, "grad_norm": 0.06578067690134048, "learning_rate": 2.4549435491078083e-07, "loss": 0.038, "step": 968025 }, { "epoch": 9.52, "grad_norm": 9.235725402832031, "learning_rate": 2.4537023245653236e-07, "loss": 0.0237, "step": 968050 }, { "epoch": 9.52, "grad_norm": 1.2813953161239624, "learning_rate": 2.452461100022839e-07, "loss": 0.0416, "step": 968075 }, { "epoch": 9.52, "grad_norm": 8.097380638122559, "learning_rate": 2.451219875480354e-07, "loss": 0.0165, "step": 968100 }, { "epoch": 9.52, "grad_norm": 3.9078898429870605, "learning_rate": 2.4499786509378695e-07, "loss": 0.0304, "step": 968125 }, { "epoch": 9.52, "grad_norm": 2.4634876251220703, "learning_rate": 2.448737426395385e-07, "loss": 0.0187, "step": 968150 }, { "epoch": 9.52, "grad_norm": 0.4296697676181793, "learning_rate": 2.4474962018529e-07, "loss": 0.0281, "step": 968175 }, { "epoch": 9.52, "grad_norm": 20.47854232788086, "learning_rate": 2.4462549773104154e-07, "loss": 0.0106, "step": 968200 }, { "epoch": 9.52, "grad_norm": 1.2288861274719238, "learning_rate": 2.4450137527679307e-07, "loss": 0.0347, "step": 968225 }, { "epoch": 9.52, "grad_norm": 6.584914684295654, "learning_rate": 2.443772528225446e-07, "loss": 0.0086, "step": 968250 }, { "epoch": 9.52, "grad_norm": 1.9734079837799072, "learning_rate": 2.442531303682962e-07, "loss": 0.0207, "step": 968275 }, { "epoch": 9.52, "grad_norm": 5.938160419464111, "learning_rate": 2.441290079140477e-07, "loss": 0.0238, "step": 968300 }, { "epoch": 9.52, "grad_norm": 0.02296927012503147, "learning_rate": 2.4400488545979924e-07, "loss": 0.0337, "step": 968325 }, { "epoch": 9.52, "grad_norm": 2.115764856338501, "learning_rate": 2.4388076300555076e-07, "loss": 0.0064, "step": 968350 }, { "epoch": 9.52, "grad_norm": 0.07057475298643112, "learning_rate": 2.437566405513023e-07, "loss": 0.0355, "step": 968375 }, { "epoch": 9.52, "grad_norm": 12.50157642364502, "learning_rate": 2.436325180970538e-07, "loss": 0.0196, "step": 968400 }, { "epoch": 9.52, "grad_norm": 0.10797959566116333, "learning_rate": 2.4351336054097533e-07, "loss": 0.0466, "step": 968425 }, { "epoch": 9.52, "grad_norm": 10.0698823928833, "learning_rate": 2.4338923808672686e-07, "loss": 0.0208, "step": 968450 }, { "epoch": 9.52, "grad_norm": 0.050098612904548645, "learning_rate": 2.4326511563247844e-07, "loss": 0.0321, "step": 968475 }, { "epoch": 9.52, "grad_norm": 6.4663801193237305, "learning_rate": 2.4314099317822997e-07, "loss": 0.0075, "step": 968500 }, { "epoch": 9.52, "grad_norm": 2.2860686779022217, "learning_rate": 2.430168707239815e-07, "loss": 0.0442, "step": 968525 }, { "epoch": 9.52, "grad_norm": 0.759580671787262, "learning_rate": 2.42892748269733e-07, "loss": 0.0047, "step": 968550 }, { "epoch": 9.52, "grad_norm": 0.03266651928424835, "learning_rate": 2.4276862581548455e-07, "loss": 0.0335, "step": 968575 }, { "epoch": 9.52, "grad_norm": 3.530491590499878, "learning_rate": 2.426445033612361e-07, "loss": 0.0239, "step": 968600 }, { "epoch": 9.52, "grad_norm": 0.014100040309131145, "learning_rate": 2.425203809069876e-07, "loss": 0.0253, "step": 968625 }, { "epoch": 9.52, "grad_norm": 0.8337181210517883, "learning_rate": 2.4239625845273914e-07, "loss": 0.009, "step": 968650 }, { "epoch": 9.52, "grad_norm": 0.001468883827328682, "learning_rate": 2.4227710089666065e-07, "loss": 0.0484, "step": 968675 }, { "epoch": 9.52, "grad_norm": 4.962528705596924, "learning_rate": 2.421529784424122e-07, "loss": 0.0115, "step": 968700 }, { "epoch": 9.52, "grad_norm": 0.008011624217033386, "learning_rate": 2.420288559881637e-07, "loss": 0.0417, "step": 968725 }, { "epoch": 9.52, "grad_norm": 3.6452786922454834, "learning_rate": 2.4190473353391523e-07, "loss": 0.0226, "step": 968750 }, { "epoch": 9.53, "grad_norm": 0.00607473636046052, "learning_rate": 2.4178061107966676e-07, "loss": 0.0478, "step": 968775 }, { "epoch": 9.53, "grad_norm": 1.0281798839569092, "learning_rate": 2.416564886254183e-07, "loss": 0.0096, "step": 968800 }, { "epoch": 9.53, "grad_norm": 0.06174534559249878, "learning_rate": 2.4153236617116987e-07, "loss": 0.032, "step": 968825 }, { "epoch": 9.53, "grad_norm": 4.0281195640563965, "learning_rate": 2.414082437169214e-07, "loss": 0.0127, "step": 968850 }, { "epoch": 9.53, "grad_norm": 0.03270820528268814, "learning_rate": 2.4128412126267293e-07, "loss": 0.0229, "step": 968875 }, { "epoch": 9.53, "grad_norm": 8.347216606140137, "learning_rate": 2.4115999880842446e-07, "loss": 0.0125, "step": 968900 }, { "epoch": 9.53, "grad_norm": 5.414042949676514, "learning_rate": 2.41035876354176e-07, "loss": 0.0454, "step": 968925 }, { "epoch": 9.53, "grad_norm": 0.4510125517845154, "learning_rate": 2.4091175389992757e-07, "loss": 0.01, "step": 968950 }, { "epoch": 9.53, "grad_norm": 1.8254735469818115, "learning_rate": 2.407876314456791e-07, "loss": 0.0259, "step": 968975 }, { "epoch": 9.53, "grad_norm": 2.604377031326294, "learning_rate": 2.4066350899143063e-07, "loss": 0.0133, "step": 969000 }, { "epoch": 9.53, "grad_norm": 0.03602069988846779, "learning_rate": 2.4053938653718216e-07, "loss": 0.0332, "step": 969025 }, { "epoch": 9.53, "grad_norm": 0.21318753063678741, "learning_rate": 2.404152640829337e-07, "loss": 0.0065, "step": 969050 }, { "epoch": 9.53, "grad_norm": 0.07854795455932617, "learning_rate": 2.402911416286852e-07, "loss": 0.04, "step": 969075 }, { "epoch": 9.53, "grad_norm": 0.5793028473854065, "learning_rate": 2.4016701917443675e-07, "loss": 0.0162, "step": 969100 }, { "epoch": 9.53, "grad_norm": 0.037162650376558304, "learning_rate": 2.400428967201883e-07, "loss": 0.0235, "step": 969125 }, { "epoch": 9.53, "grad_norm": 3.9395833015441895, "learning_rate": 2.399187742659398e-07, "loss": 0.0143, "step": 969150 }, { "epoch": 9.53, "grad_norm": 0.07376473397016525, "learning_rate": 2.3979465181169133e-07, "loss": 0.0322, "step": 969175 }, { "epoch": 9.53, "grad_norm": 2.709587812423706, "learning_rate": 2.3967052935744286e-07, "loss": 0.0157, "step": 969200 }, { "epoch": 9.53, "grad_norm": 4.638577461242676, "learning_rate": 2.3954640690319444e-07, "loss": 0.0274, "step": 969225 }, { "epoch": 9.53, "grad_norm": 6.555943489074707, "learning_rate": 2.39422284448946e-07, "loss": 0.0115, "step": 969250 }, { "epoch": 9.53, "grad_norm": 0.12859909236431122, "learning_rate": 2.392981619946975e-07, "loss": 0.0244, "step": 969275 }, { "epoch": 9.53, "grad_norm": 9.165563583374023, "learning_rate": 2.3917403954044903e-07, "loss": 0.0167, "step": 969300 }, { "epoch": 9.53, "grad_norm": 4.1137189865112305, "learning_rate": 2.3904991708620056e-07, "loss": 0.0338, "step": 969325 }, { "epoch": 9.53, "grad_norm": 12.466114044189453, "learning_rate": 2.389257946319521e-07, "loss": 0.0265, "step": 969350 }, { "epoch": 9.53, "grad_norm": 0.00806452240794897, "learning_rate": 2.3880167217770367e-07, "loss": 0.0368, "step": 969375 }, { "epoch": 9.53, "grad_norm": 9.745540618896484, "learning_rate": 2.386775497234552e-07, "loss": 0.0136, "step": 969400 }, { "epoch": 9.53, "grad_norm": 0.13791750371456146, "learning_rate": 2.3855342726920673e-07, "loss": 0.0348, "step": 969425 }, { "epoch": 9.53, "grad_norm": 2.247373104095459, "learning_rate": 2.3842930481495826e-07, "loss": 0.0093, "step": 969450 }, { "epoch": 9.53, "grad_norm": 0.07037455588579178, "learning_rate": 2.383051823607098e-07, "loss": 0.0343, "step": 969475 }, { "epoch": 9.53, "grad_norm": 1.2738219499588013, "learning_rate": 2.3818105990646132e-07, "loss": 0.0084, "step": 969500 }, { "epoch": 9.53, "grad_norm": 0.018790876492857933, "learning_rate": 2.380569374522129e-07, "loss": 0.0285, "step": 969525 }, { "epoch": 9.53, "grad_norm": 11.123822212219238, "learning_rate": 2.3793281499796443e-07, "loss": 0.01, "step": 969550 }, { "epoch": 9.53, "grad_norm": 0.493113249540329, "learning_rate": 2.3780869254371596e-07, "loss": 0.0306, "step": 969575 }, { "epoch": 9.53, "grad_norm": 11.975421905517578, "learning_rate": 2.376845700894675e-07, "loss": 0.0215, "step": 969600 }, { "epoch": 9.53, "grad_norm": 0.030968783423304558, "learning_rate": 2.3756044763521902e-07, "loss": 0.0338, "step": 969625 }, { "epoch": 9.53, "grad_norm": 2.6411850452423096, "learning_rate": 2.3743632518097057e-07, "loss": 0.007, "step": 969650 }, { "epoch": 9.53, "grad_norm": 0.060094986110925674, "learning_rate": 2.373122027267221e-07, "loss": 0.0423, "step": 969675 }, { "epoch": 9.53, "grad_norm": 24.913860321044922, "learning_rate": 2.3718808027247363e-07, "loss": 0.018, "step": 969700 }, { "epoch": 9.53, "grad_norm": 0.14598317444324493, "learning_rate": 2.3706395781822516e-07, "loss": 0.0297, "step": 969725 }, { "epoch": 9.53, "grad_norm": 3.789094924926758, "learning_rate": 2.369398353639767e-07, "loss": 0.0101, "step": 969750 }, { "epoch": 9.53, "grad_norm": 0.43805232644081116, "learning_rate": 2.3681571290972822e-07, "loss": 0.0424, "step": 969775 }, { "epoch": 9.54, "grad_norm": 2.4144206047058105, "learning_rate": 2.366915904554798e-07, "loss": 0.0274, "step": 969800 }, { "epoch": 9.54, "grad_norm": 3.8733015060424805, "learning_rate": 2.3656746800123133e-07, "loss": 0.0399, "step": 969825 }, { "epoch": 9.54, "grad_norm": 2.3598625659942627, "learning_rate": 2.3644334554698286e-07, "loss": 0.026, "step": 969850 }, { "epoch": 9.54, "grad_norm": 6.123117446899414, "learning_rate": 2.3631922309273439e-07, "loss": 0.0239, "step": 969875 }, { "epoch": 9.54, "grad_norm": 8.402602195739746, "learning_rate": 2.3619510063848592e-07, "loss": 0.0193, "step": 969900 }, { "epoch": 9.54, "grad_norm": 1.392469048500061, "learning_rate": 2.3607097818423744e-07, "loss": 0.0416, "step": 969925 }, { "epoch": 9.54, "grad_norm": 3.068197727203369, "learning_rate": 2.35946855729989e-07, "loss": 0.0151, "step": 969950 }, { "epoch": 9.54, "grad_norm": 3.4024555683135986, "learning_rate": 2.3582273327574053e-07, "loss": 0.0214, "step": 969975 }, { "epoch": 9.54, "grad_norm": 13.817142486572266, "learning_rate": 2.3569861082149206e-07, "loss": 0.023, "step": 970000 }, { "epoch": 9.54, "grad_norm": 0.02992238663136959, "learning_rate": 2.355744883672436e-07, "loss": 0.0261, "step": 970025 }, { "epoch": 9.54, "grad_norm": 0.3751245141029358, "learning_rate": 2.3545036591299514e-07, "loss": 0.0061, "step": 970050 }, { "epoch": 9.54, "grad_norm": 1.6960794925689697, "learning_rate": 2.353262434587467e-07, "loss": 0.0556, "step": 970075 }, { "epoch": 9.54, "grad_norm": 0.7227498292922974, "learning_rate": 2.3520212100449823e-07, "loss": 0.0137, "step": 970100 }, { "epoch": 9.54, "grad_norm": 1.1538975238800049, "learning_rate": 2.3507799855024976e-07, "loss": 0.0184, "step": 970125 }, { "epoch": 9.54, "grad_norm": 0.07914706319570541, "learning_rate": 2.3495387609600129e-07, "loss": 0.0211, "step": 970150 }, { "epoch": 9.54, "grad_norm": 0.04064148664474487, "learning_rate": 2.3482975364175281e-07, "loss": 0.05, "step": 970175 }, { "epoch": 9.54, "grad_norm": 9.909010887145996, "learning_rate": 2.3470563118750434e-07, "loss": 0.0131, "step": 970200 }, { "epoch": 9.54, "grad_norm": 0.17215535044670105, "learning_rate": 2.3458150873325593e-07, "loss": 0.0283, "step": 970225 }, { "epoch": 9.54, "grad_norm": 8.521060943603516, "learning_rate": 2.3445738627900746e-07, "loss": 0.0094, "step": 970250 }, { "epoch": 9.54, "grad_norm": 14.117029190063477, "learning_rate": 2.3433326382475898e-07, "loss": 0.0456, "step": 970275 }, { "epoch": 9.54, "grad_norm": 5.615100383758545, "learning_rate": 2.3420914137051051e-07, "loss": 0.0198, "step": 970300 }, { "epoch": 9.54, "grad_norm": 0.11723896116018295, "learning_rate": 2.3408501891626204e-07, "loss": 0.056, "step": 970325 }, { "epoch": 9.54, "grad_norm": 1.6526145935058594, "learning_rate": 2.3396089646201357e-07, "loss": 0.016, "step": 970350 }, { "epoch": 9.54, "grad_norm": 0.10004731267690659, "learning_rate": 2.3383677400776513e-07, "loss": 0.0375, "step": 970375 }, { "epoch": 9.54, "grad_norm": 3.833115816116333, "learning_rate": 2.3371265155351666e-07, "loss": 0.0157, "step": 970400 }, { "epoch": 9.54, "grad_norm": 0.08786852657794952, "learning_rate": 2.3358852909926819e-07, "loss": 0.0358, "step": 970425 }, { "epoch": 9.54, "grad_norm": 8.231697082519531, "learning_rate": 2.3346440664501971e-07, "loss": 0.0102, "step": 970450 }, { "epoch": 9.54, "grad_norm": 0.07289015501737595, "learning_rate": 2.3334028419077124e-07, "loss": 0.0284, "step": 970475 }, { "epoch": 9.54, "grad_norm": 7.031651020050049, "learning_rate": 2.3321616173652283e-07, "loss": 0.0206, "step": 970500 }, { "epoch": 9.54, "grad_norm": 0.021497057750821114, "learning_rate": 2.3309203928227435e-07, "loss": 0.0398, "step": 970525 }, { "epoch": 9.54, "grad_norm": 1.8562910556793213, "learning_rate": 2.3296791682802588e-07, "loss": 0.0182, "step": 970550 }, { "epoch": 9.54, "grad_norm": 1.5945453643798828, "learning_rate": 2.328437943737774e-07, "loss": 0.0417, "step": 970575 }, { "epoch": 9.54, "grad_norm": 0.19660894572734833, "learning_rate": 2.3271967191952894e-07, "loss": 0.0125, "step": 970600 }, { "epoch": 9.54, "grad_norm": 0.7986852526664734, "learning_rate": 2.3259554946528047e-07, "loss": 0.0263, "step": 970625 }, { "epoch": 9.54, "grad_norm": 2.0048952102661133, "learning_rate": 2.3247142701103203e-07, "loss": 0.0065, "step": 970650 }, { "epoch": 9.54, "grad_norm": 0.011192385107278824, "learning_rate": 2.3234730455678356e-07, "loss": 0.0351, "step": 970675 }, { "epoch": 9.54, "grad_norm": 0.815254271030426, "learning_rate": 2.3222318210253508e-07, "loss": 0.0085, "step": 970700 }, { "epoch": 9.54, "grad_norm": 0.07962148636579514, "learning_rate": 2.3209905964828664e-07, "loss": 0.0391, "step": 970725 }, { "epoch": 9.54, "grad_norm": 9.279925346374512, "learning_rate": 2.3197493719403817e-07, "loss": 0.0098, "step": 970750 }, { "epoch": 9.54, "grad_norm": 1.8831666707992554, "learning_rate": 2.3185081473978972e-07, "loss": 0.0225, "step": 970775 }, { "epoch": 9.55, "grad_norm": 0.051876500248909, "learning_rate": 2.3172669228554125e-07, "loss": 0.0153, "step": 970800 }, { "epoch": 9.55, "grad_norm": 0.256132036447525, "learning_rate": 2.3160256983129278e-07, "loss": 0.0314, "step": 970825 }, { "epoch": 9.55, "grad_norm": 6.5303473472595215, "learning_rate": 2.314784473770443e-07, "loss": 0.0275, "step": 970850 }, { "epoch": 9.55, "grad_norm": 1.2585073709487915, "learning_rate": 2.3135432492279584e-07, "loss": 0.0354, "step": 970875 }, { "epoch": 9.55, "grad_norm": 16.18411636352539, "learning_rate": 2.3123020246854737e-07, "loss": 0.0137, "step": 970900 }, { "epoch": 9.55, "grad_norm": 0.03921513259410858, "learning_rate": 2.3110608001429895e-07, "loss": 0.0247, "step": 970925 }, { "epoch": 9.55, "grad_norm": 4.807402610778809, "learning_rate": 2.3098195756005048e-07, "loss": 0.0132, "step": 970950 }, { "epoch": 9.55, "grad_norm": 0.18179847300052643, "learning_rate": 2.30857835105802e-07, "loss": 0.0263, "step": 970975 }, { "epoch": 9.55, "grad_norm": 3.3625214099884033, "learning_rate": 2.3073371265155354e-07, "loss": 0.021, "step": 971000 }, { "epoch": 9.55, "grad_norm": 0.2632668912410736, "learning_rate": 2.3060959019730507e-07, "loss": 0.0302, "step": 971025 }, { "epoch": 9.55, "grad_norm": 3.115412473678589, "learning_rate": 2.304854677430566e-07, "loss": 0.0095, "step": 971050 }, { "epoch": 9.55, "grad_norm": 1.3436917066574097, "learning_rate": 2.3036134528880815e-07, "loss": 0.0425, "step": 971075 }, { "epoch": 9.55, "grad_norm": 1.253250241279602, "learning_rate": 2.3023722283455968e-07, "loss": 0.0115, "step": 971100 }, { "epoch": 9.55, "grad_norm": 3.3456246852874756, "learning_rate": 2.3011806527848116e-07, "loss": 0.0503, "step": 971125 }, { "epoch": 9.55, "grad_norm": 5.9390974044799805, "learning_rate": 2.299939428242327e-07, "loss": 0.0125, "step": 971150 }, { "epoch": 9.55, "grad_norm": 0.029003027826547623, "learning_rate": 2.2986982036998422e-07, "loss": 0.0419, "step": 971175 }, { "epoch": 9.55, "grad_norm": 13.98532772064209, "learning_rate": 2.2974569791573577e-07, "loss": 0.0267, "step": 971200 }, { "epoch": 9.55, "grad_norm": 0.28861284255981445, "learning_rate": 2.296215754614873e-07, "loss": 0.0245, "step": 971225 }, { "epoch": 9.55, "grad_norm": 1.014206051826477, "learning_rate": 2.2949745300723886e-07, "loss": 0.0137, "step": 971250 }, { "epoch": 9.55, "grad_norm": 0.10803069919347763, "learning_rate": 2.293733305529904e-07, "loss": 0.0299, "step": 971275 }, { "epoch": 9.55, "grad_norm": 4.940308094024658, "learning_rate": 2.2924920809874192e-07, "loss": 0.0211, "step": 971300 }, { "epoch": 9.55, "grad_norm": 0.8465250134468079, "learning_rate": 2.2912508564449345e-07, "loss": 0.0362, "step": 971325 }, { "epoch": 9.55, "grad_norm": 0.6685778498649597, "learning_rate": 2.2900096319024497e-07, "loss": 0.0115, "step": 971350 }, { "epoch": 9.55, "grad_norm": 1.262785792350769, "learning_rate": 2.288768407359965e-07, "loss": 0.05, "step": 971375 }, { "epoch": 9.55, "grad_norm": 7.663695335388184, "learning_rate": 2.2875271828174809e-07, "loss": 0.0167, "step": 971400 }, { "epoch": 9.55, "grad_norm": 0.055299777537584305, "learning_rate": 2.2862859582749962e-07, "loss": 0.0556, "step": 971425 }, { "epoch": 9.55, "grad_norm": 11.680581092834473, "learning_rate": 2.2850447337325114e-07, "loss": 0.0144, "step": 971450 }, { "epoch": 9.55, "grad_norm": 1.076369047164917, "learning_rate": 2.2838035091900267e-07, "loss": 0.0313, "step": 971475 }, { "epoch": 9.55, "grad_norm": 9.071435928344727, "learning_rate": 2.282562284647542e-07, "loss": 0.0054, "step": 971500 }, { "epoch": 9.55, "grad_norm": 3.2302730083465576, "learning_rate": 2.2813210601050573e-07, "loss": 0.0269, "step": 971525 }, { "epoch": 9.55, "grad_norm": 2.9526498317718506, "learning_rate": 2.280079835562573e-07, "loss": 0.0147, "step": 971550 }, { "epoch": 9.55, "grad_norm": 0.02311255969107151, "learning_rate": 2.2788386110200882e-07, "loss": 0.0254, "step": 971575 }, { "epoch": 9.55, "grad_norm": 3.11457896232605, "learning_rate": 2.2775973864776035e-07, "loss": 0.0247, "step": 971600 }, { "epoch": 9.55, "grad_norm": 4.542696475982666, "learning_rate": 2.2763561619351187e-07, "loss": 0.0504, "step": 971625 }, { "epoch": 9.55, "grad_norm": 14.314400672912598, "learning_rate": 2.275114937392634e-07, "loss": 0.0185, "step": 971650 }, { "epoch": 9.55, "grad_norm": 1.2657326459884644, "learning_rate": 2.2738737128501499e-07, "loss": 0.0427, "step": 971675 }, { "epoch": 9.55, "grad_norm": 9.163630485534668, "learning_rate": 2.2726324883076651e-07, "loss": 0.0219, "step": 971700 }, { "epoch": 9.55, "grad_norm": 0.0851212739944458, "learning_rate": 2.2713912637651804e-07, "loss": 0.0284, "step": 971725 }, { "epoch": 9.55, "grad_norm": 4.106457233428955, "learning_rate": 2.2701500392226957e-07, "loss": 0.0194, "step": 971750 }, { "epoch": 9.55, "grad_norm": 1.4351297616958618, "learning_rate": 2.268908814680211e-07, "loss": 0.0338, "step": 971775 }, { "epoch": 9.55, "grad_norm": 9.453105926513672, "learning_rate": 2.2676675901377263e-07, "loss": 0.0146, "step": 971800 }, { "epoch": 9.56, "grad_norm": 0.24563178420066833, "learning_rate": 2.2664263655952419e-07, "loss": 0.0302, "step": 971825 }, { "epoch": 9.56, "grad_norm": 1.3084070682525635, "learning_rate": 2.2651851410527572e-07, "loss": 0.0062, "step": 971850 }, { "epoch": 9.56, "grad_norm": 0.015138383023440838, "learning_rate": 2.2639439165102727e-07, "loss": 0.0399, "step": 971875 }, { "epoch": 9.56, "grad_norm": 0.5575569868087769, "learning_rate": 2.262702691967788e-07, "loss": 0.0078, "step": 971900 }, { "epoch": 9.56, "grad_norm": 0.015917737036943436, "learning_rate": 2.2614614674253033e-07, "loss": 0.038, "step": 971925 }, { "epoch": 9.56, "grad_norm": 6.621768474578857, "learning_rate": 2.2602202428828186e-07, "loss": 0.0131, "step": 971950 }, { "epoch": 9.56, "grad_norm": 0.018337424844503403, "learning_rate": 2.2589790183403341e-07, "loss": 0.0386, "step": 971975 }, { "epoch": 9.56, "grad_norm": 1.538283109664917, "learning_rate": 2.2577377937978494e-07, "loss": 0.0143, "step": 972000 }, { "epoch": 9.56, "grad_norm": 0.649360716342926, "learning_rate": 2.2564965692553647e-07, "loss": 0.0407, "step": 972025 }, { "epoch": 9.56, "grad_norm": 3.5777173042297363, "learning_rate": 2.25525534471288e-07, "loss": 0.0221, "step": 972050 }, { "epoch": 9.56, "grad_norm": 0.13462437689304352, "learning_rate": 2.2540141201703953e-07, "loss": 0.0362, "step": 972075 }, { "epoch": 9.56, "grad_norm": 10.28186321258545, "learning_rate": 2.252772895627911e-07, "loss": 0.0134, "step": 972100 }, { "epoch": 9.56, "grad_norm": 1.1346933841705322, "learning_rate": 2.2515316710854264e-07, "loss": 0.0424, "step": 972125 }, { "epoch": 9.56, "grad_norm": 1.0529592037200928, "learning_rate": 2.2502904465429417e-07, "loss": 0.0042, "step": 972150 }, { "epoch": 9.56, "grad_norm": 0.012363670393824577, "learning_rate": 2.249049222000457e-07, "loss": 0.03, "step": 972175 }, { "epoch": 9.56, "grad_norm": 3.62600040435791, "learning_rate": 2.2478079974579723e-07, "loss": 0.0154, "step": 972200 }, { "epoch": 9.56, "grad_norm": 5.4997477531433105, "learning_rate": 2.2465667729154876e-07, "loss": 0.0211, "step": 972225 }, { "epoch": 9.56, "grad_norm": 26.201126098632812, "learning_rate": 2.2453255483730031e-07, "loss": 0.0247, "step": 972250 }, { "epoch": 9.56, "grad_norm": 3.7574968338012695, "learning_rate": 2.2440843238305184e-07, "loss": 0.0336, "step": 972275 }, { "epoch": 9.56, "grad_norm": 0.7494056224822998, "learning_rate": 2.2428430992880337e-07, "loss": 0.0131, "step": 972300 }, { "epoch": 9.56, "grad_norm": 5.326322078704834, "learning_rate": 2.241601874745549e-07, "loss": 0.0331, "step": 972325 }, { "epoch": 9.56, "grad_norm": 10.783034324645996, "learning_rate": 2.2403606502030643e-07, "loss": 0.0166, "step": 972350 }, { "epoch": 9.56, "grad_norm": 0.2553349733352661, "learning_rate": 2.2391194256605796e-07, "loss": 0.0338, "step": 972375 }, { "epoch": 9.56, "grad_norm": 0.2946441173553467, "learning_rate": 2.2378782011180954e-07, "loss": 0.0179, "step": 972400 }, { "epoch": 9.56, "grad_norm": 0.059225261211395264, "learning_rate": 2.2366369765756107e-07, "loss": 0.0443, "step": 972425 }, { "epoch": 9.56, "grad_norm": 11.637368202209473, "learning_rate": 2.235395752033126e-07, "loss": 0.0187, "step": 972450 }, { "epoch": 9.56, "grad_norm": 1.5689849853515625, "learning_rate": 2.2341545274906413e-07, "loss": 0.0321, "step": 972475 }, { "epoch": 9.56, "grad_norm": 1.0288054943084717, "learning_rate": 2.2329133029481566e-07, "loss": 0.007, "step": 972500 }, { "epoch": 9.56, "grad_norm": 0.020327966660261154, "learning_rate": 2.231672078405672e-07, "loss": 0.0483, "step": 972525 }, { "epoch": 9.56, "grad_norm": 12.257787704467773, "learning_rate": 2.2304308538631874e-07, "loss": 0.0171, "step": 972550 }, { "epoch": 9.56, "grad_norm": 0.1727207899093628, "learning_rate": 2.229189629320703e-07, "loss": 0.0439, "step": 972575 }, { "epoch": 9.56, "grad_norm": 14.477718353271484, "learning_rate": 2.2279484047782183e-07, "loss": 0.0134, "step": 972600 }, { "epoch": 9.56, "grad_norm": 0.25056931376457214, "learning_rate": 2.2267071802357336e-07, "loss": 0.0259, "step": 972625 }, { "epoch": 9.56, "grad_norm": 8.022568702697754, "learning_rate": 2.2254659556932488e-07, "loss": 0.0218, "step": 972650 }, { "epoch": 9.56, "grad_norm": 0.0027034806553274393, "learning_rate": 2.2242247311507644e-07, "loss": 0.0272, "step": 972675 }, { "epoch": 9.56, "grad_norm": 1.5418660640716553, "learning_rate": 2.2229835066082797e-07, "loss": 0.0115, "step": 972700 }, { "epoch": 9.56, "grad_norm": 0.13204307854175568, "learning_rate": 2.221742282065795e-07, "loss": 0.0403, "step": 972725 }, { "epoch": 9.56, "grad_norm": 0.44693195819854736, "learning_rate": 2.2205010575233103e-07, "loss": 0.0198, "step": 972750 }, { "epoch": 9.56, "grad_norm": 0.04313376173377037, "learning_rate": 2.2192598329808256e-07, "loss": 0.0524, "step": 972775 }, { "epoch": 9.56, "grad_norm": 6.0581560134887695, "learning_rate": 2.2180186084383414e-07, "loss": 0.025, "step": 972800 }, { "epoch": 9.56, "grad_norm": 1.6932268142700195, "learning_rate": 2.2167773838958567e-07, "loss": 0.031, "step": 972825 }, { "epoch": 9.57, "grad_norm": 2.3268444538116455, "learning_rate": 2.215536159353372e-07, "loss": 0.0221, "step": 972850 }, { "epoch": 9.57, "grad_norm": 0.7963128685951233, "learning_rate": 2.2142949348108873e-07, "loss": 0.0544, "step": 972875 }, { "epoch": 9.57, "grad_norm": 0.06348173320293427, "learning_rate": 2.2130537102684026e-07, "loss": 0.0201, "step": 972900 }, { "epoch": 9.57, "grad_norm": 9.724570274353027, "learning_rate": 2.2118124857259178e-07, "loss": 0.0307, "step": 972925 }, { "epoch": 9.57, "grad_norm": 5.76796817779541, "learning_rate": 2.2105712611834334e-07, "loss": 0.0123, "step": 972950 }, { "epoch": 9.57, "grad_norm": 0.02351270243525505, "learning_rate": 2.2093300366409487e-07, "loss": 0.0397, "step": 972975 }, { "epoch": 9.57, "grad_norm": 5.6170430183410645, "learning_rate": 2.208088812098464e-07, "loss": 0.01, "step": 973000 }, { "epoch": 9.57, "grad_norm": 2.477686882019043, "learning_rate": 2.2068475875559793e-07, "loss": 0.0419, "step": 973025 }, { "epoch": 9.57, "grad_norm": 1.3597875833511353, "learning_rate": 2.2056063630134946e-07, "loss": 0.0052, "step": 973050 }, { "epoch": 9.57, "grad_norm": 0.043482113629579544, "learning_rate": 2.2043651384710099e-07, "loss": 0.0441, "step": 973075 }, { "epoch": 9.57, "grad_norm": 1.2641639709472656, "learning_rate": 2.2031239139285257e-07, "loss": 0.0054, "step": 973100 }, { "epoch": 9.57, "grad_norm": 0.007920211181044579, "learning_rate": 2.201882689386041e-07, "loss": 0.0358, "step": 973125 }, { "epoch": 9.57, "grad_norm": 1.6784099340438843, "learning_rate": 2.2006414648435563e-07, "loss": 0.0173, "step": 973150 }, { "epoch": 9.57, "grad_norm": 6.210472583770752, "learning_rate": 2.1994002403010715e-07, "loss": 0.0339, "step": 973175 }, { "epoch": 9.57, "grad_norm": 2.4005696773529053, "learning_rate": 2.1981590157585868e-07, "loss": 0.0158, "step": 973200 }, { "epoch": 9.57, "grad_norm": 0.09634790569543839, "learning_rate": 2.1969177912161024e-07, "loss": 0.0426, "step": 973225 }, { "epoch": 9.57, "grad_norm": 3.582404851913452, "learning_rate": 2.1956765666736177e-07, "loss": 0.0188, "step": 973250 }, { "epoch": 9.57, "grad_norm": 1.2964434623718262, "learning_rate": 2.1944353421311332e-07, "loss": 0.0355, "step": 973275 }, { "epoch": 9.57, "grad_norm": 6.5839948654174805, "learning_rate": 2.1931941175886485e-07, "loss": 0.0129, "step": 973300 }, { "epoch": 9.57, "grad_norm": 0.06196410208940506, "learning_rate": 2.1919528930461638e-07, "loss": 0.0292, "step": 973325 }, { "epoch": 9.57, "grad_norm": 0.4300350844860077, "learning_rate": 2.190711668503679e-07, "loss": 0.0235, "step": 973350 }, { "epoch": 9.57, "grad_norm": 0.05342354252934456, "learning_rate": 2.1894704439611947e-07, "loss": 0.0224, "step": 973375 }, { "epoch": 9.57, "grad_norm": 24.524215698242188, "learning_rate": 2.18822921941871e-07, "loss": 0.0104, "step": 973400 }, { "epoch": 9.57, "grad_norm": 0.05656055361032486, "learning_rate": 2.1869879948762252e-07, "loss": 0.0313, "step": 973425 }, { "epoch": 9.57, "grad_norm": 1.71878182888031, "learning_rate": 2.1857467703337405e-07, "loss": 0.0165, "step": 973450 }, { "epoch": 9.57, "grad_norm": 0.024455711245536804, "learning_rate": 2.1845055457912558e-07, "loss": 0.0246, "step": 973475 }, { "epoch": 9.57, "grad_norm": 9.477363586425781, "learning_rate": 2.183264321248771e-07, "loss": 0.0122, "step": 973500 }, { "epoch": 9.57, "grad_norm": 0.05427304282784462, "learning_rate": 2.182023096706287e-07, "loss": 0.0377, "step": 973525 }, { "epoch": 9.57, "grad_norm": 0.3450061082839966, "learning_rate": 2.1807818721638022e-07, "loss": 0.0112, "step": 973550 }, { "epoch": 9.57, "grad_norm": 0.01796451210975647, "learning_rate": 2.1795406476213175e-07, "loss": 0.0408, "step": 973575 }, { "epoch": 9.57, "grad_norm": 0.032746944576501846, "learning_rate": 2.1782994230788328e-07, "loss": 0.0108, "step": 973600 }, { "epoch": 9.57, "grad_norm": 4.089269161224365, "learning_rate": 2.177058198536348e-07, "loss": 0.0434, "step": 973625 }, { "epoch": 9.57, "grad_norm": 5.8541669845581055, "learning_rate": 2.1758169739938637e-07, "loss": 0.0195, "step": 973650 }, { "epoch": 9.57, "grad_norm": 0.03954401612281799, "learning_rate": 2.174575749451379e-07, "loss": 0.0464, "step": 973675 }, { "epoch": 9.57, "grad_norm": 13.405460357666016, "learning_rate": 2.1733345249088942e-07, "loss": 0.03, "step": 973700 }, { "epoch": 9.57, "grad_norm": 0.01974288560450077, "learning_rate": 2.1720933003664095e-07, "loss": 0.0265, "step": 973725 }, { "epoch": 9.57, "grad_norm": 0.8517428636550903, "learning_rate": 2.1708520758239248e-07, "loss": 0.0124, "step": 973750 }, { "epoch": 9.57, "grad_norm": 0.9323483109474182, "learning_rate": 2.1696108512814404e-07, "loss": 0.0339, "step": 973775 }, { "epoch": 9.57, "grad_norm": 0.1369052231311798, "learning_rate": 2.168369626738956e-07, "loss": 0.0187, "step": 973800 }, { "epoch": 9.57, "grad_norm": 0.03258330747485161, "learning_rate": 2.1671284021964712e-07, "loss": 0.0352, "step": 973825 }, { "epoch": 9.58, "grad_norm": 1.1918002367019653, "learning_rate": 2.1658871776539865e-07, "loss": 0.0233, "step": 973850 }, { "epoch": 9.58, "grad_norm": 2.9442155361175537, "learning_rate": 2.1646459531115018e-07, "loss": 0.0258, "step": 973875 }, { "epoch": 9.58, "grad_norm": 8.364409446716309, "learning_rate": 2.163404728569017e-07, "loss": 0.014, "step": 973900 }, { "epoch": 9.58, "grad_norm": 0.014529622159898281, "learning_rate": 2.1621635040265324e-07, "loss": 0.0234, "step": 973925 }, { "epoch": 9.58, "grad_norm": 9.140022277832031, "learning_rate": 2.160922279484048e-07, "loss": 0.0194, "step": 973950 }, { "epoch": 9.58, "grad_norm": 0.5580251216888428, "learning_rate": 2.1596810549415635e-07, "loss": 0.0328, "step": 973975 }, { "epoch": 9.58, "grad_norm": 5.24862003326416, "learning_rate": 2.1584398303990788e-07, "loss": 0.0086, "step": 974000 }, { "epoch": 9.58, "grad_norm": 0.013735879212617874, "learning_rate": 2.157198605856594e-07, "loss": 0.0411, "step": 974025 }, { "epoch": 9.58, "grad_norm": 13.281846046447754, "learning_rate": 2.1559573813141094e-07, "loss": 0.0189, "step": 974050 }, { "epoch": 9.58, "grad_norm": 0.05755803361535072, "learning_rate": 2.154716156771625e-07, "loss": 0.0317, "step": 974075 }, { "epoch": 9.58, "grad_norm": 2.8958020210266113, "learning_rate": 2.1534749322291402e-07, "loss": 0.0183, "step": 974100 }, { "epoch": 9.58, "grad_norm": 0.06620004773139954, "learning_rate": 2.1522337076866555e-07, "loss": 0.0286, "step": 974125 }, { "epoch": 9.58, "grad_norm": 9.183210372924805, "learning_rate": 2.1509924831441708e-07, "loss": 0.0135, "step": 974150 }, { "epoch": 9.58, "grad_norm": 6.114920616149902, "learning_rate": 2.149751258601686e-07, "loss": 0.0417, "step": 974175 }, { "epoch": 9.58, "grad_norm": 2.2932374477386475, "learning_rate": 2.1485100340592014e-07, "loss": 0.0152, "step": 974200 }, { "epoch": 9.58, "grad_norm": 4.023693561553955, "learning_rate": 2.1472688095167172e-07, "loss": 0.0446, "step": 974225 }, { "epoch": 9.58, "grad_norm": 13.803010940551758, "learning_rate": 2.1460275849742325e-07, "loss": 0.0173, "step": 974250 }, { "epoch": 9.58, "grad_norm": 0.05379001051187515, "learning_rate": 2.1447863604317478e-07, "loss": 0.0315, "step": 974275 }, { "epoch": 9.58, "grad_norm": 8.783828735351562, "learning_rate": 2.143545135889263e-07, "loss": 0.0191, "step": 974300 }, { "epoch": 9.58, "grad_norm": 0.008559847250580788, "learning_rate": 2.1423039113467784e-07, "loss": 0.0365, "step": 974325 }, { "epoch": 9.58, "grad_norm": 3.538055181503296, "learning_rate": 2.1410626868042937e-07, "loss": 0.0109, "step": 974350 }, { "epoch": 9.58, "grad_norm": 6.472263813018799, "learning_rate": 2.1398214622618092e-07, "loss": 0.0581, "step": 974375 }, { "epoch": 9.58, "grad_norm": 17.426794052124023, "learning_rate": 2.1385802377193245e-07, "loss": 0.0137, "step": 974400 }, { "epoch": 9.58, "grad_norm": 1.870612621307373, "learning_rate": 2.1373390131768398e-07, "loss": 0.0279, "step": 974425 }, { "epoch": 9.58, "grad_norm": 3.786895751953125, "learning_rate": 2.136097788634355e-07, "loss": 0.0177, "step": 974450 }, { "epoch": 9.58, "grad_norm": 0.039820194244384766, "learning_rate": 2.1348565640918706e-07, "loss": 0.0249, "step": 974475 }, { "epoch": 9.58, "grad_norm": 6.498363494873047, "learning_rate": 2.1336153395493862e-07, "loss": 0.0203, "step": 974500 }, { "epoch": 9.58, "grad_norm": 0.838645339012146, "learning_rate": 2.1323741150069015e-07, "loss": 0.0203, "step": 974525 }, { "epoch": 9.58, "grad_norm": 3.275780439376831, "learning_rate": 2.1311328904644168e-07, "loss": 0.0112, "step": 974550 }, { "epoch": 9.58, "grad_norm": 1.209729790687561, "learning_rate": 2.129891665921932e-07, "loss": 0.0285, "step": 974575 }, { "epoch": 9.58, "grad_norm": 0.0447244718670845, "learning_rate": 2.1286504413794474e-07, "loss": 0.015, "step": 974600 }, { "epoch": 9.58, "grad_norm": 0.12157727032899857, "learning_rate": 2.1274092168369627e-07, "loss": 0.0472, "step": 974625 }, { "epoch": 9.58, "grad_norm": 0.21670909225940704, "learning_rate": 2.1261679922944785e-07, "loss": 0.0112, "step": 974650 }, { "epoch": 9.58, "grad_norm": 6.332337379455566, "learning_rate": 2.1249267677519938e-07, "loss": 0.041, "step": 974675 }, { "epoch": 9.58, "grad_norm": 0.3883007764816284, "learning_rate": 2.123685543209509e-07, "loss": 0.0266, "step": 974700 }, { "epoch": 9.58, "grad_norm": 0.048135388642549515, "learning_rate": 2.1224443186670243e-07, "loss": 0.0281, "step": 974725 }, { "epoch": 9.58, "grad_norm": 5.514507293701172, "learning_rate": 2.1212030941245396e-07, "loss": 0.0076, "step": 974750 }, { "epoch": 9.58, "grad_norm": 0.32000166177749634, "learning_rate": 2.1199618695820552e-07, "loss": 0.037, "step": 974775 }, { "epoch": 9.58, "grad_norm": 2.051560640335083, "learning_rate": 2.1187206450395705e-07, "loss": 0.0089, "step": 974800 }, { "epoch": 9.58, "grad_norm": 0.13036668300628662, "learning_rate": 2.117529069478785e-07, "loss": 0.0571, "step": 974825 }, { "epoch": 9.58, "grad_norm": 3.2213284969329834, "learning_rate": 2.1162878449363006e-07, "loss": 0.0201, "step": 974850 }, { "epoch": 9.59, "grad_norm": 0.0973752811551094, "learning_rate": 2.1150466203938158e-07, "loss": 0.0323, "step": 974875 }, { "epoch": 9.59, "grad_norm": 8.656510353088379, "learning_rate": 2.1138053958513311e-07, "loss": 0.0084, "step": 974900 }, { "epoch": 9.59, "grad_norm": 0.02321835421025753, "learning_rate": 2.1125641713088464e-07, "loss": 0.04, "step": 974925 }, { "epoch": 9.59, "grad_norm": 1.4737584590911865, "learning_rate": 2.111322946766362e-07, "loss": 0.0172, "step": 974950 }, { "epoch": 9.59, "grad_norm": 9.215932846069336, "learning_rate": 2.1100817222238775e-07, "loss": 0.0471, "step": 974975 }, { "epoch": 9.59, "grad_norm": 6.917673110961914, "learning_rate": 2.1088404976813928e-07, "loss": 0.0116, "step": 975000 }, { "epoch": 9.59, "grad_norm": 0.05150993913412094, "learning_rate": 2.107599273138908e-07, "loss": 0.0384, "step": 975025 }, { "epoch": 9.59, "grad_norm": 7.18499231338501, "learning_rate": 2.1063580485964234e-07, "loss": 0.0087, "step": 975050 }, { "epoch": 9.59, "grad_norm": 2.4500057697296143, "learning_rate": 2.1051168240539387e-07, "loss": 0.0409, "step": 975075 }, { "epoch": 9.59, "grad_norm": 2.1762521266937256, "learning_rate": 2.103875599511454e-07, "loss": 0.0065, "step": 975100 }, { "epoch": 9.59, "grad_norm": 0.17522241175174713, "learning_rate": 2.1026343749689698e-07, "loss": 0.0564, "step": 975125 }, { "epoch": 9.59, "grad_norm": 4.015005111694336, "learning_rate": 2.101393150426485e-07, "loss": 0.0142, "step": 975150 }, { "epoch": 9.59, "grad_norm": 0.019413424655795097, "learning_rate": 2.1001519258840004e-07, "loss": 0.0522, "step": 975175 }, { "epoch": 9.59, "grad_norm": 0.5179920196533203, "learning_rate": 2.0989107013415157e-07, "loss": 0.0179, "step": 975200 }, { "epoch": 9.59, "grad_norm": 0.09809897094964981, "learning_rate": 2.097669476799031e-07, "loss": 0.0372, "step": 975225 }, { "epoch": 9.59, "grad_norm": 2.8810534477233887, "learning_rate": 2.0964282522565465e-07, "loss": 0.0108, "step": 975250 }, { "epoch": 9.59, "grad_norm": 0.2421596348285675, "learning_rate": 2.0951870277140618e-07, "loss": 0.0385, "step": 975275 }, { "epoch": 9.59, "grad_norm": 2.307326078414917, "learning_rate": 2.093945803171577e-07, "loss": 0.0121, "step": 975300 }, { "epoch": 9.59, "grad_norm": 0.04687961935997009, "learning_rate": 2.0927045786290924e-07, "loss": 0.0387, "step": 975325 }, { "epoch": 9.59, "grad_norm": 1.801830768585205, "learning_rate": 2.0914633540866077e-07, "loss": 0.0046, "step": 975350 }, { "epoch": 9.59, "grad_norm": 0.10303357243537903, "learning_rate": 2.090222129544123e-07, "loss": 0.0237, "step": 975375 }, { "epoch": 9.59, "grad_norm": 8.811783790588379, "learning_rate": 2.0889809050016388e-07, "loss": 0.0141, "step": 975400 }, { "epoch": 9.59, "grad_norm": 1.0858334302902222, "learning_rate": 2.087739680459154e-07, "loss": 0.0387, "step": 975425 }, { "epoch": 9.59, "grad_norm": 10.356719970703125, "learning_rate": 2.0864984559166694e-07, "loss": 0.0142, "step": 975450 }, { "epoch": 9.59, "grad_norm": 0.12138742953538895, "learning_rate": 2.0852572313741847e-07, "loss": 0.037, "step": 975475 }, { "epoch": 9.59, "grad_norm": 1.224862813949585, "learning_rate": 2.0840160068317e-07, "loss": 0.0268, "step": 975500 }, { "epoch": 9.59, "grad_norm": 0.022306809201836586, "learning_rate": 2.0827747822892153e-07, "loss": 0.0314, "step": 975525 }, { "epoch": 9.59, "grad_norm": 7.678967475891113, "learning_rate": 2.0815335577467308e-07, "loss": 0.008, "step": 975550 }, { "epoch": 9.59, "grad_norm": 1.096982479095459, "learning_rate": 2.080292333204246e-07, "loss": 0.0321, "step": 975575 }, { "epoch": 9.59, "grad_norm": 3.327624797821045, "learning_rate": 2.0790511086617614e-07, "loss": 0.01, "step": 975600 }, { "epoch": 9.59, "grad_norm": 0.060461003333330154, "learning_rate": 2.077809884119277e-07, "loss": 0.0364, "step": 975625 }, { "epoch": 9.59, "grad_norm": 0.6669676303863525, "learning_rate": 2.0765686595767922e-07, "loss": 0.0151, "step": 975650 }, { "epoch": 9.59, "grad_norm": 0.038347937166690826, "learning_rate": 2.0753274350343078e-07, "loss": 0.0341, "step": 975675 }, { "epoch": 9.59, "grad_norm": 3.963334321975708, "learning_rate": 2.074086210491823e-07, "loss": 0.0232, "step": 975700 }, { "epoch": 9.59, "grad_norm": 0.058559298515319824, "learning_rate": 2.0728449859493384e-07, "loss": 0.0416, "step": 975725 }, { "epoch": 9.59, "grad_norm": 6.677001476287842, "learning_rate": 2.0716037614068537e-07, "loss": 0.0201, "step": 975750 }, { "epoch": 9.59, "grad_norm": 0.09321776777505875, "learning_rate": 2.070362536864369e-07, "loss": 0.0445, "step": 975775 }, { "epoch": 9.59, "grad_norm": 16.185739517211914, "learning_rate": 2.0691213123218843e-07, "loss": 0.0193, "step": 975800 }, { "epoch": 9.59, "grad_norm": 1.1797175407409668, "learning_rate": 2.0678800877794e-07, "loss": 0.0535, "step": 975825 }, { "epoch": 9.59, "grad_norm": 0.9995810985565186, "learning_rate": 2.0666388632369154e-07, "loss": 0.0277, "step": 975850 }, { "epoch": 9.59, "grad_norm": 2.645777940750122, "learning_rate": 2.0653976386944307e-07, "loss": 0.0274, "step": 975875 }, { "epoch": 9.6, "grad_norm": 0.39052000641822815, "learning_rate": 2.064156414151946e-07, "loss": 0.0164, "step": 975900 }, { "epoch": 9.6, "grad_norm": 0.03088727779686451, "learning_rate": 2.0629151896094612e-07, "loss": 0.0392, "step": 975925 }, { "epoch": 9.6, "grad_norm": 21.336410522460938, "learning_rate": 2.0616739650669765e-07, "loss": 0.0343, "step": 975950 }, { "epoch": 9.6, "grad_norm": 0.10784433782100677, "learning_rate": 2.060432740524492e-07, "loss": 0.0362, "step": 975975 }, { "epoch": 9.6, "grad_norm": 1.791175365447998, "learning_rate": 2.0591915159820074e-07, "loss": 0.0115, "step": 976000 }, { "epoch": 9.6, "grad_norm": 0.012470949441194534, "learning_rate": 2.0579502914395227e-07, "loss": 0.0362, "step": 976025 }, { "epoch": 9.6, "grad_norm": 11.067075729370117, "learning_rate": 2.056709066897038e-07, "loss": 0.014, "step": 976050 }, { "epoch": 9.6, "grad_norm": 3.1855733394622803, "learning_rate": 2.0554678423545533e-07, "loss": 0.0364, "step": 976075 }, { "epoch": 9.6, "grad_norm": 0.13979041576385498, "learning_rate": 2.054226617812069e-07, "loss": 0.0194, "step": 976100 }, { "epoch": 9.6, "grad_norm": 2.6454992294311523, "learning_rate": 2.0529853932695844e-07, "loss": 0.0469, "step": 976125 }, { "epoch": 9.6, "grad_norm": 1.3409576416015625, "learning_rate": 2.0517441687270997e-07, "loss": 0.0088, "step": 976150 }, { "epoch": 9.6, "grad_norm": 11.259638786315918, "learning_rate": 2.050502944184615e-07, "loss": 0.0252, "step": 976175 }, { "epoch": 9.6, "grad_norm": 7.9809346199035645, "learning_rate": 2.0492617196421302e-07, "loss": 0.0088, "step": 976200 }, { "epoch": 9.6, "grad_norm": 0.5433737635612488, "learning_rate": 2.0480204950996455e-07, "loss": 0.0277, "step": 976225 }, { "epoch": 9.6, "grad_norm": 4.726560115814209, "learning_rate": 2.046779270557161e-07, "loss": 0.0135, "step": 976250 }, { "epoch": 9.6, "grad_norm": 1.124742031097412, "learning_rate": 2.0455380460146764e-07, "loss": 0.025, "step": 976275 }, { "epoch": 9.6, "grad_norm": 0.1938445270061493, "learning_rate": 2.0442968214721917e-07, "loss": 0.0132, "step": 976300 }, { "epoch": 9.6, "grad_norm": 0.4342016577720642, "learning_rate": 2.0430555969297072e-07, "loss": 0.0283, "step": 976325 }, { "epoch": 9.6, "grad_norm": 0.10890165716409683, "learning_rate": 2.0418143723872225e-07, "loss": 0.0112, "step": 976350 }, { "epoch": 9.6, "grad_norm": 0.012600066140294075, "learning_rate": 2.0405731478447378e-07, "loss": 0.0326, "step": 976375 }, { "epoch": 9.6, "grad_norm": 2.2741706371307373, "learning_rate": 2.0393319233022534e-07, "loss": 0.0132, "step": 976400 }, { "epoch": 9.6, "grad_norm": 0.06777478754520416, "learning_rate": 2.0380906987597686e-07, "loss": 0.0275, "step": 976425 }, { "epoch": 9.6, "grad_norm": 7.759615898132324, "learning_rate": 2.036849474217284e-07, "loss": 0.0284, "step": 976450 }, { "epoch": 9.6, "grad_norm": 0.1790325790643692, "learning_rate": 2.0356082496747992e-07, "loss": 0.0368, "step": 976475 }, { "epoch": 9.6, "grad_norm": 5.117609977722168, "learning_rate": 2.0343670251323145e-07, "loss": 0.0057, "step": 976500 }, { "epoch": 9.6, "grad_norm": 0.40041065216064453, "learning_rate": 2.0331258005898303e-07, "loss": 0.0437, "step": 976525 }, { "epoch": 9.6, "grad_norm": 31.813749313354492, "learning_rate": 2.0318845760473456e-07, "loss": 0.0158, "step": 976550 }, { "epoch": 9.6, "grad_norm": 0.05354330316185951, "learning_rate": 2.030643351504861e-07, "loss": 0.05, "step": 976575 }, { "epoch": 9.6, "grad_norm": 4.0699896812438965, "learning_rate": 2.0294021269623762e-07, "loss": 0.0123, "step": 976600 }, { "epoch": 9.6, "grad_norm": 2.3636693954467773, "learning_rate": 2.0281609024198915e-07, "loss": 0.0265, "step": 976625 }, { "epoch": 9.6, "grad_norm": 4.912686347961426, "learning_rate": 2.0269196778774068e-07, "loss": 0.0086, "step": 976650 }, { "epoch": 9.6, "grad_norm": 2.1163814067840576, "learning_rate": 2.0256784533349224e-07, "loss": 0.0367, "step": 976675 }, { "epoch": 9.6, "grad_norm": 2.3450300693511963, "learning_rate": 2.0244372287924376e-07, "loss": 0.0155, "step": 976700 }, { "epoch": 9.6, "grad_norm": 0.5181971788406372, "learning_rate": 2.023196004249953e-07, "loss": 0.0521, "step": 976725 }, { "epoch": 9.6, "grad_norm": 2.884575128555298, "learning_rate": 2.0219547797074682e-07, "loss": 0.0079, "step": 976750 }, { "epoch": 9.6, "grad_norm": 0.21000340580940247, "learning_rate": 2.0207135551649835e-07, "loss": 0.0284, "step": 976775 }, { "epoch": 9.6, "grad_norm": 8.227453231811523, "learning_rate": 2.0194723306224993e-07, "loss": 0.0293, "step": 976800 }, { "epoch": 9.6, "grad_norm": 8.143471717834473, "learning_rate": 2.0182311060800146e-07, "loss": 0.0489, "step": 976825 }, { "epoch": 9.6, "grad_norm": 11.103124618530273, "learning_rate": 2.01698988153753e-07, "loss": 0.0172, "step": 976850 }, { "epoch": 9.6, "grad_norm": 0.18639419972896576, "learning_rate": 2.0157486569950452e-07, "loss": 0.0305, "step": 976875 }, { "epoch": 9.61, "grad_norm": 11.834212303161621, "learning_rate": 2.0145074324525605e-07, "loss": 0.0095, "step": 976900 }, { "epoch": 9.61, "grad_norm": 3.7905426025390625, "learning_rate": 2.0132662079100758e-07, "loss": 0.0311, "step": 976925 }, { "epoch": 9.61, "grad_norm": 6.675472736358643, "learning_rate": 2.0120249833675913e-07, "loss": 0.0268, "step": 976950 }, { "epoch": 9.61, "grad_norm": 0.008550863713026047, "learning_rate": 2.0107837588251066e-07, "loss": 0.0244, "step": 976975 }, { "epoch": 9.61, "grad_norm": 5.97639274597168, "learning_rate": 2.009542534282622e-07, "loss": 0.0143, "step": 977000 }, { "epoch": 9.61, "grad_norm": 0.9157442450523376, "learning_rate": 2.0083013097401375e-07, "loss": 0.0246, "step": 977025 }, { "epoch": 9.61, "grad_norm": 6.006672382354736, "learning_rate": 2.0070600851976528e-07, "loss": 0.0183, "step": 977050 }, { "epoch": 9.61, "grad_norm": 0.017990615218877792, "learning_rate": 2.005818860655168e-07, "loss": 0.0276, "step": 977075 }, { "epoch": 9.61, "grad_norm": 3.5457563400268555, "learning_rate": 2.0045776361126836e-07, "loss": 0.0144, "step": 977100 }, { "epoch": 9.61, "grad_norm": 0.6032819747924805, "learning_rate": 2.003336411570199e-07, "loss": 0.0232, "step": 977125 }, { "epoch": 9.61, "grad_norm": 13.090851783752441, "learning_rate": 2.0020951870277142e-07, "loss": 0.0089, "step": 977150 }, { "epoch": 9.61, "grad_norm": 3.375868320465088, "learning_rate": 2.0008539624852295e-07, "loss": 0.0412, "step": 977175 }, { "epoch": 9.61, "grad_norm": 0.13274186849594116, "learning_rate": 1.9996127379427448e-07, "loss": 0.0172, "step": 977200 }, { "epoch": 9.61, "grad_norm": 0.017199410125613213, "learning_rate": 1.9983715134002606e-07, "loss": 0.0387, "step": 977225 }, { "epoch": 9.61, "grad_norm": 1.1967380046844482, "learning_rate": 1.997130288857776e-07, "loss": 0.0117, "step": 977250 }, { "epoch": 9.61, "grad_norm": 0.37246739864349365, "learning_rate": 1.9958890643152912e-07, "loss": 0.0513, "step": 977275 }, { "epoch": 9.61, "grad_norm": 0.6165207624435425, "learning_rate": 1.9946478397728065e-07, "loss": 0.0176, "step": 977300 }, { "epoch": 9.61, "grad_norm": 0.04049589857459068, "learning_rate": 1.9934066152303218e-07, "loss": 0.046, "step": 977325 }, { "epoch": 9.61, "grad_norm": 1.0345447063446045, "learning_rate": 1.992165390687837e-07, "loss": 0.0114, "step": 977350 }, { "epoch": 9.61, "grad_norm": 0.013184495270252228, "learning_rate": 1.9909241661453526e-07, "loss": 0.0357, "step": 977375 }, { "epoch": 9.61, "grad_norm": 0.38713589310646057, "learning_rate": 1.989682941602868e-07, "loss": 0.0103, "step": 977400 }, { "epoch": 9.61, "grad_norm": 1.370193362236023, "learning_rate": 1.9884417170603832e-07, "loss": 0.0379, "step": 977425 }, { "epoch": 9.61, "grad_norm": 8.623882293701172, "learning_rate": 1.9872004925178985e-07, "loss": 0.0085, "step": 977450 }, { "epoch": 9.61, "grad_norm": 4.133882999420166, "learning_rate": 1.9859592679754138e-07, "loss": 0.0444, "step": 977475 }, { "epoch": 9.61, "grad_norm": 0.8977329134941101, "learning_rate": 1.984718043432929e-07, "loss": 0.016, "step": 977500 }, { "epoch": 9.61, "grad_norm": 0.8296647071838379, "learning_rate": 1.983476818890445e-07, "loss": 0.0437, "step": 977525 }, { "epoch": 9.61, "grad_norm": 1.9327603578567505, "learning_rate": 1.9822355943479602e-07, "loss": 0.0157, "step": 977550 }, { "epoch": 9.61, "grad_norm": 0.1258988082408905, "learning_rate": 1.9809943698054755e-07, "loss": 0.0392, "step": 977575 }, { "epoch": 9.61, "grad_norm": 0.05727938190102577, "learning_rate": 1.9797531452629908e-07, "loss": 0.01, "step": 977600 }, { "epoch": 9.61, "grad_norm": 2.017871856689453, "learning_rate": 1.978511920720506e-07, "loss": 0.0396, "step": 977625 }, { "epoch": 9.61, "grad_norm": 5.046768665313721, "learning_rate": 1.9772706961780216e-07, "loss": 0.0169, "step": 977650 }, { "epoch": 9.61, "grad_norm": 0.008217268623411655, "learning_rate": 1.976029471635537e-07, "loss": 0.0206, "step": 977675 }, { "epoch": 9.61, "grad_norm": 0.2656939625740051, "learning_rate": 1.9747882470930522e-07, "loss": 0.025, "step": 977700 }, { "epoch": 9.61, "grad_norm": 0.09731155633926392, "learning_rate": 1.9735470225505677e-07, "loss": 0.0321, "step": 977725 }, { "epoch": 9.61, "grad_norm": 12.101512908935547, "learning_rate": 1.972305798008083e-07, "loss": 0.0149, "step": 977750 }, { "epoch": 9.61, "grad_norm": 0.03720643371343613, "learning_rate": 1.9710645734655983e-07, "loss": 0.03, "step": 977775 }, { "epoch": 9.61, "grad_norm": 2.632134437561035, "learning_rate": 1.969823348923114e-07, "loss": 0.0097, "step": 977800 }, { "epoch": 9.61, "grad_norm": 0.01935841701924801, "learning_rate": 1.9686317733623284e-07, "loss": 0.0439, "step": 977825 }, { "epoch": 9.61, "grad_norm": 2.2289953231811523, "learning_rate": 1.967390548819844e-07, "loss": 0.0204, "step": 977850 }, { "epoch": 9.61, "grad_norm": 1.2426222562789917, "learning_rate": 1.9661493242773592e-07, "loss": 0.0209, "step": 977875 }, { "epoch": 9.61, "grad_norm": 7.404618740081787, "learning_rate": 1.9649080997348745e-07, "loss": 0.0212, "step": 977900 }, { "epoch": 9.62, "grad_norm": 0.11676474660634995, "learning_rate": 1.9636668751923898e-07, "loss": 0.0311, "step": 977925 }, { "epoch": 9.62, "grad_norm": 9.543611526489258, "learning_rate": 1.962425650649905e-07, "loss": 0.0201, "step": 977950 }, { "epoch": 9.62, "grad_norm": 0.09240484982728958, "learning_rate": 1.9611844261074204e-07, "loss": 0.0315, "step": 977975 }, { "epoch": 9.62, "grad_norm": 0.8171278238296509, "learning_rate": 1.9599432015649362e-07, "loss": 0.0107, "step": 978000 }, { "epoch": 9.62, "grad_norm": 0.09858787804841995, "learning_rate": 1.9587019770224515e-07, "loss": 0.0387, "step": 978025 }, { "epoch": 9.62, "grad_norm": 4.9974284172058105, "learning_rate": 1.9574607524799668e-07, "loss": 0.0252, "step": 978050 }, { "epoch": 9.62, "grad_norm": 6.449034214019775, "learning_rate": 1.956219527937482e-07, "loss": 0.0195, "step": 978075 }, { "epoch": 9.62, "grad_norm": 0.35129791498184204, "learning_rate": 1.9549783033949974e-07, "loss": 0.0035, "step": 978100 }, { "epoch": 9.62, "grad_norm": 1.806728482246399, "learning_rate": 1.953737078852513e-07, "loss": 0.0299, "step": 978125 }, { "epoch": 9.62, "grad_norm": 15.363359451293945, "learning_rate": 1.9524958543100282e-07, "loss": 0.0178, "step": 978150 }, { "epoch": 9.62, "grad_norm": 0.004878561478108168, "learning_rate": 1.9512546297675438e-07, "loss": 0.033, "step": 978175 }, { "epoch": 9.62, "grad_norm": 2.8074021339416504, "learning_rate": 1.950013405225059e-07, "loss": 0.0149, "step": 978200 }, { "epoch": 9.62, "grad_norm": 0.032384004443883896, "learning_rate": 1.9487721806825744e-07, "loss": 0.0535, "step": 978225 }, { "epoch": 9.62, "grad_norm": 2.0013206005096436, "learning_rate": 1.9475309561400897e-07, "loss": 0.0199, "step": 978250 }, { "epoch": 9.62, "grad_norm": 1.087753176689148, "learning_rate": 1.9462897315976052e-07, "loss": 0.0255, "step": 978275 }, { "epoch": 9.62, "grad_norm": 9.150458335876465, "learning_rate": 1.9450485070551205e-07, "loss": 0.0212, "step": 978300 }, { "epoch": 9.62, "grad_norm": 1.2211439609527588, "learning_rate": 1.9438072825126358e-07, "loss": 0.0294, "step": 978325 }, { "epoch": 9.62, "grad_norm": 6.666106700897217, "learning_rate": 1.942566057970151e-07, "loss": 0.0129, "step": 978350 }, { "epoch": 9.62, "grad_norm": 0.06866811960935593, "learning_rate": 1.9413248334276664e-07, "loss": 0.031, "step": 978375 }, { "epoch": 9.62, "grad_norm": 0.25383657217025757, "learning_rate": 1.9400836088851817e-07, "loss": 0.0092, "step": 978400 }, { "epoch": 9.62, "grad_norm": 1.2626214027404785, "learning_rate": 1.9388423843426975e-07, "loss": 0.0274, "step": 978425 }, { "epoch": 9.62, "grad_norm": 0.20561517775058746, "learning_rate": 1.9376011598002128e-07, "loss": 0.0143, "step": 978450 }, { "epoch": 9.62, "grad_norm": 1.0516104698181152, "learning_rate": 1.936359935257728e-07, "loss": 0.047, "step": 978475 }, { "epoch": 9.62, "grad_norm": 0.5510926842689514, "learning_rate": 1.9351187107152434e-07, "loss": 0.0225, "step": 978500 }, { "epoch": 9.62, "grad_norm": 0.011143457144498825, "learning_rate": 1.9338774861727587e-07, "loss": 0.0244, "step": 978525 }, { "epoch": 9.62, "grad_norm": 9.925678253173828, "learning_rate": 1.9326362616302742e-07, "loss": 0.0138, "step": 978550 }, { "epoch": 9.62, "grad_norm": 5.206704139709473, "learning_rate": 1.9313950370877895e-07, "loss": 0.0381, "step": 978575 }, { "epoch": 9.62, "grad_norm": 7.431684494018555, "learning_rate": 1.9301538125453048e-07, "loss": 0.0232, "step": 978600 }, { "epoch": 9.62, "grad_norm": 0.22274240851402283, "learning_rate": 1.92891258800282e-07, "loss": 0.0431, "step": 978625 }, { "epoch": 9.62, "grad_norm": 18.899789810180664, "learning_rate": 1.9276713634603354e-07, "loss": 0.0098, "step": 978650 }, { "epoch": 9.62, "grad_norm": 0.12787456810474396, "learning_rate": 1.9264301389178507e-07, "loss": 0.0381, "step": 978675 }, { "epoch": 9.62, "grad_norm": 1.6119805574417114, "learning_rate": 1.9251889143753665e-07, "loss": 0.0111, "step": 978700 }, { "epoch": 9.62, "grad_norm": 0.3800361156463623, "learning_rate": 1.9239476898328818e-07, "loss": 0.0224, "step": 978725 }, { "epoch": 9.62, "grad_norm": 2.1317412853240967, "learning_rate": 1.922706465290397e-07, "loss": 0.0069, "step": 978750 }, { "epoch": 9.62, "grad_norm": 1.1539192199707031, "learning_rate": 1.9214652407479124e-07, "loss": 0.0373, "step": 978775 }, { "epoch": 9.62, "grad_norm": 10.69077205657959, "learning_rate": 1.9202240162054277e-07, "loss": 0.0134, "step": 978800 }, { "epoch": 9.62, "grad_norm": 4.067607879638672, "learning_rate": 1.9189827916629432e-07, "loss": 0.0316, "step": 978825 }, { "epoch": 9.62, "grad_norm": 8.88094711303711, "learning_rate": 1.9177415671204585e-07, "loss": 0.029, "step": 978850 }, { "epoch": 9.62, "grad_norm": 4.479471683502197, "learning_rate": 1.916500342577974e-07, "loss": 0.0409, "step": 978875 }, { "epoch": 9.62, "grad_norm": 2.3697500228881836, "learning_rate": 1.9152591180354893e-07, "loss": 0.0149, "step": 978900 }, { "epoch": 9.62, "grad_norm": 2.9353857040405273, "learning_rate": 1.9140178934930046e-07, "loss": 0.0434, "step": 978925 }, { "epoch": 9.63, "grad_norm": 7.1967878341674805, "learning_rate": 1.91277666895052e-07, "loss": 0.016, "step": 978950 }, { "epoch": 9.63, "grad_norm": 0.005636042915284634, "learning_rate": 1.9115354444080355e-07, "loss": 0.0323, "step": 978975 }, { "epoch": 9.63, "grad_norm": 7.1631245613098145, "learning_rate": 1.9102942198655508e-07, "loss": 0.011, "step": 979000 }, { "epoch": 9.63, "grad_norm": 8.04008960723877, "learning_rate": 1.909052995323066e-07, "loss": 0.0228, "step": 979025 }, { "epoch": 9.63, "grad_norm": 1.2769750356674194, "learning_rate": 1.9078117707805814e-07, "loss": 0.017, "step": 979050 }, { "epoch": 9.63, "grad_norm": 0.02586016058921814, "learning_rate": 1.9065705462380966e-07, "loss": 0.0333, "step": 979075 }, { "epoch": 9.63, "grad_norm": 0.0800110474228859, "learning_rate": 1.905329321695612e-07, "loss": 0.0185, "step": 979100 }, { "epoch": 9.63, "grad_norm": 0.005740617401897907, "learning_rate": 1.9040880971531278e-07, "loss": 0.038, "step": 979125 }, { "epoch": 9.63, "grad_norm": 5.484188079833984, "learning_rate": 1.902846872610643e-07, "loss": 0.0131, "step": 979150 }, { "epoch": 9.63, "grad_norm": 1.4457682371139526, "learning_rate": 1.9016056480681583e-07, "loss": 0.036, "step": 979175 }, { "epoch": 9.63, "grad_norm": 4.75478982925415, "learning_rate": 1.9003644235256736e-07, "loss": 0.0108, "step": 979200 }, { "epoch": 9.63, "grad_norm": 0.04018102586269379, "learning_rate": 1.899123198983189e-07, "loss": 0.0304, "step": 979225 }, { "epoch": 9.63, "grad_norm": 0.08691278100013733, "learning_rate": 1.8978819744407045e-07, "loss": 0.0143, "step": 979250 }, { "epoch": 9.63, "grad_norm": 0.16522316634655, "learning_rate": 1.8966407498982198e-07, "loss": 0.0271, "step": 979275 }, { "epoch": 9.63, "grad_norm": 4.297933101654053, "learning_rate": 1.895399525355735e-07, "loss": 0.0105, "step": 979300 }, { "epoch": 9.63, "grad_norm": 0.1670273244380951, "learning_rate": 1.8941583008132504e-07, "loss": 0.0404, "step": 979325 }, { "epoch": 9.63, "grad_norm": 5.996910095214844, "learning_rate": 1.8929170762707656e-07, "loss": 0.0191, "step": 979350 }, { "epoch": 9.63, "grad_norm": 0.23444542288780212, "learning_rate": 1.8916758517282812e-07, "loss": 0.0282, "step": 979375 }, { "epoch": 9.63, "grad_norm": 2.5832297801971436, "learning_rate": 1.8904346271857968e-07, "loss": 0.0112, "step": 979400 }, { "epoch": 9.63, "grad_norm": 1.5891269445419312, "learning_rate": 1.889193402643312e-07, "loss": 0.0236, "step": 979425 }, { "epoch": 9.63, "grad_norm": 0.11387281864881516, "learning_rate": 1.8879521781008273e-07, "loss": 0.0121, "step": 979450 }, { "epoch": 9.63, "grad_norm": 1.7499449253082275, "learning_rate": 1.8867109535583426e-07, "loss": 0.0404, "step": 979475 }, { "epoch": 9.63, "grad_norm": 6.232461452484131, "learning_rate": 1.885469729015858e-07, "loss": 0.0199, "step": 979500 }, { "epoch": 9.63, "grad_norm": 0.18595246970653534, "learning_rate": 1.8842285044733732e-07, "loss": 0.0296, "step": 979525 }, { "epoch": 9.63, "grad_norm": 8.196715354919434, "learning_rate": 1.8829872799308888e-07, "loss": 0.0152, "step": 979550 }, { "epoch": 9.63, "grad_norm": 7.371064186096191, "learning_rate": 1.8817460553884043e-07, "loss": 0.0294, "step": 979575 }, { "epoch": 9.63, "grad_norm": 5.834207534790039, "learning_rate": 1.8805048308459196e-07, "loss": 0.0186, "step": 979600 }, { "epoch": 9.63, "grad_norm": 4.4265055656433105, "learning_rate": 1.879263606303435e-07, "loss": 0.0527, "step": 979625 }, { "epoch": 9.63, "grad_norm": 7.174193859100342, "learning_rate": 1.8780223817609502e-07, "loss": 0.009, "step": 979650 }, { "epoch": 9.63, "grad_norm": 0.005439075641334057, "learning_rate": 1.8767811572184657e-07, "loss": 0.0297, "step": 979675 }, { "epoch": 9.63, "grad_norm": 12.640421867370605, "learning_rate": 1.875539932675981e-07, "loss": 0.0196, "step": 979700 }, { "epoch": 9.63, "grad_norm": 0.06609600782394409, "learning_rate": 1.8742987081334963e-07, "loss": 0.0309, "step": 979725 }, { "epoch": 9.63, "grad_norm": 9.032833099365234, "learning_rate": 1.8730574835910116e-07, "loss": 0.0254, "step": 979750 }, { "epoch": 9.63, "grad_norm": 0.5738015174865723, "learning_rate": 1.871816259048527e-07, "loss": 0.0237, "step": 979775 }, { "epoch": 9.63, "grad_norm": 1.5172368288040161, "learning_rate": 1.8705750345060422e-07, "loss": 0.0163, "step": 979800 }, { "epoch": 9.63, "grad_norm": 0.0330994613468647, "learning_rate": 1.869333809963558e-07, "loss": 0.0242, "step": 979825 }, { "epoch": 9.63, "grad_norm": 3.627183198928833, "learning_rate": 1.8680925854210733e-07, "loss": 0.0125, "step": 979850 }, { "epoch": 9.63, "grad_norm": 0.010796930640935898, "learning_rate": 1.8668513608785886e-07, "loss": 0.0318, "step": 979875 }, { "epoch": 9.63, "grad_norm": 14.478455543518066, "learning_rate": 1.865610136336104e-07, "loss": 0.0146, "step": 979900 }, { "epoch": 9.63, "grad_norm": 4.060970783233643, "learning_rate": 1.8643689117936192e-07, "loss": 0.0277, "step": 979925 }, { "epoch": 9.64, "grad_norm": 0.53973788022995, "learning_rate": 1.8631276872511345e-07, "loss": 0.0148, "step": 979950 }, { "epoch": 9.64, "grad_norm": 0.03910386562347412, "learning_rate": 1.86188646270865e-07, "loss": 0.0634, "step": 979975 }, { "epoch": 9.64, "grad_norm": 0.04539106413722038, "learning_rate": 1.8606452381661653e-07, "loss": 0.0157, "step": 980000 }, { "epoch": 9.64, "eval_loss": 0.921241044998169, "eval_runtime": 6049.3609, "eval_samples_per_second": 1.565, "eval_steps_per_second": 0.196, "eval_wer": 0.11010429408356402, "step": 980000 }, { "epoch": 9.64, "grad_norm": 1.0969743728637695, "learning_rate": 1.8594536626053804e-07, "loss": 0.0384, "step": 980025 }, { "epoch": 9.64, "grad_norm": 15.452686309814453, "learning_rate": 1.8582124380628957e-07, "loss": 0.017, "step": 980050 }, { "epoch": 9.64, "grad_norm": 0.014825708232820034, "learning_rate": 1.856971213520411e-07, "loss": 0.032, "step": 980075 }, { "epoch": 9.64, "grad_norm": 7.918827056884766, "learning_rate": 1.8557299889779262e-07, "loss": 0.0159, "step": 980100 }, { "epoch": 9.64, "grad_norm": 0.2743096351623535, "learning_rate": 1.8544887644354415e-07, "loss": 0.0203, "step": 980125 }, { "epoch": 9.64, "grad_norm": 13.55541706085205, "learning_rate": 1.853247539892957e-07, "loss": 0.0134, "step": 980150 }, { "epoch": 9.64, "grad_norm": 0.01411558035761118, "learning_rate": 1.8520063153504724e-07, "loss": 0.0303, "step": 980175 }, { "epoch": 9.64, "grad_norm": 7.907729148864746, "learning_rate": 1.8507650908079877e-07, "loss": 0.0127, "step": 980200 }, { "epoch": 9.64, "grad_norm": 0.18828918039798737, "learning_rate": 1.849523866265503e-07, "loss": 0.0488, "step": 980225 }, { "epoch": 9.64, "grad_norm": 6.238397598266602, "learning_rate": 1.8482826417230182e-07, "loss": 0.013, "step": 980250 }, { "epoch": 9.64, "grad_norm": 0.010964788496494293, "learning_rate": 1.8470414171805335e-07, "loss": 0.0352, "step": 980275 }, { "epoch": 9.64, "grad_norm": 0.4582238495349884, "learning_rate": 1.8458001926380494e-07, "loss": 0.0195, "step": 980300 }, { "epoch": 9.64, "grad_norm": 0.0086551858112216, "learning_rate": 1.8445589680955646e-07, "loss": 0.0361, "step": 980325 }, { "epoch": 9.64, "grad_norm": 4.701450347900391, "learning_rate": 1.84331774355308e-07, "loss": 0.0219, "step": 980350 }, { "epoch": 9.64, "grad_norm": 0.05614260584115982, "learning_rate": 1.8420765190105952e-07, "loss": 0.0258, "step": 980375 }, { "epoch": 9.64, "grad_norm": 11.161888122558594, "learning_rate": 1.8408352944681105e-07, "loss": 0.0169, "step": 980400 }, { "epoch": 9.64, "grad_norm": 0.03674137592315674, "learning_rate": 1.8395940699256258e-07, "loss": 0.0376, "step": 980425 }, { "epoch": 9.64, "grad_norm": 0.9230325818061829, "learning_rate": 1.8383528453831414e-07, "loss": 0.0144, "step": 980450 }, { "epoch": 9.64, "grad_norm": 1.411720633506775, "learning_rate": 1.8371116208406567e-07, "loss": 0.0298, "step": 980475 }, { "epoch": 9.64, "grad_norm": 3.450579881668091, "learning_rate": 1.835870396298172e-07, "loss": 0.0092, "step": 980500 }, { "epoch": 9.64, "grad_norm": 0.03294716402888298, "learning_rate": 1.8346291717556875e-07, "loss": 0.0193, "step": 980525 }, { "epoch": 9.64, "grad_norm": 0.043652284890413284, "learning_rate": 1.8333879472132028e-07, "loss": 0.0165, "step": 980550 }, { "epoch": 9.64, "grad_norm": 1.2163468599319458, "learning_rate": 1.8321467226707184e-07, "loss": 0.0533, "step": 980575 }, { "epoch": 9.64, "grad_norm": 9.414804458618164, "learning_rate": 1.8309054981282336e-07, "loss": 0.0265, "step": 980600 }, { "epoch": 9.64, "grad_norm": 2.0209622383117676, "learning_rate": 1.829664273585749e-07, "loss": 0.0375, "step": 980625 }, { "epoch": 9.64, "grad_norm": 0.11608699709177017, "learning_rate": 1.8284230490432642e-07, "loss": 0.0091, "step": 980650 }, { "epoch": 9.64, "grad_norm": 0.058582741767168045, "learning_rate": 1.8271818245007795e-07, "loss": 0.052, "step": 980675 }, { "epoch": 9.64, "grad_norm": 9.558882713317871, "learning_rate": 1.8259405999582948e-07, "loss": 0.0113, "step": 980700 }, { "epoch": 9.64, "grad_norm": 0.9475706815719604, "learning_rate": 1.8246993754158106e-07, "loss": 0.0525, "step": 980725 }, { "epoch": 9.64, "grad_norm": 0.5016587376594543, "learning_rate": 1.823458150873326e-07, "loss": 0.0205, "step": 980750 }, { "epoch": 9.64, "grad_norm": 0.010607931762933731, "learning_rate": 1.8222169263308412e-07, "loss": 0.0177, "step": 980775 }, { "epoch": 9.64, "grad_norm": 2.696295976638794, "learning_rate": 1.8209757017883565e-07, "loss": 0.0206, "step": 980800 }, { "epoch": 9.64, "grad_norm": 0.023459114134311676, "learning_rate": 1.8197344772458718e-07, "loss": 0.0527, "step": 980825 }, { "epoch": 9.64, "grad_norm": 2.5278208255767822, "learning_rate": 1.818493252703387e-07, "loss": 0.0179, "step": 980850 }, { "epoch": 9.64, "grad_norm": 0.02101975306868553, "learning_rate": 1.8172520281609026e-07, "loss": 0.0511, "step": 980875 }, { "epoch": 9.64, "grad_norm": 1.3156306743621826, "learning_rate": 1.816010803618418e-07, "loss": 0.0211, "step": 980900 }, { "epoch": 9.64, "grad_norm": 1.210936188697815, "learning_rate": 1.8147695790759332e-07, "loss": 0.0294, "step": 980925 }, { "epoch": 9.64, "grad_norm": 10.999526023864746, "learning_rate": 1.8135283545334485e-07, "loss": 0.0107, "step": 980950 }, { "epoch": 9.65, "grad_norm": 0.3887912929058075, "learning_rate": 1.8122871299909638e-07, "loss": 0.0287, "step": 980975 }, { "epoch": 9.65, "grad_norm": 7.1477179527282715, "learning_rate": 1.8110459054484796e-07, "loss": 0.0101, "step": 981000 }, { "epoch": 9.65, "grad_norm": 0.01658678613603115, "learning_rate": 1.809804680905995e-07, "loss": 0.0398, "step": 981025 }, { "epoch": 9.65, "grad_norm": 2.68112850189209, "learning_rate": 1.8085634563635102e-07, "loss": 0.0089, "step": 981050 }, { "epoch": 9.65, "grad_norm": 3.563666343688965, "learning_rate": 1.8073222318210255e-07, "loss": 0.0379, "step": 981075 }, { "epoch": 9.65, "grad_norm": 0.42792513966560364, "learning_rate": 1.8060810072785408e-07, "loss": 0.0211, "step": 981100 }, { "epoch": 9.65, "grad_norm": 8.9957857131958, "learning_rate": 1.804839782736056e-07, "loss": 0.0393, "step": 981125 }, { "epoch": 9.65, "grad_norm": 4.7931318283081055, "learning_rate": 1.8035985581935716e-07, "loss": 0.0134, "step": 981150 }, { "epoch": 9.65, "grad_norm": 1.3772612810134888, "learning_rate": 1.802357333651087e-07, "loss": 0.0344, "step": 981175 }, { "epoch": 9.65, "grad_norm": 6.337512493133545, "learning_rate": 1.8011161091086022e-07, "loss": 0.0115, "step": 981200 }, { "epoch": 9.65, "grad_norm": 1.2725344896316528, "learning_rate": 1.7998748845661178e-07, "loss": 0.0422, "step": 981225 }, { "epoch": 9.65, "grad_norm": 2.2121176719665527, "learning_rate": 1.798633660023633e-07, "loss": 0.0161, "step": 981250 }, { "epoch": 9.65, "grad_norm": 0.02252083085477352, "learning_rate": 1.7973924354811486e-07, "loss": 0.0369, "step": 981275 }, { "epoch": 9.65, "grad_norm": 12.761452674865723, "learning_rate": 1.796151210938664e-07, "loss": 0.0211, "step": 981300 }, { "epoch": 9.65, "grad_norm": 0.8697158098220825, "learning_rate": 1.7949099863961792e-07, "loss": 0.0289, "step": 981325 }, { "epoch": 9.65, "grad_norm": 3.0868618488311768, "learning_rate": 1.7936687618536945e-07, "loss": 0.0084, "step": 981350 }, { "epoch": 9.65, "grad_norm": 0.06015344336628914, "learning_rate": 1.7924275373112098e-07, "loss": 0.0267, "step": 981375 }, { "epoch": 9.65, "grad_norm": 5.027022838592529, "learning_rate": 1.791186312768725e-07, "loss": 0.0186, "step": 981400 }, { "epoch": 9.65, "grad_norm": 0.11035081744194031, "learning_rate": 1.789945088226241e-07, "loss": 0.0287, "step": 981425 }, { "epoch": 9.65, "grad_norm": 0.10553515702486038, "learning_rate": 1.7887038636837562e-07, "loss": 0.0186, "step": 981450 }, { "epoch": 9.65, "grad_norm": 0.04089459031820297, "learning_rate": 1.7874626391412715e-07, "loss": 0.0308, "step": 981475 }, { "epoch": 9.65, "grad_norm": 0.1869850903749466, "learning_rate": 1.7862214145987868e-07, "loss": 0.0192, "step": 981500 }, { "epoch": 9.65, "grad_norm": 0.28843843936920166, "learning_rate": 1.784980190056302e-07, "loss": 0.0296, "step": 981525 }, { "epoch": 9.65, "grad_norm": 1.9438543319702148, "learning_rate": 1.7837389655138173e-07, "loss": 0.0096, "step": 981550 }, { "epoch": 9.65, "grad_norm": 8.004678726196289, "learning_rate": 1.782497740971333e-07, "loss": 0.0284, "step": 981575 }, { "epoch": 9.65, "grad_norm": 8.476289749145508, "learning_rate": 1.7812565164288482e-07, "loss": 0.0106, "step": 981600 }, { "epoch": 9.65, "grad_norm": 0.15949437022209167, "learning_rate": 1.7800152918863635e-07, "loss": 0.0427, "step": 981625 }, { "epoch": 9.65, "grad_norm": 4.290894985198975, "learning_rate": 1.7787740673438788e-07, "loss": 0.0126, "step": 981650 }, { "epoch": 9.65, "grad_norm": 0.0352124348282814, "learning_rate": 1.777532842801394e-07, "loss": 0.0331, "step": 981675 }, { "epoch": 9.65, "grad_norm": 1.9299492835998535, "learning_rate": 1.77629161825891e-07, "loss": 0.0048, "step": 981700 }, { "epoch": 9.65, "grad_norm": 9.645939826965332, "learning_rate": 1.7750503937164252e-07, "loss": 0.0551, "step": 981725 }, { "epoch": 9.65, "grad_norm": 0.2920699119567871, "learning_rate": 1.7738091691739405e-07, "loss": 0.0086, "step": 981750 }, { "epoch": 9.65, "grad_norm": 13.579214096069336, "learning_rate": 1.7725679446314558e-07, "loss": 0.0356, "step": 981775 }, { "epoch": 9.65, "grad_norm": 1.7177783250808716, "learning_rate": 1.771326720088971e-07, "loss": 0.0111, "step": 981800 }, { "epoch": 9.65, "grad_norm": 2.390806198120117, "learning_rate": 1.7700854955464863e-07, "loss": 0.031, "step": 981825 }, { "epoch": 9.65, "grad_norm": 2.1059927940368652, "learning_rate": 1.768844271004002e-07, "loss": 0.0107, "step": 981850 }, { "epoch": 9.65, "grad_norm": 0.8835592269897461, "learning_rate": 1.7676030464615172e-07, "loss": 0.0386, "step": 981875 }, { "epoch": 9.65, "grad_norm": 7.70688533782959, "learning_rate": 1.7663618219190325e-07, "loss": 0.0131, "step": 981900 }, { "epoch": 9.65, "grad_norm": 1.1527798175811768, "learning_rate": 1.765120597376548e-07, "loss": 0.0259, "step": 981925 }, { "epoch": 9.65, "grad_norm": 0.7708263993263245, "learning_rate": 1.7638793728340633e-07, "loss": 0.0213, "step": 981950 }, { "epoch": 9.65, "grad_norm": 0.8413852453231812, "learning_rate": 1.7626381482915786e-07, "loss": 0.0272, "step": 981975 }, { "epoch": 9.66, "grad_norm": 0.4505302906036377, "learning_rate": 1.7613969237490942e-07, "loss": 0.0069, "step": 982000 }, { "epoch": 9.66, "grad_norm": 1.9473634958267212, "learning_rate": 1.7601556992066095e-07, "loss": 0.0387, "step": 982025 }, { "epoch": 9.66, "grad_norm": 1.2980068922042847, "learning_rate": 1.7589144746641248e-07, "loss": 0.0172, "step": 982050 }, { "epoch": 9.66, "grad_norm": 0.03849369287490845, "learning_rate": 1.75767325012164e-07, "loss": 0.033, "step": 982075 }, { "epoch": 9.66, "grad_norm": 3.0677380561828613, "learning_rate": 1.7564320255791553e-07, "loss": 0.0147, "step": 982100 }, { "epoch": 9.66, "grad_norm": 0.06294285506010056, "learning_rate": 1.7551908010366712e-07, "loss": 0.0368, "step": 982125 }, { "epoch": 9.66, "grad_norm": 2.7301032543182373, "learning_rate": 1.7539495764941864e-07, "loss": 0.0184, "step": 982150 }, { "epoch": 9.66, "grad_norm": 0.006948218215256929, "learning_rate": 1.7527580009334012e-07, "loss": 0.0416, "step": 982175 }, { "epoch": 9.66, "grad_norm": 3.586878776550293, "learning_rate": 1.7515167763909165e-07, "loss": 0.0211, "step": 982200 }, { "epoch": 9.66, "grad_norm": 0.14405378699302673, "learning_rate": 1.7502755518484318e-07, "loss": 0.023, "step": 982225 }, { "epoch": 9.66, "grad_norm": 1.721113681793213, "learning_rate": 1.749034327305947e-07, "loss": 0.0073, "step": 982250 }, { "epoch": 9.66, "grad_norm": 1.396126627922058, "learning_rate": 1.7477931027634624e-07, "loss": 0.0301, "step": 982275 }, { "epoch": 9.66, "grad_norm": 1.2625346183776855, "learning_rate": 1.7465518782209777e-07, "loss": 0.0175, "step": 982300 }, { "epoch": 9.66, "grad_norm": 0.13230662047863007, "learning_rate": 1.7453106536784932e-07, "loss": 0.0267, "step": 982325 }, { "epoch": 9.66, "grad_norm": 8.113931655883789, "learning_rate": 1.7440694291360085e-07, "loss": 0.0192, "step": 982350 }, { "epoch": 9.66, "grad_norm": 0.009043651632964611, "learning_rate": 1.742828204593524e-07, "loss": 0.0314, "step": 982375 }, { "epoch": 9.66, "grad_norm": 8.40029525756836, "learning_rate": 1.7415869800510394e-07, "loss": 0.0136, "step": 982400 }, { "epoch": 9.66, "grad_norm": 0.15017572045326233, "learning_rate": 1.7403457555085547e-07, "loss": 0.041, "step": 982425 }, { "epoch": 9.66, "grad_norm": 0.06946081668138504, "learning_rate": 1.73910453096607e-07, "loss": 0.0133, "step": 982450 }, { "epoch": 9.66, "grad_norm": 0.0389554537832737, "learning_rate": 1.7378633064235855e-07, "loss": 0.0362, "step": 982475 }, { "epoch": 9.66, "grad_norm": 4.980523109436035, "learning_rate": 1.7366220818811008e-07, "loss": 0.0149, "step": 982500 }, { "epoch": 9.66, "grad_norm": 5.154813766479492, "learning_rate": 1.735380857338616e-07, "loss": 0.0449, "step": 982525 }, { "epoch": 9.66, "grad_norm": 9.448796272277832, "learning_rate": 1.7341396327961314e-07, "loss": 0.0215, "step": 982550 }, { "epoch": 9.66, "grad_norm": 2.844585418701172, "learning_rate": 1.7328984082536467e-07, "loss": 0.0424, "step": 982575 }, { "epoch": 9.66, "grad_norm": 13.275786399841309, "learning_rate": 1.7316571837111625e-07, "loss": 0.0068, "step": 982600 }, { "epoch": 9.66, "grad_norm": 6.047725200653076, "learning_rate": 1.7304159591686778e-07, "loss": 0.036, "step": 982625 }, { "epoch": 9.66, "grad_norm": 7.505938529968262, "learning_rate": 1.729174734626193e-07, "loss": 0.0095, "step": 982650 }, { "epoch": 9.66, "grad_norm": 0.32393679022789, "learning_rate": 1.7279335100837084e-07, "loss": 0.0416, "step": 982675 }, { "epoch": 9.66, "grad_norm": 22.265050888061523, "learning_rate": 1.7266922855412237e-07, "loss": 0.0124, "step": 982700 }, { "epoch": 9.66, "grad_norm": 0.021592849865555763, "learning_rate": 1.725451060998739e-07, "loss": 0.0356, "step": 982725 }, { "epoch": 9.66, "grad_norm": 14.941314697265625, "learning_rate": 1.7242098364562545e-07, "loss": 0.0168, "step": 982750 }, { "epoch": 9.66, "grad_norm": 0.0449497364461422, "learning_rate": 1.7229686119137698e-07, "loss": 0.025, "step": 982775 }, { "epoch": 9.66, "grad_norm": 0.4075601398944855, "learning_rate": 1.721727387371285e-07, "loss": 0.015, "step": 982800 }, { "epoch": 9.66, "grad_norm": 0.1701282411813736, "learning_rate": 1.7204861628288004e-07, "loss": 0.0411, "step": 982825 }, { "epoch": 9.66, "grad_norm": 0.17612741887569427, "learning_rate": 1.7192449382863157e-07, "loss": 0.0245, "step": 982850 }, { "epoch": 9.66, "grad_norm": 0.14441214501857758, "learning_rate": 1.718003713743831e-07, "loss": 0.0447, "step": 982875 }, { "epoch": 9.66, "grad_norm": 8.161413192749023, "learning_rate": 1.7167624892013468e-07, "loss": 0.0204, "step": 982900 }, { "epoch": 9.66, "grad_norm": 0.048749297857284546, "learning_rate": 1.715521264658862e-07, "loss": 0.039, "step": 982925 }, { "epoch": 9.66, "grad_norm": 1.690470814704895, "learning_rate": 1.7142800401163774e-07, "loss": 0.0155, "step": 982950 }, { "epoch": 9.66, "grad_norm": 2.9893839359283447, "learning_rate": 1.7130388155738926e-07, "loss": 0.0335, "step": 982975 }, { "epoch": 9.67, "grad_norm": 0.14374007284641266, "learning_rate": 1.711797591031408e-07, "loss": 0.0135, "step": 983000 }, { "epoch": 9.67, "grad_norm": 5.076016902923584, "learning_rate": 1.7105563664889235e-07, "loss": 0.0401, "step": 983025 }, { "epoch": 9.67, "grad_norm": 3.6678919792175293, "learning_rate": 1.7093151419464388e-07, "loss": 0.0149, "step": 983050 }, { "epoch": 9.67, "grad_norm": 0.049118805676698685, "learning_rate": 1.7080739174039543e-07, "loss": 0.0198, "step": 983075 }, { "epoch": 9.67, "grad_norm": 9.907928466796875, "learning_rate": 1.7068326928614696e-07, "loss": 0.0195, "step": 983100 }, { "epoch": 9.67, "grad_norm": 0.7760645747184753, "learning_rate": 1.705591468318985e-07, "loss": 0.0487, "step": 983125 }, { "epoch": 9.67, "grad_norm": 9.72106647491455, "learning_rate": 1.7043502437765002e-07, "loss": 0.0158, "step": 983150 }, { "epoch": 9.67, "grad_norm": 1.2018128633499146, "learning_rate": 1.7031090192340158e-07, "loss": 0.0511, "step": 983175 }, { "epoch": 9.67, "grad_norm": 0.6913723945617676, "learning_rate": 1.701867794691531e-07, "loss": 0.015, "step": 983200 }, { "epoch": 9.67, "grad_norm": 0.022446775808930397, "learning_rate": 1.7006265701490464e-07, "loss": 0.0219, "step": 983225 }, { "epoch": 9.67, "grad_norm": 4.6973185539245605, "learning_rate": 1.6993853456065616e-07, "loss": 0.016, "step": 983250 }, { "epoch": 9.67, "grad_norm": 1.2157057523727417, "learning_rate": 1.698144121064077e-07, "loss": 0.0607, "step": 983275 }, { "epoch": 9.67, "grad_norm": 4.4137654304504395, "learning_rate": 1.6969028965215928e-07, "loss": 0.0133, "step": 983300 }, { "epoch": 9.67, "grad_norm": 0.35231515765190125, "learning_rate": 1.695661671979108e-07, "loss": 0.0235, "step": 983325 }, { "epoch": 9.67, "grad_norm": 3.033630132675171, "learning_rate": 1.6944204474366233e-07, "loss": 0.0214, "step": 983350 }, { "epoch": 9.67, "grad_norm": 0.5593015551567078, "learning_rate": 1.6931792228941386e-07, "loss": 0.0202, "step": 983375 }, { "epoch": 9.67, "grad_norm": 11.492630004882812, "learning_rate": 1.691937998351654e-07, "loss": 0.0072, "step": 983400 }, { "epoch": 9.67, "grad_norm": 0.18696607649326324, "learning_rate": 1.6906967738091692e-07, "loss": 0.0298, "step": 983425 }, { "epoch": 9.67, "grad_norm": 0.07032451033592224, "learning_rate": 1.6894555492666848e-07, "loss": 0.0147, "step": 983450 }, { "epoch": 9.67, "grad_norm": 0.1731286644935608, "learning_rate": 1.6882143247242e-07, "loss": 0.0191, "step": 983475 }, { "epoch": 9.67, "grad_norm": 0.45793241262435913, "learning_rate": 1.6869731001817153e-07, "loss": 0.0164, "step": 983500 }, { "epoch": 9.67, "grad_norm": 0.20558054745197296, "learning_rate": 1.6857318756392306e-07, "loss": 0.0308, "step": 983525 }, { "epoch": 9.67, "grad_norm": 3.042012929916382, "learning_rate": 1.684490651096746e-07, "loss": 0.0096, "step": 983550 }, { "epoch": 9.67, "grad_norm": 9.063183784484863, "learning_rate": 1.6832494265542612e-07, "loss": 0.0473, "step": 983575 }, { "epoch": 9.67, "grad_norm": 0.2314891815185547, "learning_rate": 1.682008202011777e-07, "loss": 0.0207, "step": 983600 }, { "epoch": 9.67, "grad_norm": 0.03524072468280792, "learning_rate": 1.6807669774692923e-07, "loss": 0.053, "step": 983625 }, { "epoch": 9.67, "grad_norm": 7.352253437042236, "learning_rate": 1.6795257529268076e-07, "loss": 0.0173, "step": 983650 }, { "epoch": 9.67, "grad_norm": 0.3819020986557007, "learning_rate": 1.678284528384323e-07, "loss": 0.0457, "step": 983675 }, { "epoch": 9.67, "grad_norm": 4.109062671661377, "learning_rate": 1.6770433038418382e-07, "loss": 0.0052, "step": 983700 }, { "epoch": 9.67, "grad_norm": 6.325214385986328, "learning_rate": 1.6758020792993538e-07, "loss": 0.0369, "step": 983725 }, { "epoch": 9.67, "grad_norm": 0.8548529744148254, "learning_rate": 1.674560854756869e-07, "loss": 0.0089, "step": 983750 }, { "epoch": 9.67, "grad_norm": 0.11223957687616348, "learning_rate": 1.6733196302143846e-07, "loss": 0.0478, "step": 983775 }, { "epoch": 9.67, "grad_norm": 0.3299190104007721, "learning_rate": 1.6720784056719e-07, "loss": 0.0084, "step": 983800 }, { "epoch": 9.67, "grad_norm": 0.08902042359113693, "learning_rate": 1.6708371811294152e-07, "loss": 0.0419, "step": 983825 }, { "epoch": 9.67, "grad_norm": 7.070740222930908, "learning_rate": 1.6695959565869305e-07, "loss": 0.0159, "step": 983850 }, { "epoch": 9.67, "grad_norm": 0.3765157163143158, "learning_rate": 1.668354732044446e-07, "loss": 0.0323, "step": 983875 }, { "epoch": 9.67, "grad_norm": 3.4062657356262207, "learning_rate": 1.6671135075019613e-07, "loss": 0.0215, "step": 983900 }, { "epoch": 9.67, "grad_norm": 9.81689739227295, "learning_rate": 1.6658722829594766e-07, "loss": 0.037, "step": 983925 }, { "epoch": 9.67, "grad_norm": 2.851017713546753, "learning_rate": 1.664631058416992e-07, "loss": 0.016, "step": 983950 }, { "epoch": 9.67, "grad_norm": 0.017811445519328117, "learning_rate": 1.6633898338745072e-07, "loss": 0.0362, "step": 983975 }, { "epoch": 9.67, "grad_norm": 2.686802864074707, "learning_rate": 1.6621486093320225e-07, "loss": 0.0166, "step": 984000 }, { "epoch": 9.68, "grad_norm": 0.2665751278400421, "learning_rate": 1.6609073847895383e-07, "loss": 0.0289, "step": 984025 }, { "epoch": 9.68, "grad_norm": 6.954325199127197, "learning_rate": 1.6596661602470536e-07, "loss": 0.0151, "step": 984050 }, { "epoch": 9.68, "grad_norm": 3.3288068771362305, "learning_rate": 1.658424935704569e-07, "loss": 0.0276, "step": 984075 }, { "epoch": 9.68, "grad_norm": 7.891500473022461, "learning_rate": 1.6571837111620842e-07, "loss": 0.0161, "step": 984100 }, { "epoch": 9.68, "grad_norm": 1.3377220630645752, "learning_rate": 1.6559424866195995e-07, "loss": 0.0275, "step": 984125 }, { "epoch": 9.68, "grad_norm": 1.1721711158752441, "learning_rate": 1.654701262077115e-07, "loss": 0.0158, "step": 984150 }, { "epoch": 9.68, "grad_norm": 1.475309133529663, "learning_rate": 1.6534600375346303e-07, "loss": 0.0406, "step": 984175 }, { "epoch": 9.68, "grad_norm": 23.24230194091797, "learning_rate": 1.6522188129921456e-07, "loss": 0.0276, "step": 984200 }, { "epoch": 9.68, "grad_norm": 0.02225235477089882, "learning_rate": 1.650977588449661e-07, "loss": 0.0205, "step": 984225 }, { "epoch": 9.68, "grad_norm": 2.777453899383545, "learning_rate": 1.6497363639071762e-07, "loss": 0.0173, "step": 984250 }, { "epoch": 9.68, "grad_norm": 0.060021501034498215, "learning_rate": 1.6484951393646917e-07, "loss": 0.0332, "step": 984275 }, { "epoch": 9.68, "grad_norm": 2.0061538219451904, "learning_rate": 1.6472539148222073e-07, "loss": 0.0165, "step": 984300 }, { "epoch": 9.68, "grad_norm": 1.5257006883621216, "learning_rate": 1.6460126902797226e-07, "loss": 0.0393, "step": 984325 }, { "epoch": 9.68, "grad_norm": 6.011953353881836, "learning_rate": 1.644771465737238e-07, "loss": 0.0148, "step": 984350 }, { "epoch": 9.68, "grad_norm": 1.1578959226608276, "learning_rate": 1.6435302411947532e-07, "loss": 0.0306, "step": 984375 }, { "epoch": 9.68, "grad_norm": 7.419875144958496, "learning_rate": 1.6422890166522685e-07, "loss": 0.0212, "step": 984400 }, { "epoch": 9.68, "grad_norm": 0.018009968101978302, "learning_rate": 1.6410477921097838e-07, "loss": 0.0262, "step": 984425 }, { "epoch": 9.68, "grad_norm": 4.086841583251953, "learning_rate": 1.6398065675672993e-07, "loss": 0.0135, "step": 984450 }, { "epoch": 9.68, "grad_norm": 0.008494445122778416, "learning_rate": 1.638565343024815e-07, "loss": 0.0427, "step": 984475 }, { "epoch": 9.68, "grad_norm": 4.615043640136719, "learning_rate": 1.6373241184823302e-07, "loss": 0.009, "step": 984500 }, { "epoch": 9.68, "grad_norm": 6.333624362945557, "learning_rate": 1.6360828939398455e-07, "loss": 0.0284, "step": 984525 }, { "epoch": 9.68, "grad_norm": 4.334429740905762, "learning_rate": 1.6348416693973607e-07, "loss": 0.0208, "step": 984550 }, { "epoch": 9.68, "grad_norm": 0.06385612487792969, "learning_rate": 1.6336004448548763e-07, "loss": 0.0489, "step": 984575 }, { "epoch": 9.68, "grad_norm": 16.2380428314209, "learning_rate": 1.6323592203123916e-07, "loss": 0.0142, "step": 984600 }, { "epoch": 9.68, "grad_norm": 0.20570771396160126, "learning_rate": 1.631117995769907e-07, "loss": 0.0442, "step": 984625 }, { "epoch": 9.68, "grad_norm": 0.16306164860725403, "learning_rate": 1.6298767712274222e-07, "loss": 0.0111, "step": 984650 }, { "epoch": 9.68, "grad_norm": 0.05290300026535988, "learning_rate": 1.6286355466849375e-07, "loss": 0.0434, "step": 984675 }, { "epoch": 9.68, "grad_norm": 9.237746238708496, "learning_rate": 1.6273943221424528e-07, "loss": 0.0122, "step": 984700 }, { "epoch": 9.68, "grad_norm": 0.2493850141763687, "learning_rate": 1.6261530975999686e-07, "loss": 0.0256, "step": 984725 }, { "epoch": 9.68, "grad_norm": 11.6334810256958, "learning_rate": 1.6249118730574839e-07, "loss": 0.0262, "step": 984750 }, { "epoch": 9.68, "grad_norm": 7.403758525848389, "learning_rate": 1.6236706485149992e-07, "loss": 0.0273, "step": 984775 }, { "epoch": 9.68, "grad_norm": 10.134218215942383, "learning_rate": 1.6224294239725144e-07, "loss": 0.0174, "step": 984800 }, { "epoch": 9.68, "grad_norm": 1.9632763862609863, "learning_rate": 1.6211881994300297e-07, "loss": 0.0421, "step": 984825 }, { "epoch": 9.68, "grad_norm": 2.948575019836426, "learning_rate": 1.6199469748875453e-07, "loss": 0.0161, "step": 984850 }, { "epoch": 9.68, "grad_norm": 0.02388293296098709, "learning_rate": 1.6187553993267598e-07, "loss": 0.0342, "step": 984875 }, { "epoch": 9.68, "grad_norm": 22.005552291870117, "learning_rate": 1.617514174784275e-07, "loss": 0.0091, "step": 984900 }, { "epoch": 9.68, "grad_norm": 0.034558266401290894, "learning_rate": 1.616272950241791e-07, "loss": 0.0519, "step": 984925 }, { "epoch": 9.68, "grad_norm": 16.131620407104492, "learning_rate": 1.6150317256993062e-07, "loss": 0.0136, "step": 984950 }, { "epoch": 9.68, "grad_norm": 1.4402960538864136, "learning_rate": 1.6137905011568215e-07, "loss": 0.0454, "step": 984975 }, { "epoch": 9.68, "grad_norm": 17.90183448791504, "learning_rate": 1.6125492766143368e-07, "loss": 0.0107, "step": 985000 }, { "epoch": 9.68, "grad_norm": 0.10294441133737564, "learning_rate": 1.611308052071852e-07, "loss": 0.0383, "step": 985025 }, { "epoch": 9.69, "grad_norm": 0.26711949706077576, "learning_rate": 1.6100668275293676e-07, "loss": 0.011, "step": 985050 }, { "epoch": 9.69, "grad_norm": 1.2904008626937866, "learning_rate": 1.608825602986883e-07, "loss": 0.0376, "step": 985075 }, { "epoch": 9.69, "grad_norm": 10.687983512878418, "learning_rate": 1.6075843784443982e-07, "loss": 0.0115, "step": 985100 }, { "epoch": 9.69, "grad_norm": 0.0038281152956187725, "learning_rate": 1.6063431539019135e-07, "loss": 0.0392, "step": 985125 }, { "epoch": 9.69, "grad_norm": 3.4322805404663086, "learning_rate": 1.6051019293594288e-07, "loss": 0.0133, "step": 985150 }, { "epoch": 9.69, "grad_norm": 0.016500210389494896, "learning_rate": 1.603860704816944e-07, "loss": 0.0389, "step": 985175 }, { "epoch": 9.69, "grad_norm": 10.158833503723145, "learning_rate": 1.60261948027446e-07, "loss": 0.0121, "step": 985200 }, { "epoch": 9.69, "grad_norm": 0.018519660457968712, "learning_rate": 1.6013782557319752e-07, "loss": 0.0404, "step": 985225 }, { "epoch": 9.69, "grad_norm": 1.280031681060791, "learning_rate": 1.6001370311894905e-07, "loss": 0.0118, "step": 985250 }, { "epoch": 9.69, "grad_norm": 0.859063446521759, "learning_rate": 1.5988958066470058e-07, "loss": 0.0382, "step": 985275 }, { "epoch": 9.69, "grad_norm": 1.0314918756484985, "learning_rate": 1.597654582104521e-07, "loss": 0.0144, "step": 985300 }, { "epoch": 9.69, "grad_norm": 0.055862389504909515, "learning_rate": 1.5964133575620366e-07, "loss": 0.0421, "step": 985325 }, { "epoch": 9.69, "grad_norm": 10.837141036987305, "learning_rate": 1.595172133019552e-07, "loss": 0.018, "step": 985350 }, { "epoch": 9.69, "grad_norm": 9.763242721557617, "learning_rate": 1.5939309084770672e-07, "loss": 0.0464, "step": 985375 }, { "epoch": 9.69, "grad_norm": 4.4265031814575195, "learning_rate": 1.5926896839345825e-07, "loss": 0.0161, "step": 985400 }, { "epoch": 9.69, "grad_norm": 0.2547594904899597, "learning_rate": 1.5914484593920978e-07, "loss": 0.0377, "step": 985425 }, { "epoch": 9.69, "grad_norm": 7.193787097930908, "learning_rate": 1.5902072348496133e-07, "loss": 0.02, "step": 985450 }, { "epoch": 9.69, "grad_norm": 0.6944178938865662, "learning_rate": 1.588966010307129e-07, "loss": 0.0446, "step": 985475 }, { "epoch": 9.69, "grad_norm": 0.2687954306602478, "learning_rate": 1.5877247857646442e-07, "loss": 0.0211, "step": 985500 }, { "epoch": 9.69, "grad_norm": 6.0821614265441895, "learning_rate": 1.5864835612221595e-07, "loss": 0.0387, "step": 985525 }, { "epoch": 9.69, "grad_norm": 3.2944045066833496, "learning_rate": 1.5852423366796748e-07, "loss": 0.0175, "step": 985550 }, { "epoch": 9.69, "grad_norm": 0.022688787430524826, "learning_rate": 1.58400111213719e-07, "loss": 0.0395, "step": 985575 }, { "epoch": 9.69, "grad_norm": 1.841929316520691, "learning_rate": 1.5827598875947054e-07, "loss": 0.0282, "step": 985600 }, { "epoch": 9.69, "grad_norm": 0.01698530837893486, "learning_rate": 1.5815186630522212e-07, "loss": 0.0359, "step": 985625 }, { "epoch": 9.69, "grad_norm": 3.1759469509124756, "learning_rate": 1.5802774385097365e-07, "loss": 0.0082, "step": 985650 }, { "epoch": 9.69, "grad_norm": 1.6542772054672241, "learning_rate": 1.5790362139672518e-07, "loss": 0.0339, "step": 985675 }, { "epoch": 9.69, "grad_norm": 0.250849187374115, "learning_rate": 1.577794989424767e-07, "loss": 0.0142, "step": 985700 }, { "epoch": 9.69, "grad_norm": 0.0234025027602911, "learning_rate": 1.5765537648822823e-07, "loss": 0.0382, "step": 985725 }, { "epoch": 9.69, "grad_norm": 8.427675247192383, "learning_rate": 1.575312540339798e-07, "loss": 0.0092, "step": 985750 }, { "epoch": 9.69, "grad_norm": 1.2138196229934692, "learning_rate": 1.5740713157973132e-07, "loss": 0.04, "step": 985775 }, { "epoch": 9.69, "grad_norm": 0.2541772425174713, "learning_rate": 1.5728300912548285e-07, "loss": 0.0171, "step": 985800 }, { "epoch": 9.69, "grad_norm": 1.232251763343811, "learning_rate": 1.5715888667123438e-07, "loss": 0.0351, "step": 985825 }, { "epoch": 9.69, "grad_norm": 9.789664268493652, "learning_rate": 1.570347642169859e-07, "loss": 0.0139, "step": 985850 }, { "epoch": 9.69, "grad_norm": 2.370267868041992, "learning_rate": 1.5691064176273744e-07, "loss": 0.0371, "step": 985875 }, { "epoch": 9.69, "grad_norm": 0.21749109029769897, "learning_rate": 1.5678651930848902e-07, "loss": 0.0191, "step": 985900 }, { "epoch": 9.69, "grad_norm": 0.053113095462322235, "learning_rate": 1.5666239685424055e-07, "loss": 0.0527, "step": 985925 }, { "epoch": 9.69, "grad_norm": 10.368660926818848, "learning_rate": 1.5653827439999208e-07, "loss": 0.0116, "step": 985950 }, { "epoch": 9.69, "grad_norm": 0.0027886333409696817, "learning_rate": 1.564141519457436e-07, "loss": 0.0326, "step": 985975 }, { "epoch": 9.69, "grad_norm": 5.658227443695068, "learning_rate": 1.5629002949149513e-07, "loss": 0.0063, "step": 986000 }, { "epoch": 9.69, "grad_norm": 0.019168900325894356, "learning_rate": 1.561659070372467e-07, "loss": 0.0465, "step": 986025 }, { "epoch": 9.7, "grad_norm": 0.751640260219574, "learning_rate": 1.5604178458299822e-07, "loss": 0.0088, "step": 986050 }, { "epoch": 9.7, "grad_norm": 0.011510200798511505, "learning_rate": 1.5591766212874975e-07, "loss": 0.0368, "step": 986075 }, { "epoch": 9.7, "grad_norm": 0.28487634658813477, "learning_rate": 1.5579353967450128e-07, "loss": 0.0204, "step": 986100 }, { "epoch": 9.7, "grad_norm": 4.185334205627441, "learning_rate": 1.5566941722025283e-07, "loss": 0.0329, "step": 986125 }, { "epoch": 9.7, "grad_norm": 0.2563127875328064, "learning_rate": 1.5554529476600436e-07, "loss": 0.0157, "step": 986150 }, { "epoch": 9.7, "grad_norm": 3.3832929134368896, "learning_rate": 1.554211723117559e-07, "loss": 0.0485, "step": 986175 }, { "epoch": 9.7, "grad_norm": 4.107012748718262, "learning_rate": 1.5529704985750742e-07, "loss": 0.0119, "step": 986200 }, { "epoch": 9.7, "grad_norm": 2.2184879779815674, "learning_rate": 1.5517292740325898e-07, "loss": 0.0472, "step": 986225 }, { "epoch": 9.7, "grad_norm": 10.099973678588867, "learning_rate": 1.550488049490105e-07, "loss": 0.013, "step": 986250 }, { "epoch": 9.7, "grad_norm": 0.0655449628829956, "learning_rate": 1.5492468249476203e-07, "loss": 0.0333, "step": 986275 }, { "epoch": 9.7, "grad_norm": 3.4663045406341553, "learning_rate": 1.548005600405136e-07, "loss": 0.0089, "step": 986300 }, { "epoch": 9.7, "grad_norm": 0.005231853108853102, "learning_rate": 1.5467643758626512e-07, "loss": 0.0489, "step": 986325 }, { "epoch": 9.7, "grad_norm": 3.371624708175659, "learning_rate": 1.5455231513201667e-07, "loss": 0.0257, "step": 986350 }, { "epoch": 9.7, "grad_norm": 0.5989202857017517, "learning_rate": 1.544281926777682e-07, "loss": 0.0409, "step": 986375 }, { "epoch": 9.7, "grad_norm": 9.988656997680664, "learning_rate": 1.5430407022351973e-07, "loss": 0.0128, "step": 986400 }, { "epoch": 9.7, "grad_norm": 0.9426703453063965, "learning_rate": 1.5417994776927126e-07, "loss": 0.0173, "step": 986425 }, { "epoch": 9.7, "grad_norm": 5.136968612670898, "learning_rate": 1.540558253150228e-07, "loss": 0.0167, "step": 986450 }, { "epoch": 9.7, "grad_norm": 0.01583380065858364, "learning_rate": 1.5393170286077435e-07, "loss": 0.0285, "step": 986475 }, { "epoch": 9.7, "grad_norm": 5.903493881225586, "learning_rate": 1.5380758040652587e-07, "loss": 0.0227, "step": 986500 }, { "epoch": 9.7, "grad_norm": 5.935892581939697, "learning_rate": 1.536834579522774e-07, "loss": 0.0349, "step": 986525 }, { "epoch": 9.7, "grad_norm": 6.636220932006836, "learning_rate": 1.5355933549802893e-07, "loss": 0.0114, "step": 986550 }, { "epoch": 9.7, "grad_norm": 5.386896133422852, "learning_rate": 1.534352130437805e-07, "loss": 0.047, "step": 986575 }, { "epoch": 9.7, "grad_norm": 9.967033386230469, "learning_rate": 1.5331109058953202e-07, "loss": 0.0091, "step": 986600 }, { "epoch": 9.7, "grad_norm": 0.07769302278757095, "learning_rate": 1.5318696813528355e-07, "loss": 0.0361, "step": 986625 }, { "epoch": 9.7, "grad_norm": 22.39217185974121, "learning_rate": 1.530628456810351e-07, "loss": 0.014, "step": 986650 }, { "epoch": 9.7, "grad_norm": 0.01869777776300907, "learning_rate": 1.5293872322678663e-07, "loss": 0.0225, "step": 986675 }, { "epoch": 9.7, "grad_norm": 5.681530952453613, "learning_rate": 1.5281460077253819e-07, "loss": 0.0089, "step": 986700 }, { "epoch": 9.7, "grad_norm": 5.911351203918457, "learning_rate": 1.5269047831828972e-07, "loss": 0.0299, "step": 986725 }, { "epoch": 9.7, "grad_norm": 2.741584539413452, "learning_rate": 1.5256635586404124e-07, "loss": 0.0129, "step": 986750 }, { "epoch": 9.7, "grad_norm": 0.02061592787504196, "learning_rate": 1.5244223340979277e-07, "loss": 0.0459, "step": 986775 }, { "epoch": 9.7, "grad_norm": 0.05949454754590988, "learning_rate": 1.523181109555443e-07, "loss": 0.0248, "step": 986800 }, { "epoch": 9.7, "grad_norm": 0.14388328790664673, "learning_rate": 1.5219398850129586e-07, "loss": 0.0418, "step": 986825 }, { "epoch": 9.7, "grad_norm": 5.053715705871582, "learning_rate": 1.520698660470474e-07, "loss": 0.016, "step": 986850 }, { "epoch": 9.7, "grad_norm": 0.033976517617702484, "learning_rate": 1.5194574359279892e-07, "loss": 0.0418, "step": 986875 }, { "epoch": 9.7, "grad_norm": 2.7021307945251465, "learning_rate": 1.5182162113855045e-07, "loss": 0.017, "step": 986900 }, { "epoch": 9.7, "grad_norm": 1.6310226917266846, "learning_rate": 1.51697498684302e-07, "loss": 0.0552, "step": 986925 }, { "epoch": 9.7, "grad_norm": 5.302313804626465, "learning_rate": 1.5157337623005353e-07, "loss": 0.0187, "step": 986950 }, { "epoch": 9.7, "grad_norm": 4.921968936920166, "learning_rate": 1.5144925377580506e-07, "loss": 0.03, "step": 986975 }, { "epoch": 9.7, "grad_norm": 0.4971480667591095, "learning_rate": 1.5132513132155662e-07, "loss": 0.0131, "step": 987000 }, { "epoch": 9.7, "grad_norm": 2.3868565559387207, "learning_rate": 1.5120100886730814e-07, "loss": 0.0264, "step": 987025 }, { "epoch": 9.7, "grad_norm": 0.502719521522522, "learning_rate": 1.5107688641305967e-07, "loss": 0.0115, "step": 987050 }, { "epoch": 9.71, "grad_norm": 0.723351776599884, "learning_rate": 1.5095276395881123e-07, "loss": 0.0444, "step": 987075 }, { "epoch": 9.71, "grad_norm": 0.21671055257320404, "learning_rate": 1.5082864150456276e-07, "loss": 0.0095, "step": 987100 }, { "epoch": 9.71, "grad_norm": 0.010762413032352924, "learning_rate": 1.507045190503143e-07, "loss": 0.0419, "step": 987125 }, { "epoch": 9.71, "grad_norm": 3.2217183113098145, "learning_rate": 1.5058039659606582e-07, "loss": 0.0187, "step": 987150 }, { "epoch": 9.71, "grad_norm": 0.031889159232378006, "learning_rate": 1.5046123903998732e-07, "loss": 0.0357, "step": 987175 }, { "epoch": 9.71, "grad_norm": 0.292104572057724, "learning_rate": 1.5033711658573885e-07, "loss": 0.0144, "step": 987200 }, { "epoch": 9.71, "grad_norm": 0.001260278862901032, "learning_rate": 1.5021299413149038e-07, "loss": 0.047, "step": 987225 }, { "epoch": 9.71, "grad_norm": 0.16886447370052338, "learning_rate": 1.500888716772419e-07, "loss": 0.022, "step": 987250 }, { "epoch": 9.71, "grad_norm": 3.1816799640655518, "learning_rate": 1.4996474922299346e-07, "loss": 0.0339, "step": 987275 }, { "epoch": 9.71, "grad_norm": 2.333142042160034, "learning_rate": 1.49840626768745e-07, "loss": 0.0177, "step": 987300 }, { "epoch": 9.71, "grad_norm": 0.09731125831604004, "learning_rate": 1.4971650431449652e-07, "loss": 0.037, "step": 987325 }, { "epoch": 9.71, "grad_norm": 11.817414283752441, "learning_rate": 1.4959238186024805e-07, "loss": 0.0223, "step": 987350 }, { "epoch": 9.71, "grad_norm": 0.04154089838266373, "learning_rate": 1.4946825940599958e-07, "loss": 0.0348, "step": 987375 }, { "epoch": 9.71, "grad_norm": 0.07460134476423264, "learning_rate": 1.4934413695175113e-07, "loss": 0.0304, "step": 987400 }, { "epoch": 9.71, "grad_norm": 1.4547091722488403, "learning_rate": 1.4922001449750266e-07, "loss": 0.0461, "step": 987425 }, { "epoch": 9.71, "grad_norm": 3.718949794769287, "learning_rate": 1.490958920432542e-07, "loss": 0.0068, "step": 987450 }, { "epoch": 9.71, "grad_norm": 1.1240015029907227, "learning_rate": 1.4897176958900575e-07, "loss": 0.053, "step": 987475 }, { "epoch": 9.71, "grad_norm": 0.23089182376861572, "learning_rate": 1.4884764713475728e-07, "loss": 0.011, "step": 987500 }, { "epoch": 9.71, "grad_norm": 3.1349246501922607, "learning_rate": 1.487235246805088e-07, "loss": 0.0339, "step": 987525 }, { "epoch": 9.71, "grad_norm": 13.379895210266113, "learning_rate": 1.4859940222626036e-07, "loss": 0.0128, "step": 987550 }, { "epoch": 9.71, "grad_norm": 2.171708106994629, "learning_rate": 1.484752797720119e-07, "loss": 0.0475, "step": 987575 }, { "epoch": 9.71, "grad_norm": 14.046954154968262, "learning_rate": 1.4835115731776342e-07, "loss": 0.0052, "step": 987600 }, { "epoch": 9.71, "grad_norm": 0.008247696794569492, "learning_rate": 1.4822703486351498e-07, "loss": 0.0299, "step": 987625 }, { "epoch": 9.71, "grad_norm": 0.10397154092788696, "learning_rate": 1.481029124092665e-07, "loss": 0.0136, "step": 987650 }, { "epoch": 9.71, "grad_norm": 1.023922324180603, "learning_rate": 1.4797878995501803e-07, "loss": 0.0327, "step": 987675 }, { "epoch": 9.71, "grad_norm": 0.18317849934101105, "learning_rate": 1.4785466750076956e-07, "loss": 0.0094, "step": 987700 }, { "epoch": 9.71, "grad_norm": 0.006909356918185949, "learning_rate": 1.477305450465211e-07, "loss": 0.0285, "step": 987725 }, { "epoch": 9.71, "grad_norm": 0.047589261084795, "learning_rate": 1.4760642259227265e-07, "loss": 0.0099, "step": 987750 }, { "epoch": 9.71, "grad_norm": 0.01985175907611847, "learning_rate": 1.4748230013802418e-07, "loss": 0.0223, "step": 987775 }, { "epoch": 9.71, "grad_norm": 0.13821274042129517, "learning_rate": 1.473581776837757e-07, "loss": 0.0197, "step": 987800 }, { "epoch": 9.71, "grad_norm": 0.0021651419810950756, "learning_rate": 1.4723405522952726e-07, "loss": 0.0314, "step": 987825 }, { "epoch": 9.71, "grad_norm": 7.737050533294678, "learning_rate": 1.471099327752788e-07, "loss": 0.0175, "step": 987850 }, { "epoch": 9.71, "grad_norm": 0.8628458380699158, "learning_rate": 1.4698581032103032e-07, "loss": 0.0368, "step": 987875 }, { "epoch": 9.71, "grad_norm": 7.8378376960754395, "learning_rate": 1.4686168786678188e-07, "loss": 0.0101, "step": 987900 }, { "epoch": 9.71, "grad_norm": 0.0012751297326758504, "learning_rate": 1.467375654125334e-07, "loss": 0.0319, "step": 987925 }, { "epoch": 9.71, "grad_norm": 3.2868571281433105, "learning_rate": 1.4661344295828493e-07, "loss": 0.0128, "step": 987950 }, { "epoch": 9.71, "grad_norm": 3.1510732173919678, "learning_rate": 1.464893205040365e-07, "loss": 0.0458, "step": 987975 }, { "epoch": 9.71, "grad_norm": 1.8943078517913818, "learning_rate": 1.4636519804978802e-07, "loss": 0.0121, "step": 988000 }, { "epoch": 9.71, "grad_norm": 0.06281638890504837, "learning_rate": 1.4624107559553955e-07, "loss": 0.0369, "step": 988025 }, { "epoch": 9.71, "grad_norm": 0.8755411505699158, "learning_rate": 1.4611695314129108e-07, "loss": 0.0186, "step": 988050 }, { "epoch": 9.71, "grad_norm": 0.11288169771432877, "learning_rate": 1.459928306870426e-07, "loss": 0.0533, "step": 988075 }, { "epoch": 9.72, "grad_norm": 1.6858632564544678, "learning_rate": 1.4586870823279416e-07, "loss": 0.0084, "step": 988100 }, { "epoch": 9.72, "grad_norm": 1.3263517618179321, "learning_rate": 1.457445857785457e-07, "loss": 0.0352, "step": 988125 }, { "epoch": 9.72, "grad_norm": 0.18864256143569946, "learning_rate": 1.4562046332429722e-07, "loss": 0.0222, "step": 988150 }, { "epoch": 9.72, "grad_norm": 6.317187786102295, "learning_rate": 1.4549634087004878e-07, "loss": 0.0338, "step": 988175 }, { "epoch": 9.72, "grad_norm": 7.6142802238464355, "learning_rate": 1.453722184158003e-07, "loss": 0.0155, "step": 988200 }, { "epoch": 9.72, "grad_norm": 1.7793089151382446, "learning_rate": 1.4524809596155183e-07, "loss": 0.0501, "step": 988225 }, { "epoch": 9.72, "grad_norm": 2.320632219314575, "learning_rate": 1.451239735073034e-07, "loss": 0.024, "step": 988250 }, { "epoch": 9.72, "grad_norm": 0.16512422263622284, "learning_rate": 1.4499985105305492e-07, "loss": 0.0439, "step": 988275 }, { "epoch": 9.72, "grad_norm": 0.34447231888771057, "learning_rate": 1.4487572859880645e-07, "loss": 0.0123, "step": 988300 }, { "epoch": 9.72, "grad_norm": 0.13363265991210938, "learning_rate": 1.44751606144558e-07, "loss": 0.0368, "step": 988325 }, { "epoch": 9.72, "grad_norm": 10.903985023498535, "learning_rate": 1.4462748369030953e-07, "loss": 0.0117, "step": 988350 }, { "epoch": 9.72, "grad_norm": 0.08558197319507599, "learning_rate": 1.4450336123606106e-07, "loss": 0.0334, "step": 988375 }, { "epoch": 9.72, "grad_norm": 0.703376054763794, "learning_rate": 1.443792387818126e-07, "loss": 0.0096, "step": 988400 }, { "epoch": 9.72, "grad_norm": 0.09245087951421738, "learning_rate": 1.4425511632756412e-07, "loss": 0.0489, "step": 988425 }, { "epoch": 9.72, "grad_norm": 9.301435470581055, "learning_rate": 1.4413099387331567e-07, "loss": 0.0117, "step": 988450 }, { "epoch": 9.72, "grad_norm": 0.02177007682621479, "learning_rate": 1.440068714190672e-07, "loss": 0.0231, "step": 988475 }, { "epoch": 9.72, "grad_norm": 0.29731836915016174, "learning_rate": 1.4388274896481873e-07, "loss": 0.0087, "step": 988500 }, { "epoch": 9.72, "grad_norm": 0.34809309244155884, "learning_rate": 1.437586265105703e-07, "loss": 0.0399, "step": 988525 }, { "epoch": 9.72, "grad_norm": 0.17268703877925873, "learning_rate": 1.4363450405632182e-07, "loss": 0.0098, "step": 988550 }, { "epoch": 9.72, "grad_norm": 0.0021150093525648117, "learning_rate": 1.4351038160207335e-07, "loss": 0.0338, "step": 988575 }, { "epoch": 9.72, "grad_norm": 12.971600532531738, "learning_rate": 1.433862591478249e-07, "loss": 0.0142, "step": 988600 }, { "epoch": 9.72, "grad_norm": 0.013334640301764011, "learning_rate": 1.4326213669357643e-07, "loss": 0.0576, "step": 988625 }, { "epoch": 9.72, "grad_norm": 3.000242233276367, "learning_rate": 1.4313801423932796e-07, "loss": 0.0114, "step": 988650 }, { "epoch": 9.72, "grad_norm": 0.6760576963424683, "learning_rate": 1.4301389178507952e-07, "loss": 0.04, "step": 988675 }, { "epoch": 9.72, "grad_norm": 1.8045226335525513, "learning_rate": 1.4288976933083104e-07, "loss": 0.0108, "step": 988700 }, { "epoch": 9.72, "grad_norm": 0.0647573322057724, "learning_rate": 1.4276564687658257e-07, "loss": 0.0362, "step": 988725 }, { "epoch": 9.72, "grad_norm": 7.938751220703125, "learning_rate": 1.426415244223341e-07, "loss": 0.0205, "step": 988750 }, { "epoch": 9.72, "grad_norm": 7.589446067810059, "learning_rate": 1.4251740196808563e-07, "loss": 0.0472, "step": 988775 }, { "epoch": 9.72, "grad_norm": 0.48266923427581787, "learning_rate": 1.423932795138372e-07, "loss": 0.0175, "step": 988800 }, { "epoch": 9.72, "grad_norm": 0.18213506042957306, "learning_rate": 1.4226915705958872e-07, "loss": 0.0256, "step": 988825 }, { "epoch": 9.72, "grad_norm": 1.4700227975845337, "learning_rate": 1.4214503460534025e-07, "loss": 0.0172, "step": 988850 }, { "epoch": 9.72, "grad_norm": 0.02019120752811432, "learning_rate": 1.420209121510918e-07, "loss": 0.0382, "step": 988875 }, { "epoch": 9.72, "grad_norm": 14.735137939453125, "learning_rate": 1.4189678969684333e-07, "loss": 0.0086, "step": 988900 }, { "epoch": 9.72, "grad_norm": 0.7985033988952637, "learning_rate": 1.4177266724259486e-07, "loss": 0.036, "step": 988925 }, { "epoch": 9.72, "grad_norm": 0.3327030539512634, "learning_rate": 1.4164854478834642e-07, "loss": 0.0176, "step": 988950 }, { "epoch": 9.72, "grad_norm": 0.5026658177375793, "learning_rate": 1.4152442233409794e-07, "loss": 0.0248, "step": 988975 }, { "epoch": 9.72, "grad_norm": 0.1232822835445404, "learning_rate": 1.4140029987984947e-07, "loss": 0.0107, "step": 989000 }, { "epoch": 9.72, "grad_norm": 0.027144208550453186, "learning_rate": 1.4127617742560103e-07, "loss": 0.0423, "step": 989025 }, { "epoch": 9.72, "grad_norm": 10.496121406555176, "learning_rate": 1.4115205497135256e-07, "loss": 0.0136, "step": 989050 }, { "epoch": 9.72, "grad_norm": 3.3998162746429443, "learning_rate": 1.410279325171041e-07, "loss": 0.0342, "step": 989075 }, { "epoch": 9.72, "grad_norm": 1.0157908201217651, "learning_rate": 1.4090381006285562e-07, "loss": 0.016, "step": 989100 }, { "epoch": 9.73, "grad_norm": 0.06919409334659576, "learning_rate": 1.4077968760860715e-07, "loss": 0.0286, "step": 989125 }, { "epoch": 9.73, "grad_norm": 22.197683334350586, "learning_rate": 1.406555651543587e-07, "loss": 0.0246, "step": 989150 }, { "epoch": 9.73, "grad_norm": 0.06565980613231659, "learning_rate": 1.4053144270011023e-07, "loss": 0.0391, "step": 989175 }, { "epoch": 9.73, "grad_norm": 14.514863014221191, "learning_rate": 1.4040732024586176e-07, "loss": 0.0117, "step": 989200 }, { "epoch": 9.73, "grad_norm": 2.897085428237915, "learning_rate": 1.4028319779161331e-07, "loss": 0.0423, "step": 989225 }, { "epoch": 9.73, "grad_norm": 11.483810424804688, "learning_rate": 1.4015907533736484e-07, "loss": 0.0155, "step": 989250 }, { "epoch": 9.73, "grad_norm": 5.845162391662598, "learning_rate": 1.4003495288311637e-07, "loss": 0.0515, "step": 989275 }, { "epoch": 9.73, "grad_norm": 3.979553461074829, "learning_rate": 1.3991083042886793e-07, "loss": 0.0095, "step": 989300 }, { "epoch": 9.73, "grad_norm": 0.35110360383987427, "learning_rate": 1.3978670797461946e-07, "loss": 0.0239, "step": 989325 }, { "epoch": 9.73, "grad_norm": 0.5890882015228271, "learning_rate": 1.3966258552037099e-07, "loss": 0.009, "step": 989350 }, { "epoch": 9.73, "grad_norm": 0.7708621621131897, "learning_rate": 1.3953846306612254e-07, "loss": 0.0364, "step": 989375 }, { "epoch": 9.73, "grad_norm": 7.220273017883301, "learning_rate": 1.3941434061187407e-07, "loss": 0.0109, "step": 989400 }, { "epoch": 9.73, "grad_norm": 0.1730562299489975, "learning_rate": 1.392902181576256e-07, "loss": 0.0451, "step": 989425 }, { "epoch": 9.73, "grad_norm": 14.008079528808594, "learning_rate": 1.3916609570337713e-07, "loss": 0.0174, "step": 989450 }, { "epoch": 9.73, "grad_norm": 0.025302719324827194, "learning_rate": 1.3904197324912866e-07, "loss": 0.0251, "step": 989475 }, { "epoch": 9.73, "grad_norm": 0.20590271055698395, "learning_rate": 1.389178507948802e-07, "loss": 0.0101, "step": 989500 }, { "epoch": 9.73, "grad_norm": 0.05403028428554535, "learning_rate": 1.3879372834063174e-07, "loss": 0.0352, "step": 989525 }, { "epoch": 9.73, "grad_norm": 2.699817419052124, "learning_rate": 1.3866960588638327e-07, "loss": 0.0066, "step": 989550 }, { "epoch": 9.73, "grad_norm": 1.458260416984558, "learning_rate": 1.3854548343213483e-07, "loss": 0.0394, "step": 989575 }, { "epoch": 9.73, "grad_norm": 5.9055280685424805, "learning_rate": 1.3842136097788636e-07, "loss": 0.0307, "step": 989600 }, { "epoch": 9.73, "grad_norm": 6.665281772613525, "learning_rate": 1.3829723852363789e-07, "loss": 0.0229, "step": 989625 }, { "epoch": 9.73, "grad_norm": 1.4037789106369019, "learning_rate": 1.3817311606938944e-07, "loss": 0.0158, "step": 989650 }, { "epoch": 9.73, "grad_norm": 2.641554117202759, "learning_rate": 1.3804899361514097e-07, "loss": 0.038, "step": 989675 }, { "epoch": 9.73, "grad_norm": 4.538444995880127, "learning_rate": 1.379248711608925e-07, "loss": 0.0152, "step": 989700 }, { "epoch": 9.73, "grad_norm": 3.2414321899414062, "learning_rate": 1.3780074870664406e-07, "loss": 0.0438, "step": 989725 }, { "epoch": 9.73, "grad_norm": 2.6187658309936523, "learning_rate": 1.3767662625239558e-07, "loss": 0.0134, "step": 989750 }, { "epoch": 9.73, "grad_norm": 0.041796714067459106, "learning_rate": 1.3755250379814711e-07, "loss": 0.0328, "step": 989775 }, { "epoch": 9.73, "grad_norm": 6.206131458282471, "learning_rate": 1.3742838134389864e-07, "loss": 0.0296, "step": 989800 }, { "epoch": 9.73, "grad_norm": 0.029094211757183075, "learning_rate": 1.3730425888965017e-07, "loss": 0.0391, "step": 989825 }, { "epoch": 9.73, "grad_norm": 8.252053260803223, "learning_rate": 1.371801364354017e-07, "loss": 0.0251, "step": 989850 }, { "epoch": 9.73, "grad_norm": 0.3865422308444977, "learning_rate": 1.370609788793232e-07, "loss": 0.0573, "step": 989875 }, { "epoch": 9.73, "grad_norm": 16.387340545654297, "learning_rate": 1.3693685642507473e-07, "loss": 0.0261, "step": 989900 }, { "epoch": 9.73, "grad_norm": 2.7899582386016846, "learning_rate": 1.3681273397082626e-07, "loss": 0.0265, "step": 989925 }, { "epoch": 9.73, "grad_norm": 1.235345721244812, "learning_rate": 1.366886115165778e-07, "loss": 0.0162, "step": 989950 }, { "epoch": 9.73, "grad_norm": 1.6864182949066162, "learning_rate": 1.3656448906232932e-07, "loss": 0.0432, "step": 989975 }, { "epoch": 9.73, "grad_norm": 3.3085861206054688, "learning_rate": 1.3644036660808088e-07, "loss": 0.024, "step": 990000 }, { "epoch": 9.73, "grad_norm": 0.049171485006809235, "learning_rate": 1.363162441538324e-07, "loss": 0.0339, "step": 990025 }, { "epoch": 9.73, "grad_norm": 0.7627468109130859, "learning_rate": 1.3619212169958396e-07, "loss": 0.0135, "step": 990050 }, { "epoch": 9.73, "grad_norm": 1.8313560485839844, "learning_rate": 1.360679992453355e-07, "loss": 0.0382, "step": 990075 }, { "epoch": 9.73, "grad_norm": 4.679748058319092, "learning_rate": 1.3594387679108702e-07, "loss": 0.0087, "step": 990100 }, { "epoch": 9.74, "grad_norm": 1.6961997747421265, "learning_rate": 1.3581975433683858e-07, "loss": 0.0415, "step": 990125 }, { "epoch": 9.74, "grad_norm": 10.012556076049805, "learning_rate": 1.356956318825901e-07, "loss": 0.0108, "step": 990150 }, { "epoch": 9.74, "grad_norm": 0.003522462211549282, "learning_rate": 1.3557150942834163e-07, "loss": 0.0264, "step": 990175 }, { "epoch": 9.74, "grad_norm": 6.544907569885254, "learning_rate": 1.354473869740932e-07, "loss": 0.0244, "step": 990200 }, { "epoch": 9.74, "grad_norm": 4.047390460968018, "learning_rate": 1.3532326451984472e-07, "loss": 0.0283, "step": 990225 }, { "epoch": 9.74, "grad_norm": 2.867823839187622, "learning_rate": 1.3519914206559625e-07, "loss": 0.0074, "step": 990250 }, { "epoch": 9.74, "grad_norm": 0.07040661573410034, "learning_rate": 1.3507501961134778e-07, "loss": 0.0415, "step": 990275 }, { "epoch": 9.74, "grad_norm": 11.297651290893555, "learning_rate": 1.349508971570993e-07, "loss": 0.0272, "step": 990300 }, { "epoch": 9.74, "grad_norm": 1.28207528591156, "learning_rate": 1.3482677470285083e-07, "loss": 0.0294, "step": 990325 }, { "epoch": 9.74, "grad_norm": 10.48438549041748, "learning_rate": 1.347026522486024e-07, "loss": 0.0083, "step": 990350 }, { "epoch": 9.74, "grad_norm": 0.18956220149993896, "learning_rate": 1.3457852979435392e-07, "loss": 0.0312, "step": 990375 }, { "epoch": 9.74, "grad_norm": 7.882598876953125, "learning_rate": 1.3445440734010547e-07, "loss": 0.0199, "step": 990400 }, { "epoch": 9.74, "grad_norm": 0.05820545181632042, "learning_rate": 1.34330284885857e-07, "loss": 0.0529, "step": 990425 }, { "epoch": 9.74, "grad_norm": 9.38467025756836, "learning_rate": 1.3420616243160853e-07, "loss": 0.0228, "step": 990450 }, { "epoch": 9.74, "grad_norm": 0.12632939219474792, "learning_rate": 1.340820399773601e-07, "loss": 0.0484, "step": 990475 }, { "epoch": 9.74, "grad_norm": 8.827631950378418, "learning_rate": 1.3395791752311162e-07, "loss": 0.0144, "step": 990500 }, { "epoch": 9.74, "grad_norm": 1.2372018098831177, "learning_rate": 1.3383379506886315e-07, "loss": 0.0473, "step": 990525 }, { "epoch": 9.74, "grad_norm": 2.5961508750915527, "learning_rate": 1.337096726146147e-07, "loss": 0.0205, "step": 990550 }, { "epoch": 9.74, "grad_norm": 0.024197395890951157, "learning_rate": 1.3358555016036623e-07, "loss": 0.0234, "step": 990575 }, { "epoch": 9.74, "grad_norm": 2.6765434741973877, "learning_rate": 1.3346142770611776e-07, "loss": 0.0146, "step": 990600 }, { "epoch": 9.74, "grad_norm": 1.431044101715088, "learning_rate": 1.333373052518693e-07, "loss": 0.0231, "step": 990625 }, { "epoch": 9.74, "grad_norm": 2.5946404933929443, "learning_rate": 1.3321318279762082e-07, "loss": 0.0167, "step": 990650 }, { "epoch": 9.74, "grad_norm": 1.1538805961608887, "learning_rate": 1.3308906034337235e-07, "loss": 0.0368, "step": 990675 }, { "epoch": 9.74, "grad_norm": 0.510380744934082, "learning_rate": 1.329649378891239e-07, "loss": 0.0246, "step": 990700 }, { "epoch": 9.74, "grad_norm": 0.05149349942803383, "learning_rate": 1.3284081543487543e-07, "loss": 0.0442, "step": 990725 }, { "epoch": 9.74, "grad_norm": 0.6012045741081238, "learning_rate": 1.3271669298062696e-07, "loss": 0.0095, "step": 990750 }, { "epoch": 9.74, "grad_norm": 0.021055860444903374, "learning_rate": 1.3259257052637852e-07, "loss": 0.0263, "step": 990775 }, { "epoch": 9.74, "grad_norm": 3.1343376636505127, "learning_rate": 1.3246844807213005e-07, "loss": 0.0198, "step": 990800 }, { "epoch": 9.74, "grad_norm": 5.740607738494873, "learning_rate": 1.323443256178816e-07, "loss": 0.0334, "step": 990825 }, { "epoch": 9.74, "grad_norm": 1.7981880903244019, "learning_rate": 1.3222020316363313e-07, "loss": 0.008, "step": 990850 }, { "epoch": 9.74, "grad_norm": 0.011670567095279694, "learning_rate": 1.3209608070938466e-07, "loss": 0.0329, "step": 990875 }, { "epoch": 9.74, "grad_norm": 0.32700473070144653, "learning_rate": 1.3197195825513622e-07, "loss": 0.0172, "step": 990900 }, { "epoch": 9.74, "grad_norm": 0.20015479624271393, "learning_rate": 1.3184783580088774e-07, "loss": 0.033, "step": 990925 }, { "epoch": 9.74, "grad_norm": 1.6465104818344116, "learning_rate": 1.3172371334663927e-07, "loss": 0.0103, "step": 990950 }, { "epoch": 9.74, "grad_norm": 0.0539313443005085, "learning_rate": 1.315995908923908e-07, "loss": 0.0282, "step": 990975 }, { "epoch": 9.74, "grad_norm": 0.7013733983039856, "learning_rate": 1.3147546843814233e-07, "loss": 0.0151, "step": 991000 }, { "epoch": 9.74, "grad_norm": 0.004652473609894514, "learning_rate": 1.313513459838939e-07, "loss": 0.0539, "step": 991025 }, { "epoch": 9.74, "grad_norm": 1.7030972242355347, "learning_rate": 1.3122722352964542e-07, "loss": 0.0072, "step": 991050 }, { "epoch": 9.74, "grad_norm": 2.4946517944335938, "learning_rate": 1.3110310107539695e-07, "loss": 0.033, "step": 991075 }, { "epoch": 9.74, "grad_norm": 10.993897438049316, "learning_rate": 1.3097897862114847e-07, "loss": 0.0215, "step": 991100 }, { "epoch": 9.74, "grad_norm": 4.471258640289307, "learning_rate": 1.3085485616690003e-07, "loss": 0.0387, "step": 991125 }, { "epoch": 9.75, "grad_norm": 14.250107765197754, "learning_rate": 1.3073073371265156e-07, "loss": 0.0185, "step": 991150 }, { "epoch": 9.75, "grad_norm": 0.2122640460729599, "learning_rate": 1.3060661125840311e-07, "loss": 0.0332, "step": 991175 }, { "epoch": 9.75, "grad_norm": 4.184563159942627, "learning_rate": 1.3048248880415464e-07, "loss": 0.0142, "step": 991200 }, { "epoch": 9.75, "grad_norm": 1.349243402481079, "learning_rate": 1.3035836634990617e-07, "loss": 0.0448, "step": 991225 }, { "epoch": 9.75, "grad_norm": 2.731895685195923, "learning_rate": 1.3023424389565773e-07, "loss": 0.0192, "step": 991250 }, { "epoch": 9.75, "grad_norm": 0.019464826211333275, "learning_rate": 1.3011012144140926e-07, "loss": 0.0297, "step": 991275 }, { "epoch": 9.75, "grad_norm": 0.8760172128677368, "learning_rate": 1.299859989871608e-07, "loss": 0.0147, "step": 991300 }, { "epoch": 9.75, "grad_norm": 3.392125129699707, "learning_rate": 1.2986187653291232e-07, "loss": 0.0358, "step": 991325 }, { "epoch": 9.75, "grad_norm": 10.342581748962402, "learning_rate": 1.2973775407866385e-07, "loss": 0.0144, "step": 991350 }, { "epoch": 9.75, "grad_norm": 0.017290132120251656, "learning_rate": 1.296136316244154e-07, "loss": 0.0345, "step": 991375 }, { "epoch": 9.75, "grad_norm": 26.40736198425293, "learning_rate": 1.2948950917016693e-07, "loss": 0.0115, "step": 991400 }, { "epoch": 9.75, "grad_norm": 0.061995990574359894, "learning_rate": 1.2936538671591846e-07, "loss": 0.0319, "step": 991425 }, { "epoch": 9.75, "grad_norm": 9.16815185546875, "learning_rate": 1.2924126426167e-07, "loss": 0.01, "step": 991450 }, { "epoch": 9.75, "grad_norm": 2.9273526668548584, "learning_rate": 1.2911714180742154e-07, "loss": 0.0426, "step": 991475 }, { "epoch": 9.75, "grad_norm": 0.3008439540863037, "learning_rate": 1.2899301935317307e-07, "loss": 0.0208, "step": 991500 }, { "epoch": 9.75, "grad_norm": 0.6582844853401184, "learning_rate": 1.288688968989246e-07, "loss": 0.0306, "step": 991525 }, { "epoch": 9.75, "grad_norm": 8.732194900512695, "learning_rate": 1.2874477444467616e-07, "loss": 0.0205, "step": 991550 }, { "epoch": 9.75, "grad_norm": 1.5664182901382446, "learning_rate": 1.2862065199042769e-07, "loss": 0.0456, "step": 991575 }, { "epoch": 9.75, "grad_norm": 8.775825500488281, "learning_rate": 1.2849652953617924e-07, "loss": 0.0099, "step": 991600 }, { "epoch": 9.75, "grad_norm": 0.03581228852272034, "learning_rate": 1.2837240708193077e-07, "loss": 0.0268, "step": 991625 }, { "epoch": 9.75, "grad_norm": 13.133227348327637, "learning_rate": 1.282482846276823e-07, "loss": 0.0125, "step": 991650 }, { "epoch": 9.75, "grad_norm": 2.8846006393432617, "learning_rate": 1.2812416217343383e-07, "loss": 0.0514, "step": 991675 }, { "epoch": 9.75, "grad_norm": 21.311967849731445, "learning_rate": 1.2800003971918536e-07, "loss": 0.0165, "step": 991700 }, { "epoch": 9.75, "grad_norm": 2.412440538406372, "learning_rate": 1.2787591726493691e-07, "loss": 0.0317, "step": 991725 }, { "epoch": 9.75, "grad_norm": 5.130325794219971, "learning_rate": 1.2775179481068844e-07, "loss": 0.0055, "step": 991750 }, { "epoch": 9.75, "grad_norm": 0.2613150179386139, "learning_rate": 1.2762767235643997e-07, "loss": 0.0472, "step": 991775 }, { "epoch": 9.75, "grad_norm": 13.316168785095215, "learning_rate": 1.275035499021915e-07, "loss": 0.0097, "step": 991800 }, { "epoch": 9.75, "grad_norm": 0.24770323932170868, "learning_rate": 1.2737942744794306e-07, "loss": 0.0472, "step": 991825 }, { "epoch": 9.75, "grad_norm": 0.5426949858665466, "learning_rate": 1.2725530499369459e-07, "loss": 0.0127, "step": 991850 }, { "epoch": 9.75, "grad_norm": 0.01319113839417696, "learning_rate": 1.2713118253944611e-07, "loss": 0.0483, "step": 991875 }, { "epoch": 9.75, "grad_norm": 5.343593597412109, "learning_rate": 1.2700706008519767e-07, "loss": 0.0181, "step": 991900 }, { "epoch": 9.75, "grad_norm": 0.08164184540510178, "learning_rate": 1.268829376309492e-07, "loss": 0.0313, "step": 991925 }, { "epoch": 9.75, "grad_norm": 6.9335103034973145, "learning_rate": 1.2675881517670076e-07, "loss": 0.0188, "step": 991950 }, { "epoch": 9.75, "grad_norm": 0.032155923545360565, "learning_rate": 1.266396576206222e-07, "loss": 0.0444, "step": 991975 }, { "epoch": 9.75, "grad_norm": 2.1179003715515137, "learning_rate": 1.2651553516637374e-07, "loss": 0.0219, "step": 992000 }, { "epoch": 9.75, "grad_norm": 0.017962859943509102, "learning_rate": 1.263914127121253e-07, "loss": 0.0273, "step": 992025 }, { "epoch": 9.75, "grad_norm": 2.7898056507110596, "learning_rate": 1.2626729025787682e-07, "loss": 0.0102, "step": 992050 }, { "epoch": 9.75, "grad_norm": 0.002011946402490139, "learning_rate": 1.2614316780362838e-07, "loss": 0.0223, "step": 992075 }, { "epoch": 9.75, "grad_norm": 4.315571308135986, "learning_rate": 1.260190453493799e-07, "loss": 0.012, "step": 992100 }, { "epoch": 9.75, "grad_norm": 0.34930336475372314, "learning_rate": 1.2589492289513143e-07, "loss": 0.034, "step": 992125 }, { "epoch": 9.75, "grad_norm": 17.16689682006836, "learning_rate": 1.2577080044088296e-07, "loss": 0.0136, "step": 992150 }, { "epoch": 9.76, "grad_norm": 1.7611230611801147, "learning_rate": 1.256466779866345e-07, "loss": 0.0327, "step": 992175 }, { "epoch": 9.76, "grad_norm": 0.8297809362411499, "learning_rate": 1.2552255553238605e-07, "loss": 0.0047, "step": 992200 }, { "epoch": 9.76, "grad_norm": 0.09133051335811615, "learning_rate": 1.2539843307813758e-07, "loss": 0.0319, "step": 992225 }, { "epoch": 9.76, "grad_norm": 4.9891157150268555, "learning_rate": 1.252743106238891e-07, "loss": 0.0172, "step": 992250 }, { "epoch": 9.76, "grad_norm": 0.0898834690451622, "learning_rate": 1.2515018816964063e-07, "loss": 0.0323, "step": 992275 }, { "epoch": 9.76, "grad_norm": 0.8574533462524414, "learning_rate": 1.250260657153922e-07, "loss": 0.0211, "step": 992300 }, { "epoch": 9.76, "grad_norm": 0.07572042942047119, "learning_rate": 1.2490194326114372e-07, "loss": 0.0301, "step": 992325 }, { "epoch": 9.76, "grad_norm": 5.871239185333252, "learning_rate": 1.2477782080689525e-07, "loss": 0.0145, "step": 992350 }, { "epoch": 9.76, "grad_norm": 0.2646639347076416, "learning_rate": 1.246536983526468e-07, "loss": 0.0482, "step": 992375 }, { "epoch": 9.76, "grad_norm": 4.897892951965332, "learning_rate": 1.2452957589839833e-07, "loss": 0.0143, "step": 992400 }, { "epoch": 9.76, "grad_norm": 0.05913611501455307, "learning_rate": 1.244054534441499e-07, "loss": 0.039, "step": 992425 }, { "epoch": 9.76, "grad_norm": 9.555299758911133, "learning_rate": 1.2428133098990142e-07, "loss": 0.012, "step": 992450 }, { "epoch": 9.76, "grad_norm": 0.014228696934878826, "learning_rate": 1.2415720853565295e-07, "loss": 0.0395, "step": 992475 }, { "epoch": 9.76, "grad_norm": 6.5440449714660645, "learning_rate": 1.2403308608140448e-07, "loss": 0.0171, "step": 992500 }, { "epoch": 9.76, "grad_norm": 0.09713730961084366, "learning_rate": 1.2390896362715603e-07, "loss": 0.046, "step": 992525 }, { "epoch": 9.76, "grad_norm": 0.19087320566177368, "learning_rate": 1.2378484117290756e-07, "loss": 0.0173, "step": 992550 }, { "epoch": 9.76, "grad_norm": 0.011539004743099213, "learning_rate": 1.236607187186591e-07, "loss": 0.0433, "step": 992575 }, { "epoch": 9.76, "grad_norm": 10.991167068481445, "learning_rate": 1.2353659626441062e-07, "loss": 0.0257, "step": 992600 }, { "epoch": 9.76, "grad_norm": 0.09956273436546326, "learning_rate": 1.2341247381016215e-07, "loss": 0.0402, "step": 992625 }, { "epoch": 9.76, "grad_norm": 0.5398548245429993, "learning_rate": 1.232883513559137e-07, "loss": 0.0214, "step": 992650 }, { "epoch": 9.76, "grad_norm": 0.2197861522436142, "learning_rate": 1.2316422890166523e-07, "loss": 0.034, "step": 992675 }, { "epoch": 9.76, "grad_norm": 0.05639864504337311, "learning_rate": 1.2304010644741676e-07, "loss": 0.0084, "step": 992700 }, { "epoch": 9.76, "grad_norm": 1.2107609510421753, "learning_rate": 1.2291598399316832e-07, "loss": 0.0521, "step": 992725 }, { "epoch": 9.76, "grad_norm": 8.17867660522461, "learning_rate": 1.2279186153891985e-07, "loss": 0.0236, "step": 992750 }, { "epoch": 9.76, "grad_norm": 0.3804536759853363, "learning_rate": 1.2266773908467138e-07, "loss": 0.0469, "step": 992775 }, { "epoch": 9.76, "grad_norm": 2.240281343460083, "learning_rate": 1.2254361663042293e-07, "loss": 0.0108, "step": 992800 }, { "epoch": 9.76, "grad_norm": 0.25751441717147827, "learning_rate": 1.2241949417617446e-07, "loss": 0.0338, "step": 992825 }, { "epoch": 9.76, "grad_norm": 0.057819005101919174, "learning_rate": 1.22295371721926e-07, "loss": 0.0217, "step": 992850 }, { "epoch": 9.76, "grad_norm": 0.003926195204257965, "learning_rate": 1.2217124926767754e-07, "loss": 0.0254, "step": 992875 }, { "epoch": 9.76, "grad_norm": 0.07602700591087341, "learning_rate": 1.2204712681342907e-07, "loss": 0.008, "step": 992900 }, { "epoch": 9.76, "grad_norm": 3.1946818828582764, "learning_rate": 1.219230043591806e-07, "loss": 0.036, "step": 992925 }, { "epoch": 9.76, "grad_norm": 1.1002528667449951, "learning_rate": 1.2179888190493213e-07, "loss": 0.0068, "step": 992950 }, { "epoch": 9.76, "grad_norm": 0.2962642312049866, "learning_rate": 1.2167475945068366e-07, "loss": 0.0297, "step": 992975 }, { "epoch": 9.76, "grad_norm": 0.324700266122818, "learning_rate": 1.2155063699643522e-07, "loss": 0.0125, "step": 993000 }, { "epoch": 9.76, "grad_norm": 0.03589764982461929, "learning_rate": 1.2142651454218675e-07, "loss": 0.037, "step": 993025 }, { "epoch": 9.76, "grad_norm": 0.3521977961063385, "learning_rate": 1.2130239208793827e-07, "loss": 0.0099, "step": 993050 }, { "epoch": 9.76, "grad_norm": 1.7844313383102417, "learning_rate": 1.2117826963368983e-07, "loss": 0.0496, "step": 993075 }, { "epoch": 9.76, "grad_norm": 9.420079231262207, "learning_rate": 1.2105414717944136e-07, "loss": 0.0159, "step": 993100 }, { "epoch": 9.76, "grad_norm": 0.5365684628486633, "learning_rate": 1.209300247251929e-07, "loss": 0.0456, "step": 993125 }, { "epoch": 9.76, "grad_norm": 0.6455274820327759, "learning_rate": 1.2080590227094444e-07, "loss": 0.0215, "step": 993150 }, { "epoch": 9.77, "grad_norm": 1.7346868515014648, "learning_rate": 1.2068177981669597e-07, "loss": 0.032, "step": 993175 }, { "epoch": 9.77, "grad_norm": 1.804787278175354, "learning_rate": 1.205576573624475e-07, "loss": 0.0142, "step": 993200 }, { "epoch": 9.77, "grad_norm": 0.2153358906507492, "learning_rate": 1.2043353490819906e-07, "loss": 0.0307, "step": 993225 }, { "epoch": 9.77, "grad_norm": 5.579126358032227, "learning_rate": 1.203094124539506e-07, "loss": 0.0112, "step": 993250 }, { "epoch": 9.77, "grad_norm": 3.794156074523926, "learning_rate": 1.2018528999970212e-07, "loss": 0.0522, "step": 993275 }, { "epoch": 9.77, "grad_norm": 2.899919033050537, "learning_rate": 1.2006116754545365e-07, "loss": 0.0216, "step": 993300 }, { "epoch": 9.77, "grad_norm": 0.7975624203681946, "learning_rate": 1.1993704509120517e-07, "loss": 0.0415, "step": 993325 }, { "epoch": 9.77, "grad_norm": 1.342306137084961, "learning_rate": 1.1981292263695673e-07, "loss": 0.0178, "step": 993350 }, { "epoch": 9.77, "grad_norm": 0.015413668006658554, "learning_rate": 1.1968880018270826e-07, "loss": 0.0269, "step": 993375 }, { "epoch": 9.77, "grad_norm": 0.4477982223033905, "learning_rate": 1.195646777284598e-07, "loss": 0.0194, "step": 993400 }, { "epoch": 9.77, "grad_norm": 0.04348994046449661, "learning_rate": 1.1944055527421134e-07, "loss": 0.0471, "step": 993425 }, { "epoch": 9.77, "grad_norm": 4.918558597564697, "learning_rate": 1.1931643281996287e-07, "loss": 0.0139, "step": 993450 }, { "epoch": 9.77, "grad_norm": 0.11845806241035461, "learning_rate": 1.191923103657144e-07, "loss": 0.0463, "step": 993475 }, { "epoch": 9.77, "grad_norm": 15.049224853515625, "learning_rate": 1.1906818791146594e-07, "loss": 0.0166, "step": 993500 }, { "epoch": 9.77, "grad_norm": 1.3619155883789062, "learning_rate": 1.1894406545721749e-07, "loss": 0.028, "step": 993525 }, { "epoch": 9.77, "grad_norm": 0.22535307705402374, "learning_rate": 1.1881994300296902e-07, "loss": 0.0096, "step": 993550 }, { "epoch": 9.77, "grad_norm": 1.8079649209976196, "learning_rate": 1.1869582054872056e-07, "loss": 0.0558, "step": 993575 }, { "epoch": 9.77, "grad_norm": 0.5295144319534302, "learning_rate": 1.1857169809447209e-07, "loss": 0.024, "step": 993600 }, { "epoch": 9.77, "grad_norm": 0.005037653725594282, "learning_rate": 1.1844757564022364e-07, "loss": 0.0369, "step": 993625 }, { "epoch": 9.77, "grad_norm": 2.4016411304473877, "learning_rate": 1.1832345318597517e-07, "loss": 0.0075, "step": 993650 }, { "epoch": 9.77, "grad_norm": 0.12753361463546753, "learning_rate": 1.181993307317267e-07, "loss": 0.0335, "step": 993675 }, { "epoch": 9.77, "grad_norm": 39.30616760253906, "learning_rate": 1.1807520827747824e-07, "loss": 0.0216, "step": 993700 }, { "epoch": 9.77, "grad_norm": 0.12375669181346893, "learning_rate": 1.1795108582322977e-07, "loss": 0.028, "step": 993725 }, { "epoch": 9.77, "grad_norm": 0.051100943237543106, "learning_rate": 1.178269633689813e-07, "loss": 0.0074, "step": 993750 }, { "epoch": 9.77, "grad_norm": 0.005284495186060667, "learning_rate": 1.1770284091473286e-07, "loss": 0.0333, "step": 993775 }, { "epoch": 9.77, "grad_norm": 6.067919731140137, "learning_rate": 1.1757871846048439e-07, "loss": 0.0116, "step": 993800 }, { "epoch": 9.77, "grad_norm": 0.3145618438720703, "learning_rate": 1.1745459600623591e-07, "loss": 0.0331, "step": 993825 }, { "epoch": 9.77, "grad_norm": 1.4255290031433105, "learning_rate": 1.1733047355198746e-07, "loss": 0.0095, "step": 993850 }, { "epoch": 9.77, "grad_norm": 0.08663002401590347, "learning_rate": 1.17206351097739e-07, "loss": 0.0273, "step": 993875 }, { "epoch": 9.77, "grad_norm": 2.5740296840667725, "learning_rate": 1.1708222864349053e-07, "loss": 0.0045, "step": 993900 }, { "epoch": 9.77, "grad_norm": 0.35736265778541565, "learning_rate": 1.1695810618924207e-07, "loss": 0.0482, "step": 993925 }, { "epoch": 9.77, "grad_norm": 0.621764063835144, "learning_rate": 1.168339837349936e-07, "loss": 0.0115, "step": 993950 }, { "epoch": 9.77, "grad_norm": 2.051198959350586, "learning_rate": 1.1670986128074516e-07, "loss": 0.0345, "step": 993975 }, { "epoch": 9.77, "grad_norm": 1.341666340827942, "learning_rate": 1.1658573882649668e-07, "loss": 0.0108, "step": 994000 }, { "epoch": 9.77, "grad_norm": 0.14636297523975372, "learning_rate": 1.1646161637224821e-07, "loss": 0.0412, "step": 994025 }, { "epoch": 9.77, "grad_norm": 0.16214486956596375, "learning_rate": 1.1633749391799976e-07, "loss": 0.0082, "step": 994050 }, { "epoch": 9.77, "grad_norm": 0.9063239097595215, "learning_rate": 1.1621337146375129e-07, "loss": 0.035, "step": 994075 }, { "epoch": 9.77, "grad_norm": 3.3408892154693604, "learning_rate": 1.1608924900950281e-07, "loss": 0.0096, "step": 994100 }, { "epoch": 9.77, "grad_norm": 0.04560784623026848, "learning_rate": 1.1596512655525437e-07, "loss": 0.0334, "step": 994125 }, { "epoch": 9.77, "grad_norm": 2.433671236038208, "learning_rate": 1.158410041010059e-07, "loss": 0.0147, "step": 994150 }, { "epoch": 9.77, "grad_norm": 0.021894972771406174, "learning_rate": 1.1571688164675743e-07, "loss": 0.0348, "step": 994175 }, { "epoch": 9.78, "grad_norm": 5.733954429626465, "learning_rate": 1.1559275919250897e-07, "loss": 0.0208, "step": 994200 }, { "epoch": 9.78, "grad_norm": 0.035444989800453186, "learning_rate": 1.1546863673826051e-07, "loss": 0.0409, "step": 994225 }, { "epoch": 9.78, "grad_norm": 1.233374834060669, "learning_rate": 1.1534451428401204e-07, "loss": 0.0176, "step": 994250 }, { "epoch": 9.78, "grad_norm": 0.02754763327538967, "learning_rate": 1.1522039182976358e-07, "loss": 0.0387, "step": 994275 }, { "epoch": 9.78, "grad_norm": 6.731828212738037, "learning_rate": 1.1509626937551511e-07, "loss": 0.0218, "step": 994300 }, { "epoch": 9.78, "grad_norm": 0.041810207068920135, "learning_rate": 1.1497214692126664e-07, "loss": 0.026, "step": 994325 }, { "epoch": 9.78, "grad_norm": 0.8002138137817383, "learning_rate": 1.148480244670182e-07, "loss": 0.0107, "step": 994350 }, { "epoch": 9.78, "grad_norm": 0.0932169184088707, "learning_rate": 1.1472390201276973e-07, "loss": 0.0357, "step": 994375 }, { "epoch": 9.78, "grad_norm": 0.3312937021255493, "learning_rate": 1.1459977955852127e-07, "loss": 0.0115, "step": 994400 }, { "epoch": 9.78, "grad_norm": 0.8886268138885498, "learning_rate": 1.144756571042728e-07, "loss": 0.0468, "step": 994425 }, { "epoch": 9.78, "grad_norm": 2.6897778511047363, "learning_rate": 1.1435153465002433e-07, "loss": 0.0245, "step": 994450 }, { "epoch": 9.78, "grad_norm": 0.05270623788237572, "learning_rate": 1.1422741219577588e-07, "loss": 0.0326, "step": 994475 }, { "epoch": 9.78, "grad_norm": 2.490410089492798, "learning_rate": 1.1410328974152741e-07, "loss": 0.0173, "step": 994500 }, { "epoch": 9.78, "grad_norm": 0.9877527952194214, "learning_rate": 1.1397916728727894e-07, "loss": 0.0434, "step": 994525 }, { "epoch": 9.78, "grad_norm": 0.4124147295951843, "learning_rate": 1.1385504483303048e-07, "loss": 0.0112, "step": 994550 }, { "epoch": 9.78, "grad_norm": 3.8839101791381836, "learning_rate": 1.1373092237878203e-07, "loss": 0.038, "step": 994575 }, { "epoch": 9.78, "grad_norm": 4.527198314666748, "learning_rate": 1.1360679992453356e-07, "loss": 0.0176, "step": 994600 }, { "epoch": 9.78, "grad_norm": 0.5401166677474976, "learning_rate": 1.134826774702851e-07, "loss": 0.044, "step": 994625 }, { "epoch": 9.78, "grad_norm": 7.72452449798584, "learning_rate": 1.1335855501603663e-07, "loss": 0.0116, "step": 994650 }, { "epoch": 9.78, "grad_norm": 1.5294350385665894, "learning_rate": 1.1323939745995812e-07, "loss": 0.0402, "step": 994675 }, { "epoch": 9.78, "grad_norm": 6.1457037925720215, "learning_rate": 1.1311527500570965e-07, "loss": 0.0078, "step": 994700 }, { "epoch": 9.78, "grad_norm": 1.9526066780090332, "learning_rate": 1.1299115255146118e-07, "loss": 0.0325, "step": 994725 }, { "epoch": 9.78, "grad_norm": 15.432461738586426, "learning_rate": 1.1286703009721272e-07, "loss": 0.0209, "step": 994750 }, { "epoch": 9.78, "grad_norm": 1.7893253564834595, "learning_rate": 1.1274290764296425e-07, "loss": 0.0474, "step": 994775 }, { "epoch": 9.78, "grad_norm": 11.946745872497559, "learning_rate": 1.1261878518871578e-07, "loss": 0.0167, "step": 994800 }, { "epoch": 9.78, "grad_norm": 6.314443588256836, "learning_rate": 1.1249466273446733e-07, "loss": 0.0219, "step": 994825 }, { "epoch": 9.78, "grad_norm": 3.9840145111083984, "learning_rate": 1.1237054028021886e-07, "loss": 0.0097, "step": 994850 }, { "epoch": 9.78, "grad_norm": 0.017129870131611824, "learning_rate": 1.122464178259704e-07, "loss": 0.0316, "step": 994875 }, { "epoch": 9.78, "grad_norm": 5.763406753540039, "learning_rate": 1.1212229537172193e-07, "loss": 0.0242, "step": 994900 }, { "epoch": 9.78, "grad_norm": 0.0930558368563652, "learning_rate": 1.1199817291747346e-07, "loss": 0.0377, "step": 994925 }, { "epoch": 9.78, "grad_norm": 6.108133316040039, "learning_rate": 1.1187405046322502e-07, "loss": 0.0206, "step": 994950 }, { "epoch": 9.78, "grad_norm": 0.5268145203590393, "learning_rate": 1.1174992800897655e-07, "loss": 0.0311, "step": 994975 }, { "epoch": 9.78, "grad_norm": 6.6960601806640625, "learning_rate": 1.1162580555472807e-07, "loss": 0.0134, "step": 995000 }, { "epoch": 9.78, "grad_norm": 4.272887229919434, "learning_rate": 1.1150168310047963e-07, "loss": 0.0475, "step": 995025 }, { "epoch": 9.78, "grad_norm": 2.940953493118286, "learning_rate": 1.1137756064623116e-07, "loss": 0.0117, "step": 995050 }, { "epoch": 9.78, "grad_norm": 0.045931536704301834, "learning_rate": 1.1125343819198269e-07, "loss": 0.031, "step": 995075 }, { "epoch": 9.78, "grad_norm": 0.2426704615354538, "learning_rate": 1.1112931573773423e-07, "loss": 0.0173, "step": 995100 }, { "epoch": 9.78, "grad_norm": 0.036612678319215775, "learning_rate": 1.1100519328348576e-07, "loss": 0.0357, "step": 995125 }, { "epoch": 9.78, "grad_norm": 7.511331081390381, "learning_rate": 1.1088107082923729e-07, "loss": 0.0072, "step": 995150 }, { "epoch": 9.78, "grad_norm": 0.004245992284268141, "learning_rate": 1.1075694837498884e-07, "loss": 0.0217, "step": 995175 }, { "epoch": 9.78, "grad_norm": 8.251546859741211, "learning_rate": 1.1063282592074037e-07, "loss": 0.0144, "step": 995200 }, { "epoch": 9.79, "grad_norm": 6.084839820861816, "learning_rate": 1.1050870346649192e-07, "loss": 0.0376, "step": 995225 }, { "epoch": 9.79, "grad_norm": 1.5513719320297241, "learning_rate": 1.1038458101224345e-07, "loss": 0.0197, "step": 995250 }, { "epoch": 9.79, "grad_norm": 0.0167526938021183, "learning_rate": 1.1026045855799497e-07, "loss": 0.0288, "step": 995275 }, { "epoch": 9.79, "grad_norm": 1.7601964473724365, "learning_rate": 1.1013633610374653e-07, "loss": 0.0143, "step": 995300 }, { "epoch": 9.79, "grad_norm": 0.3317480981349945, "learning_rate": 1.1001221364949806e-07, "loss": 0.0355, "step": 995325 }, { "epoch": 9.79, "grad_norm": 5.969703197479248, "learning_rate": 1.0988809119524959e-07, "loss": 0.015, "step": 995350 }, { "epoch": 9.79, "grad_norm": 0.01557349506765604, "learning_rate": 1.0976396874100114e-07, "loss": 0.0219, "step": 995375 }, { "epoch": 9.79, "grad_norm": 3.063258409500122, "learning_rate": 1.0963984628675267e-07, "loss": 0.0148, "step": 995400 }, { "epoch": 9.79, "grad_norm": 0.02320828288793564, "learning_rate": 1.095157238325042e-07, "loss": 0.028, "step": 995425 }, { "epoch": 9.79, "grad_norm": 4.80686092376709, "learning_rate": 1.0939160137825574e-07, "loss": 0.0061, "step": 995450 }, { "epoch": 9.79, "grad_norm": 2.020941734313965, "learning_rate": 1.0926747892400727e-07, "loss": 0.0234, "step": 995475 }, { "epoch": 9.79, "grad_norm": 0.2849557101726532, "learning_rate": 1.091433564697588e-07, "loss": 0.0127, "step": 995500 }, { "epoch": 9.79, "grad_norm": 3.294890880584717, "learning_rate": 1.0901923401551036e-07, "loss": 0.0389, "step": 995525 }, { "epoch": 9.79, "grad_norm": 1.9730329513549805, "learning_rate": 1.0889511156126189e-07, "loss": 0.0103, "step": 995550 }, { "epoch": 9.79, "grad_norm": 0.02024879679083824, "learning_rate": 1.0877098910701342e-07, "loss": 0.0362, "step": 995575 }, { "epoch": 9.79, "grad_norm": 8.4320650100708, "learning_rate": 1.0864686665276496e-07, "loss": 0.0168, "step": 995600 }, { "epoch": 9.79, "grad_norm": 0.00804678350687027, "learning_rate": 1.085227441985165e-07, "loss": 0.0339, "step": 995625 }, { "epoch": 9.79, "grad_norm": 11.538476943969727, "learning_rate": 1.0839862174426804e-07, "loss": 0.0125, "step": 995650 }, { "epoch": 9.79, "grad_norm": 0.1590973287820816, "learning_rate": 1.0827449929001957e-07, "loss": 0.031, "step": 995675 }, { "epoch": 9.79, "grad_norm": 3.9560599327087402, "learning_rate": 1.081503768357711e-07, "loss": 0.0055, "step": 995700 }, { "epoch": 9.79, "grad_norm": 0.051787398755550385, "learning_rate": 1.0802625438152266e-07, "loss": 0.044, "step": 995725 }, { "epoch": 9.79, "grad_norm": 0.058369435369968414, "learning_rate": 1.0790213192727419e-07, "loss": 0.0063, "step": 995750 }, { "epoch": 9.79, "grad_norm": 0.1714951992034912, "learning_rate": 1.0777800947302572e-07, "loss": 0.0556, "step": 995775 }, { "epoch": 9.79, "grad_norm": 0.3276432454586029, "learning_rate": 1.0765388701877726e-07, "loss": 0.0195, "step": 995800 }, { "epoch": 9.79, "grad_norm": 0.013350569643080235, "learning_rate": 1.0752976456452879e-07, "loss": 0.0241, "step": 995825 }, { "epoch": 9.79, "grad_norm": 2.703659772872925, "learning_rate": 1.0740564211028032e-07, "loss": 0.0173, "step": 995850 }, { "epoch": 9.79, "grad_norm": 0.6451284289360046, "learning_rate": 1.0728151965603187e-07, "loss": 0.0266, "step": 995875 }, { "epoch": 9.79, "grad_norm": 6.200810432434082, "learning_rate": 1.071573972017834e-07, "loss": 0.0197, "step": 995900 }, { "epoch": 9.79, "grad_norm": 1.230393409729004, "learning_rate": 1.0703327474753493e-07, "loss": 0.0369, "step": 995925 }, { "epoch": 9.79, "grad_norm": 5.636495113372803, "learning_rate": 1.0690915229328647e-07, "loss": 0.0185, "step": 995950 }, { "epoch": 9.79, "grad_norm": 0.6355186104774475, "learning_rate": 1.0678502983903801e-07, "loss": 0.0268, "step": 995975 }, { "epoch": 9.79, "grad_norm": 0.6721299886703491, "learning_rate": 1.0666090738478954e-07, "loss": 0.0143, "step": 996000 }, { "epoch": 9.79, "grad_norm": 1.9790985584259033, "learning_rate": 1.0653678493054109e-07, "loss": 0.0353, "step": 996025 }, { "epoch": 9.79, "grad_norm": 13.22221851348877, "learning_rate": 1.0641266247629261e-07, "loss": 0.0162, "step": 996050 }, { "epoch": 9.79, "grad_norm": 0.13559815287590027, "learning_rate": 1.0628854002204417e-07, "loss": 0.032, "step": 996075 }, { "epoch": 9.79, "grad_norm": 15.766921043395996, "learning_rate": 1.061644175677957e-07, "loss": 0.0132, "step": 996100 }, { "epoch": 9.79, "grad_norm": 0.061931777745485306, "learning_rate": 1.0604029511354723e-07, "loss": 0.0453, "step": 996125 }, { "epoch": 9.79, "grad_norm": 0.04216356575489044, "learning_rate": 1.0591617265929877e-07, "loss": 0.0125, "step": 996150 }, { "epoch": 9.79, "grad_norm": 0.02167724072933197, "learning_rate": 1.057920502050503e-07, "loss": 0.0455, "step": 996175 }, { "epoch": 9.79, "grad_norm": 6.8630452156066895, "learning_rate": 1.0566792775080183e-07, "loss": 0.0125, "step": 996200 }, { "epoch": 9.8, "grad_norm": 2.004195213317871, "learning_rate": 1.0554380529655338e-07, "loss": 0.0502, "step": 996225 }, { "epoch": 9.8, "grad_norm": 18.167755126953125, "learning_rate": 1.0541968284230491e-07, "loss": 0.0224, "step": 996250 }, { "epoch": 9.8, "grad_norm": 0.017948448657989502, "learning_rate": 1.0529556038805644e-07, "loss": 0.0446, "step": 996275 }, { "epoch": 9.8, "grad_norm": 4.02579927444458, "learning_rate": 1.0517143793380798e-07, "loss": 0.0104, "step": 996300 }, { "epoch": 9.8, "grad_norm": 0.026580657809972763, "learning_rate": 1.0504731547955953e-07, "loss": 0.0399, "step": 996325 }, { "epoch": 9.8, "grad_norm": 12.363509178161621, "learning_rate": 1.0492319302531106e-07, "loss": 0.01, "step": 996350 }, { "epoch": 9.8, "grad_norm": 0.028565116226673126, "learning_rate": 1.047990705710626e-07, "loss": 0.0272, "step": 996375 }, { "epoch": 9.8, "grad_norm": 3.07437801361084, "learning_rate": 1.0467494811681413e-07, "loss": 0.0175, "step": 996400 }, { "epoch": 9.8, "grad_norm": 0.130935937166214, "learning_rate": 1.0455082566256568e-07, "loss": 0.0464, "step": 996425 }, { "epoch": 9.8, "grad_norm": 2.4524755477905273, "learning_rate": 1.0442670320831721e-07, "loss": 0.0107, "step": 996450 }, { "epoch": 9.8, "grad_norm": 0.014909773133695126, "learning_rate": 1.0430258075406874e-07, "loss": 0.0618, "step": 996475 }, { "epoch": 9.8, "grad_norm": 0.6091951727867126, "learning_rate": 1.0417845829982028e-07, "loss": 0.0186, "step": 996500 }, { "epoch": 9.8, "grad_norm": 0.008375575765967369, "learning_rate": 1.0405433584557181e-07, "loss": 0.0325, "step": 996525 }, { "epoch": 9.8, "grad_norm": 0.25269508361816406, "learning_rate": 1.0393021339132334e-07, "loss": 0.0131, "step": 996550 }, { "epoch": 9.8, "grad_norm": 0.10164861381053925, "learning_rate": 1.038060909370749e-07, "loss": 0.0356, "step": 996575 }, { "epoch": 9.8, "grad_norm": 1.7670332193374634, "learning_rate": 1.0368196848282643e-07, "loss": 0.0135, "step": 996600 }, { "epoch": 9.8, "grad_norm": 1.4151132106781006, "learning_rate": 1.0355784602857796e-07, "loss": 0.0586, "step": 996625 }, { "epoch": 9.8, "grad_norm": 9.34359359741211, "learning_rate": 1.034337235743295e-07, "loss": 0.014, "step": 996650 }, { "epoch": 9.8, "grad_norm": 1.2670005559921265, "learning_rate": 1.0330960112008104e-07, "loss": 0.0331, "step": 996675 }, { "epoch": 9.8, "grad_norm": 9.38571548461914, "learning_rate": 1.0318547866583257e-07, "loss": 0.0201, "step": 996700 }, { "epoch": 9.8, "grad_norm": 0.09653787314891815, "learning_rate": 1.0306135621158411e-07, "loss": 0.0367, "step": 996725 }, { "epoch": 9.8, "grad_norm": 5.082539081573486, "learning_rate": 1.0293723375733564e-07, "loss": 0.0195, "step": 996750 }, { "epoch": 9.8, "grad_norm": 0.08488316088914871, "learning_rate": 1.0281311130308717e-07, "loss": 0.0292, "step": 996775 }, { "epoch": 9.8, "grad_norm": 2.3947205543518066, "learning_rate": 1.0268898884883873e-07, "loss": 0.0188, "step": 996800 }, { "epoch": 9.8, "grad_norm": 0.0011721578193828464, "learning_rate": 1.0256983129276019e-07, "loss": 0.0345, "step": 996825 }, { "epoch": 9.8, "grad_norm": 8.534111022949219, "learning_rate": 1.0244570883851173e-07, "loss": 0.0155, "step": 996850 }, { "epoch": 9.8, "grad_norm": 2.2592904567718506, "learning_rate": 1.0232158638426326e-07, "loss": 0.0338, "step": 996875 }, { "epoch": 9.8, "grad_norm": 6.067329406738281, "learning_rate": 1.0219746393001482e-07, "loss": 0.0098, "step": 996900 }, { "epoch": 9.8, "grad_norm": 0.01374402642250061, "learning_rate": 1.0207334147576635e-07, "loss": 0.0616, "step": 996925 }, { "epoch": 9.8, "grad_norm": 3.7233972549438477, "learning_rate": 1.0194921902151787e-07, "loss": 0.0114, "step": 996950 }, { "epoch": 9.8, "grad_norm": 0.16045264899730682, "learning_rate": 1.0182509656726942e-07, "loss": 0.0345, "step": 996975 }, { "epoch": 9.8, "grad_norm": 5.21644401550293, "learning_rate": 1.0170097411302095e-07, "loss": 0.0144, "step": 997000 }, { "epoch": 9.8, "grad_norm": 0.016706159338355064, "learning_rate": 1.0157685165877248e-07, "loss": 0.0416, "step": 997025 }, { "epoch": 9.8, "grad_norm": 1.8521790504455566, "learning_rate": 1.0145272920452403e-07, "loss": 0.0091, "step": 997050 }, { "epoch": 9.8, "grad_norm": 0.044650305062532425, "learning_rate": 1.0132860675027556e-07, "loss": 0.0302, "step": 997075 }, { "epoch": 9.8, "grad_norm": 2.2211451530456543, "learning_rate": 1.0120448429602709e-07, "loss": 0.017, "step": 997100 }, { "epoch": 9.8, "grad_norm": 0.006815762259066105, "learning_rate": 1.0108036184177864e-07, "loss": 0.0302, "step": 997125 }, { "epoch": 9.8, "grad_norm": 1.507490634918213, "learning_rate": 1.0095623938753017e-07, "loss": 0.0083, "step": 997150 }, { "epoch": 9.8, "grad_norm": 1.6940038204193115, "learning_rate": 1.008321169332817e-07, "loss": 0.0234, "step": 997175 }, { "epoch": 9.8, "grad_norm": 12.049256324768066, "learning_rate": 1.0070799447903325e-07, "loss": 0.0187, "step": 997200 }, { "epoch": 9.8, "grad_norm": 0.043289534747600555, "learning_rate": 1.0058387202478477e-07, "loss": 0.0325, "step": 997225 }, { "epoch": 9.81, "grad_norm": 9.641937255859375, "learning_rate": 1.004597495705363e-07, "loss": 0.0105, "step": 997250 }, { "epoch": 9.81, "grad_norm": 1.4127188920974731, "learning_rate": 1.0033562711628786e-07, "loss": 0.0358, "step": 997275 }, { "epoch": 9.81, "grad_norm": 6.9742751121521, "learning_rate": 1.0021150466203939e-07, "loss": 0.0049, "step": 997300 }, { "epoch": 9.81, "grad_norm": 3.113990306854248, "learning_rate": 1.0008738220779093e-07, "loss": 0.0303, "step": 997325 }, { "epoch": 9.81, "grad_norm": 7.275633335113525, "learning_rate": 9.996325975354246e-08, "loss": 0.0143, "step": 997350 }, { "epoch": 9.81, "grad_norm": 1.2466634511947632, "learning_rate": 9.983913729929399e-08, "loss": 0.0295, "step": 997375 }, { "epoch": 9.81, "grad_norm": 0.22132748365402222, "learning_rate": 9.971501484504554e-08, "loss": 0.0161, "step": 997400 }, { "epoch": 9.81, "grad_norm": 0.050230223685503006, "learning_rate": 9.959089239079707e-08, "loss": 0.0394, "step": 997425 }, { "epoch": 9.81, "grad_norm": 10.293694496154785, "learning_rate": 9.94667699365486e-08, "loss": 0.0178, "step": 997450 }, { "epoch": 9.81, "grad_norm": 0.9105210900306702, "learning_rate": 9.934264748230016e-08, "loss": 0.0355, "step": 997475 }, { "epoch": 9.81, "grad_norm": 0.12918367981910706, "learning_rate": 9.921852502805169e-08, "loss": 0.0139, "step": 997500 }, { "epoch": 9.81, "grad_norm": 2.18813419342041, "learning_rate": 9.909440257380322e-08, "loss": 0.0437, "step": 997525 }, { "epoch": 9.81, "grad_norm": 1.4250707626342773, "learning_rate": 9.897028011955476e-08, "loss": 0.0151, "step": 997550 }, { "epoch": 9.81, "grad_norm": 0.015154298394918442, "learning_rate": 9.884615766530629e-08, "loss": 0.0399, "step": 997575 }, { "epoch": 9.81, "grad_norm": 7.246957778930664, "learning_rate": 9.872203521105782e-08, "loss": 0.0247, "step": 997600 }, { "epoch": 9.81, "grad_norm": 1.166759967803955, "learning_rate": 9.859791275680937e-08, "loss": 0.0377, "step": 997625 }, { "epoch": 9.81, "grad_norm": 2.482342481613159, "learning_rate": 9.84737903025609e-08, "loss": 0.0116, "step": 997650 }, { "epoch": 9.81, "grad_norm": 0.03507083281874657, "learning_rate": 9.834966784831244e-08, "loss": 0.0211, "step": 997675 }, { "epoch": 9.81, "grad_norm": 2.216276168823242, "learning_rate": 9.822554539406397e-08, "loss": 0.0087, "step": 997700 }, { "epoch": 9.81, "grad_norm": 0.17847009003162384, "learning_rate": 9.81014229398155e-08, "loss": 0.0335, "step": 997725 }, { "epoch": 9.81, "grad_norm": 0.2863829731941223, "learning_rate": 9.797730048556706e-08, "loss": 0.0094, "step": 997750 }, { "epoch": 9.81, "grad_norm": 0.006824998650699854, "learning_rate": 9.785317803131859e-08, "loss": 0.0362, "step": 997775 }, { "epoch": 9.81, "grad_norm": 7.473287105560303, "learning_rate": 9.772905557707012e-08, "loss": 0.0173, "step": 997800 }, { "epoch": 9.81, "grad_norm": 0.6828055381774902, "learning_rate": 9.760493312282167e-08, "loss": 0.0386, "step": 997825 }, { "epoch": 9.81, "grad_norm": 1.4084545373916626, "learning_rate": 9.74808106685732e-08, "loss": 0.0296, "step": 997850 }, { "epoch": 9.81, "grad_norm": 3.966738700866699, "learning_rate": 9.735668821432473e-08, "loss": 0.0505, "step": 997875 }, { "epoch": 9.81, "grad_norm": 3.279676914215088, "learning_rate": 9.723256576007627e-08, "loss": 0.0155, "step": 997900 }, { "epoch": 9.81, "grad_norm": 0.03632408007979393, "learning_rate": 9.71084433058278e-08, "loss": 0.0337, "step": 997925 }, { "epoch": 9.81, "grad_norm": 0.1505453735589981, "learning_rate": 9.698432085157933e-08, "loss": 0.0062, "step": 997950 }, { "epoch": 9.81, "grad_norm": 0.006651592440903187, "learning_rate": 9.686019839733089e-08, "loss": 0.0448, "step": 997975 }, { "epoch": 9.81, "grad_norm": 7.776208400726318, "learning_rate": 9.673607594308241e-08, "loss": 0.0105, "step": 998000 }, { "epoch": 9.81, "grad_norm": 4.16878080368042, "learning_rate": 9.661195348883394e-08, "loss": 0.0274, "step": 998025 }, { "epoch": 9.81, "grad_norm": 3.9801082611083984, "learning_rate": 9.648783103458549e-08, "loss": 0.0141, "step": 998050 }, { "epoch": 9.81, "grad_norm": 0.12437523901462555, "learning_rate": 9.636370858033702e-08, "loss": 0.0373, "step": 998075 }, { "epoch": 9.81, "grad_norm": 16.036401748657227, "learning_rate": 9.623958612608857e-08, "loss": 0.0258, "step": 998100 }, { "epoch": 9.81, "grad_norm": 0.1552978903055191, "learning_rate": 9.61154636718401e-08, "loss": 0.0472, "step": 998125 }, { "epoch": 9.81, "grad_norm": 8.138815879821777, "learning_rate": 9.599134121759163e-08, "loss": 0.0129, "step": 998150 }, { "epoch": 9.81, "grad_norm": 0.02193988487124443, "learning_rate": 9.586721876334318e-08, "loss": 0.0317, "step": 998175 }, { "epoch": 9.81, "grad_norm": 2.52738356590271, "learning_rate": 9.574309630909471e-08, "loss": 0.0162, "step": 998200 }, { "epoch": 9.81, "grad_norm": 0.07568664848804474, "learning_rate": 9.561897385484624e-08, "loss": 0.0326, "step": 998225 }, { "epoch": 9.81, "grad_norm": 0.16410188376903534, "learning_rate": 9.549485140059778e-08, "loss": 0.0137, "step": 998250 }, { "epoch": 9.82, "grad_norm": 0.07183060050010681, "learning_rate": 9.537072894634931e-08, "loss": 0.0152, "step": 998275 }, { "epoch": 9.82, "grad_norm": 3.7940406799316406, "learning_rate": 9.524660649210084e-08, "loss": 0.0111, "step": 998300 }, { "epoch": 9.82, "grad_norm": 0.20737452805042267, "learning_rate": 9.51224840378524e-08, "loss": 0.0338, "step": 998325 }, { "epoch": 9.82, "grad_norm": 9.811907768249512, "learning_rate": 9.499836158360393e-08, "loss": 0.0172, "step": 998350 }, { "epoch": 9.82, "grad_norm": 1.5093005895614624, "learning_rate": 9.487423912935546e-08, "loss": 0.0434, "step": 998375 }, { "epoch": 9.82, "grad_norm": 9.016846656799316, "learning_rate": 9.4750116675107e-08, "loss": 0.0173, "step": 998400 }, { "epoch": 9.82, "grad_norm": 1.8402889966964722, "learning_rate": 9.462599422085854e-08, "loss": 0.0237, "step": 998425 }, { "epoch": 9.82, "grad_norm": 0.4650607109069824, "learning_rate": 9.450187176661008e-08, "loss": 0.0029, "step": 998450 }, { "epoch": 9.82, "grad_norm": 0.3194728195667267, "learning_rate": 9.437774931236161e-08, "loss": 0.032, "step": 998475 }, { "epoch": 9.82, "grad_norm": 0.3270244300365448, "learning_rate": 9.425362685811314e-08, "loss": 0.0171, "step": 998500 }, { "epoch": 9.82, "grad_norm": 0.7209458351135254, "learning_rate": 9.41295044038647e-08, "loss": 0.044, "step": 998525 }, { "epoch": 9.82, "grad_norm": 0.28831014037132263, "learning_rate": 9.400538194961623e-08, "loss": 0.0301, "step": 998550 }, { "epoch": 9.82, "grad_norm": 0.25958457589149475, "learning_rate": 9.388125949536776e-08, "loss": 0.0442, "step": 998575 }, { "epoch": 9.82, "grad_norm": 0.47628554701805115, "learning_rate": 9.37571370411193e-08, "loss": 0.018, "step": 998600 }, { "epoch": 9.82, "grad_norm": 0.6103862524032593, "learning_rate": 9.363301458687083e-08, "loss": 0.0311, "step": 998625 }, { "epoch": 9.82, "grad_norm": 0.1155027225613594, "learning_rate": 9.350889213262236e-08, "loss": 0.0164, "step": 998650 }, { "epoch": 9.82, "grad_norm": 1.148632526397705, "learning_rate": 9.338476967837391e-08, "loss": 0.0352, "step": 998675 }, { "epoch": 9.82, "grad_norm": 16.366701126098633, "learning_rate": 9.326064722412544e-08, "loss": 0.0156, "step": 998700 }, { "epoch": 9.82, "grad_norm": 0.10384435206651688, "learning_rate": 9.313652476987697e-08, "loss": 0.0313, "step": 998725 }, { "epoch": 9.82, "grad_norm": 0.9688936471939087, "learning_rate": 9.301240231562851e-08, "loss": 0.0252, "step": 998750 }, { "epoch": 9.82, "grad_norm": 0.010068895295262337, "learning_rate": 9.288827986138005e-08, "loss": 0.0357, "step": 998775 }, { "epoch": 9.82, "grad_norm": 2.430774450302124, "learning_rate": 9.276415740713158e-08, "loss": 0.0059, "step": 998800 }, { "epoch": 9.82, "grad_norm": 8.301365852355957, "learning_rate": 9.264003495288313e-08, "loss": 0.0225, "step": 998825 }, { "epoch": 9.82, "grad_norm": 12.211933135986328, "learning_rate": 9.251591249863466e-08, "loss": 0.0267, "step": 998850 }, { "epoch": 9.82, "grad_norm": 3.940565824508667, "learning_rate": 9.239179004438621e-08, "loss": 0.0318, "step": 998875 }, { "epoch": 9.82, "grad_norm": 0.12814360857009888, "learning_rate": 9.226766759013774e-08, "loss": 0.0154, "step": 998900 }, { "epoch": 9.82, "grad_norm": 6.364516735076904, "learning_rate": 9.214354513588927e-08, "loss": 0.0285, "step": 998925 }, { "epoch": 9.82, "grad_norm": 3.7769055366516113, "learning_rate": 9.201942268164081e-08, "loss": 0.0091, "step": 998950 }, { "epoch": 9.82, "grad_norm": 1.9999151229858398, "learning_rate": 9.189530022739234e-08, "loss": 0.0267, "step": 998975 }, { "epoch": 9.82, "grad_norm": 0.1651286631822586, "learning_rate": 9.177117777314387e-08, "loss": 0.0121, "step": 999000 }, { "epoch": 9.82, "grad_norm": 0.012499856762588024, "learning_rate": 9.165202021706536e-08, "loss": 0.0293, "step": 999025 }, { "epoch": 9.82, "grad_norm": 8.189501762390137, "learning_rate": 9.152789776281689e-08, "loss": 0.0231, "step": 999050 }, { "epoch": 9.82, "grad_norm": 0.0704784095287323, "learning_rate": 9.140377530856843e-08, "loss": 0.035, "step": 999075 }, { "epoch": 9.82, "grad_norm": 2.5334055423736572, "learning_rate": 9.127965285431996e-08, "loss": 0.0158, "step": 999100 }, { "epoch": 9.82, "grad_norm": 0.020494762808084488, "learning_rate": 9.115553040007149e-08, "loss": 0.0312, "step": 999125 }, { "epoch": 9.82, "grad_norm": 18.359832763671875, "learning_rate": 9.103140794582305e-08, "loss": 0.0119, "step": 999150 }, { "epoch": 9.82, "grad_norm": 0.5641356110572815, "learning_rate": 9.090728549157457e-08, "loss": 0.036, "step": 999175 }, { "epoch": 9.82, "grad_norm": 22.303821563720703, "learning_rate": 9.07831630373261e-08, "loss": 0.0222, "step": 999200 }, { "epoch": 9.82, "grad_norm": 0.04141940921545029, "learning_rate": 9.065904058307765e-08, "loss": 0.0356, "step": 999225 }, { "epoch": 9.82, "grad_norm": 2.018284797668457, "learning_rate": 9.053491812882919e-08, "loss": 0.0135, "step": 999250 }, { "epoch": 9.83, "grad_norm": 0.32319751381874084, "learning_rate": 9.041079567458072e-08, "loss": 0.0286, "step": 999275 }, { "epoch": 9.83, "grad_norm": 13.063212394714355, "learning_rate": 9.028667322033226e-08, "loss": 0.0086, "step": 999300 }, { "epoch": 9.83, "grad_norm": 0.029430611059069633, "learning_rate": 9.016255076608379e-08, "loss": 0.0388, "step": 999325 }, { "epoch": 9.83, "grad_norm": 0.00969067495316267, "learning_rate": 9.003842831183534e-08, "loss": 0.0105, "step": 999350 }, { "epoch": 9.83, "grad_norm": 0.03137421980500221, "learning_rate": 8.991430585758687e-08, "loss": 0.0418, "step": 999375 }, { "epoch": 9.83, "grad_norm": 8.520099639892578, "learning_rate": 8.97901834033384e-08, "loss": 0.0146, "step": 999400 }, { "epoch": 9.83, "grad_norm": 0.009571965783834457, "learning_rate": 8.966606094908994e-08, "loss": 0.0261, "step": 999425 }, { "epoch": 9.83, "grad_norm": 14.342151641845703, "learning_rate": 8.954193849484147e-08, "loss": 0.0185, "step": 999450 }, { "epoch": 9.83, "grad_norm": 0.026662403717637062, "learning_rate": 8.9417816040593e-08, "loss": 0.0375, "step": 999475 }, { "epoch": 9.83, "grad_norm": 14.497929573059082, "learning_rate": 8.929369358634456e-08, "loss": 0.0145, "step": 999500 }, { "epoch": 9.83, "grad_norm": 5.478609085083008, "learning_rate": 8.916957113209609e-08, "loss": 0.0249, "step": 999525 }, { "epoch": 9.83, "grad_norm": 4.434149742126465, "learning_rate": 8.904544867784762e-08, "loss": 0.016, "step": 999550 }, { "epoch": 9.83, "grad_norm": 0.052876751869916916, "learning_rate": 8.892132622359916e-08, "loss": 0.0178, "step": 999575 }, { "epoch": 9.83, "grad_norm": 0.4230175018310547, "learning_rate": 8.87972037693507e-08, "loss": 0.0245, "step": 999600 }, { "epoch": 9.83, "grad_norm": 0.047897059470415115, "learning_rate": 8.867308131510223e-08, "loss": 0.0272, "step": 999625 }, { "epoch": 9.83, "grad_norm": 0.10381978005170822, "learning_rate": 8.854895886085377e-08, "loss": 0.0136, "step": 999650 }, { "epoch": 9.83, "grad_norm": 0.04548744484782219, "learning_rate": 8.84248364066053e-08, "loss": 0.0535, "step": 999675 }, { "epoch": 9.83, "grad_norm": 0.8236767053604126, "learning_rate": 8.830071395235686e-08, "loss": 0.0169, "step": 999700 }, { "epoch": 9.83, "grad_norm": 0.05193502828478813, "learning_rate": 8.817659149810839e-08, "loss": 0.0341, "step": 999725 }, { "epoch": 9.83, "grad_norm": 9.185050964355469, "learning_rate": 8.805246904385992e-08, "loss": 0.0093, "step": 999750 }, { "epoch": 9.83, "grad_norm": 0.04752854257822037, "learning_rate": 8.792834658961146e-08, "loss": 0.0299, "step": 999775 }, { "epoch": 9.83, "grad_norm": 1.1268277168273926, "learning_rate": 8.780422413536299e-08, "loss": 0.0329, "step": 999800 }, { "epoch": 9.83, "grad_norm": 4.788417816162109, "learning_rate": 8.768010168111452e-08, "loss": 0.0465, "step": 999825 }, { "epoch": 9.83, "grad_norm": 16.35651206970215, "learning_rate": 8.755597922686607e-08, "loss": 0.0251, "step": 999850 }, { "epoch": 9.83, "grad_norm": 0.13357043266296387, "learning_rate": 8.74318567726176e-08, "loss": 0.0241, "step": 999875 }, { "epoch": 9.83, "grad_norm": 7.6069793701171875, "learning_rate": 8.730773431836913e-08, "loss": 0.0093, "step": 999900 }, { "epoch": 9.83, "grad_norm": 0.010027465410530567, "learning_rate": 8.718361186412069e-08, "loss": 0.0266, "step": 999925 }, { "epoch": 9.83, "grad_norm": 16.27887535095215, "learning_rate": 8.705948940987221e-08, "loss": 0.0155, "step": 999950 }, { "epoch": 9.83, "grad_norm": 0.14963148534297943, "learning_rate": 8.693536695562374e-08, "loss": 0.0304, "step": 999975 }, { "epoch": 9.83, "grad_norm": 0.5768895149230957, "learning_rate": 8.681124450137529e-08, "loss": 0.0096, "step": 1000000 }, { "epoch": 9.83, "eval_loss": 0.931305468082428, "eval_runtime": 6045.8351, "eval_samples_per_second": 1.566, "eval_steps_per_second": 0.196, "eval_wer": 0.11031352604133136, "step": 1000000 }, { "epoch": 9.83, "grad_norm": 2.8041720390319824, "learning_rate": 8.668712204712682e-08, "loss": 0.0534, "step": 1000025 }, { "epoch": 9.83, "grad_norm": 7.843930721282959, "learning_rate": 8.656299959287834e-08, "loss": 0.0195, "step": 1000050 }, { "epoch": 9.83, "grad_norm": 0.2119150310754776, "learning_rate": 8.64388771386299e-08, "loss": 0.0118, "step": 1000075 }, { "epoch": 9.83, "grad_norm": 6.291942596435547, "learning_rate": 8.631475468438143e-08, "loss": 0.0157, "step": 1000100 }, { "epoch": 9.83, "grad_norm": 2.6893157958984375, "learning_rate": 8.619063223013297e-08, "loss": 0.048, "step": 1000125 }, { "epoch": 9.83, "grad_norm": 0.385637491941452, "learning_rate": 8.60665097758845e-08, "loss": 0.0159, "step": 1000150 }, { "epoch": 9.83, "grad_norm": 0.1175585389137268, "learning_rate": 8.594238732163603e-08, "loss": 0.039, "step": 1000175 }, { "epoch": 9.83, "grad_norm": 8.390949249267578, "learning_rate": 8.581826486738759e-08, "loss": 0.0166, "step": 1000200 }, { "epoch": 9.83, "grad_norm": 0.045457880944013596, "learning_rate": 8.569414241313911e-08, "loss": 0.0376, "step": 1000225 }, { "epoch": 9.83, "grad_norm": 0.26857250928878784, "learning_rate": 8.557001995889064e-08, "loss": 0.0075, "step": 1000250 }, { "epoch": 9.83, "grad_norm": 0.04866555705666542, "learning_rate": 8.54458975046422e-08, "loss": 0.0379, "step": 1000275 }, { "epoch": 9.84, "grad_norm": 12.452980041503906, "learning_rate": 8.532177505039373e-08, "loss": 0.0141, "step": 1000300 }, { "epoch": 9.84, "grad_norm": 0.4160732328891754, "learning_rate": 8.519765259614526e-08, "loss": 0.05, "step": 1000325 }, { "epoch": 9.84, "grad_norm": 4.220421314239502, "learning_rate": 8.50735301418968e-08, "loss": 0.0193, "step": 1000350 }, { "epoch": 9.84, "grad_norm": 2.58317494392395, "learning_rate": 8.494940768764833e-08, "loss": 0.0628, "step": 1000375 }, { "epoch": 9.84, "grad_norm": 8.908356666564941, "learning_rate": 8.482528523339986e-08, "loss": 0.0203, "step": 1000400 }, { "epoch": 9.84, "grad_norm": 1.1373825073242188, "learning_rate": 8.470116277915141e-08, "loss": 0.037, "step": 1000425 }, { "epoch": 9.84, "grad_norm": 13.392505645751953, "learning_rate": 8.457704032490294e-08, "loss": 0.018, "step": 1000450 }, { "epoch": 9.84, "grad_norm": 6.702615261077881, "learning_rate": 8.445291787065448e-08, "loss": 0.0378, "step": 1000475 }, { "epoch": 9.84, "grad_norm": 0.4450245499610901, "learning_rate": 8.432879541640601e-08, "loss": 0.0129, "step": 1000500 }, { "epoch": 9.84, "grad_norm": 0.008471274748444557, "learning_rate": 8.420467296215754e-08, "loss": 0.0344, "step": 1000525 }, { "epoch": 9.84, "grad_norm": 17.250835418701172, "learning_rate": 8.40805505079091e-08, "loss": 0.0166, "step": 1000550 }, { "epoch": 9.84, "grad_norm": 0.08181608468294144, "learning_rate": 8.395642805366063e-08, "loss": 0.0228, "step": 1000575 }, { "epoch": 9.84, "grad_norm": 0.6489899158477783, "learning_rate": 8.383230559941216e-08, "loss": 0.0131, "step": 1000600 }, { "epoch": 9.84, "grad_norm": 1.1899462938308716, "learning_rate": 8.370818314516371e-08, "loss": 0.0341, "step": 1000625 }, { "epoch": 9.84, "grad_norm": 5.887320518493652, "learning_rate": 8.358406069091524e-08, "loss": 0.0216, "step": 1000650 }, { "epoch": 9.84, "grad_norm": 0.13774828612804413, "learning_rate": 8.345993823666677e-08, "loss": 0.0503, "step": 1000675 }, { "epoch": 9.84, "grad_norm": 14.09028148651123, "learning_rate": 8.333581578241831e-08, "loss": 0.0103, "step": 1000700 }, { "epoch": 9.84, "grad_norm": 0.030354317277669907, "learning_rate": 8.321169332816984e-08, "loss": 0.0253, "step": 1000725 }, { "epoch": 9.84, "grad_norm": 0.14940597116947174, "learning_rate": 8.308757087392137e-08, "loss": 0.0288, "step": 1000750 }, { "epoch": 9.84, "grad_norm": 0.022938545793294907, "learning_rate": 8.296344841967293e-08, "loss": 0.0449, "step": 1000775 }, { "epoch": 9.84, "grad_norm": 1.5324437618255615, "learning_rate": 8.283932596542446e-08, "loss": 0.0117, "step": 1000800 }, { "epoch": 9.84, "grad_norm": 0.04481040686368942, "learning_rate": 8.271520351117598e-08, "loss": 0.0378, "step": 1000825 }, { "epoch": 9.84, "grad_norm": 0.09293368458747864, "learning_rate": 8.259108105692753e-08, "loss": 0.03, "step": 1000850 }, { "epoch": 9.84, "grad_norm": 4.573759078979492, "learning_rate": 8.246695860267907e-08, "loss": 0.0195, "step": 1000875 }, { "epoch": 9.84, "grad_norm": 0.878538966178894, "learning_rate": 8.234283614843061e-08, "loss": 0.0053, "step": 1000900 }, { "epoch": 9.84, "grad_norm": 0.044936440885066986, "learning_rate": 8.221871369418214e-08, "loss": 0.0425, "step": 1000925 }, { "epoch": 9.84, "grad_norm": 2.0779409408569336, "learning_rate": 8.209459123993367e-08, "loss": 0.0068, "step": 1000950 }, { "epoch": 9.84, "grad_norm": 0.011629940010607243, "learning_rate": 8.197046878568523e-08, "loss": 0.0333, "step": 1000975 }, { "epoch": 9.84, "grad_norm": 2.134610652923584, "learning_rate": 8.184634633143675e-08, "loss": 0.0156, "step": 1001000 }, { "epoch": 9.84, "grad_norm": 0.01631317287683487, "learning_rate": 8.172222387718828e-08, "loss": 0.0263, "step": 1001025 }, { "epoch": 9.84, "grad_norm": 6.899980068206787, "learning_rate": 8.159810142293983e-08, "loss": 0.0131, "step": 1001050 }, { "epoch": 9.84, "grad_norm": 0.056476935744285583, "learning_rate": 8.147397896869135e-08, "loss": 0.0221, "step": 1001075 }, { "epoch": 9.84, "grad_norm": 0.1313234567642212, "learning_rate": 8.134985651444288e-08, "loss": 0.0069, "step": 1001100 }, { "epoch": 9.84, "grad_norm": 0.150225430727005, "learning_rate": 8.122573406019444e-08, "loss": 0.0347, "step": 1001125 }, { "epoch": 9.84, "grad_norm": 0.7178472280502319, "learning_rate": 8.110161160594597e-08, "loss": 0.0186, "step": 1001150 }, { "epoch": 9.84, "grad_norm": 1.2665079832077026, "learning_rate": 8.098245404986745e-08, "loss": 0.0449, "step": 1001175 }, { "epoch": 9.84, "grad_norm": 11.660751342773438, "learning_rate": 8.085833159561898e-08, "loss": 0.0187, "step": 1001200 }, { "epoch": 9.84, "grad_norm": 8.419798851013184, "learning_rate": 8.07342091413705e-08, "loss": 0.0374, "step": 1001225 }, { "epoch": 9.84, "grad_norm": 13.648080825805664, "learning_rate": 8.061008668712206e-08, "loss": 0.0143, "step": 1001250 }, { "epoch": 9.84, "grad_norm": 1.1811485290527344, "learning_rate": 8.048596423287359e-08, "loss": 0.0318, "step": 1001275 }, { "epoch": 9.84, "grad_norm": 6.388676166534424, "learning_rate": 8.036184177862512e-08, "loss": 0.0097, "step": 1001300 }, { "epoch": 9.85, "grad_norm": 0.019519472494721413, "learning_rate": 8.023771932437666e-08, "loss": 0.0421, "step": 1001325 }, { "epoch": 9.85, "grad_norm": 2.0974538326263428, "learning_rate": 8.01135968701282e-08, "loss": 0.0139, "step": 1001350 }, { "epoch": 9.85, "grad_norm": 0.00159844767767936, "learning_rate": 7.998947441587975e-08, "loss": 0.0282, "step": 1001375 }, { "epoch": 9.85, "grad_norm": 0.23042480647563934, "learning_rate": 7.986535196163127e-08, "loss": 0.0117, "step": 1001400 }, { "epoch": 9.85, "grad_norm": 0.013363749720156193, "learning_rate": 7.97412295073828e-08, "loss": 0.0301, "step": 1001425 }, { "epoch": 9.85, "grad_norm": 5.026206016540527, "learning_rate": 7.961710705313436e-08, "loss": 0.0145, "step": 1001450 }, { "epoch": 9.85, "grad_norm": 0.02572498843073845, "learning_rate": 7.949298459888589e-08, "loss": 0.0285, "step": 1001475 }, { "epoch": 9.85, "grad_norm": 0.11810377985239029, "learning_rate": 7.936886214463742e-08, "loss": 0.0116, "step": 1001500 }, { "epoch": 9.85, "grad_norm": 0.027057603001594543, "learning_rate": 7.924473969038896e-08, "loss": 0.0395, "step": 1001525 }, { "epoch": 9.85, "grad_norm": 0.8135157227516174, "learning_rate": 7.912061723614049e-08, "loss": 0.0244, "step": 1001550 }, { "epoch": 9.85, "grad_norm": 0.015559595078229904, "learning_rate": 7.899649478189202e-08, "loss": 0.0236, "step": 1001575 }, { "epoch": 9.85, "grad_norm": 10.103729248046875, "learning_rate": 7.887237232764357e-08, "loss": 0.0249, "step": 1001600 }, { "epoch": 9.85, "grad_norm": 0.165476456284523, "learning_rate": 7.87482498733951e-08, "loss": 0.0483, "step": 1001625 }, { "epoch": 9.85, "grad_norm": 0.25711363554000854, "learning_rate": 7.862412741914663e-08, "loss": 0.0112, "step": 1001650 }, { "epoch": 9.85, "grad_norm": 1.2070571184158325, "learning_rate": 7.850000496489817e-08, "loss": 0.0263, "step": 1001675 }, { "epoch": 9.85, "grad_norm": 3.355773448944092, "learning_rate": 7.837588251064972e-08, "loss": 0.0147, "step": 1001700 }, { "epoch": 9.85, "grad_norm": 0.27427342534065247, "learning_rate": 7.825176005640126e-08, "loss": 0.0255, "step": 1001725 }, { "epoch": 9.85, "grad_norm": 2.5230178833007812, "learning_rate": 7.812763760215279e-08, "loss": 0.0121, "step": 1001750 }, { "epoch": 9.85, "grad_norm": 7.728652000427246, "learning_rate": 7.800351514790432e-08, "loss": 0.0319, "step": 1001775 }, { "epoch": 9.85, "grad_norm": 3.6496219635009766, "learning_rate": 7.787939269365586e-08, "loss": 0.0178, "step": 1001800 }, { "epoch": 9.85, "grad_norm": 0.45863568782806396, "learning_rate": 7.77552702394074e-08, "loss": 0.0535, "step": 1001825 }, { "epoch": 9.85, "grad_norm": 9.054399490356445, "learning_rate": 7.763114778515893e-08, "loss": 0.0137, "step": 1001850 }, { "epoch": 9.85, "grad_norm": 0.7408748865127563, "learning_rate": 7.750702533091047e-08, "loss": 0.0473, "step": 1001875 }, { "epoch": 9.85, "grad_norm": 5.226439476013184, "learning_rate": 7.7382902876662e-08, "loss": 0.0093, "step": 1001900 }, { "epoch": 9.85, "grad_norm": 2.400956153869629, "learning_rate": 7.725878042241354e-08, "loss": 0.0308, "step": 1001925 }, { "epoch": 9.85, "grad_norm": 24.55243492126465, "learning_rate": 7.713465796816507e-08, "loss": 0.0305, "step": 1001950 }, { "epoch": 9.85, "grad_norm": 0.17019841074943542, "learning_rate": 7.701053551391662e-08, "loss": 0.026, "step": 1001975 }, { "epoch": 9.85, "grad_norm": 2.990738868713379, "learning_rate": 7.688641305966816e-08, "loss": 0.028, "step": 1002000 }, { "epoch": 9.85, "grad_norm": 0.5135917663574219, "learning_rate": 7.676229060541969e-08, "loss": 0.0219, "step": 1002025 }, { "epoch": 9.85, "grad_norm": 10.056951522827148, "learning_rate": 7.663816815117123e-08, "loss": 0.0171, "step": 1002050 }, { "epoch": 9.85, "grad_norm": 0.6746012568473816, "learning_rate": 7.651404569692276e-08, "loss": 0.0524, "step": 1002075 }, { "epoch": 9.85, "grad_norm": 8.197805404663086, "learning_rate": 7.63899232426743e-08, "loss": 0.0139, "step": 1002100 }, { "epoch": 9.85, "grad_norm": 0.33804863691329956, "learning_rate": 7.626580078842583e-08, "loss": 0.0368, "step": 1002125 }, { "epoch": 9.85, "grad_norm": 9.656131744384766, "learning_rate": 7.614167833417737e-08, "loss": 0.0164, "step": 1002150 }, { "epoch": 9.85, "grad_norm": 0.0074914367869496346, "learning_rate": 7.601755587992891e-08, "loss": 0.0419, "step": 1002175 }, { "epoch": 9.85, "grad_norm": 0.3220296800136566, "learning_rate": 7.589343342568044e-08, "loss": 0.0101, "step": 1002200 }, { "epoch": 9.85, "grad_norm": 4.376255035400391, "learning_rate": 7.576931097143199e-08, "loss": 0.0295, "step": 1002225 }, { "epoch": 9.85, "grad_norm": 8.418153762817383, "learning_rate": 7.564518851718351e-08, "loss": 0.0172, "step": 1002250 }, { "epoch": 9.85, "grad_norm": 1.8396700620651245, "learning_rate": 7.552106606293506e-08, "loss": 0.0313, "step": 1002275 }, { "epoch": 9.85, "grad_norm": 0.3012716472148895, "learning_rate": 7.539694360868659e-08, "loss": 0.0228, "step": 1002300 }, { "epoch": 9.86, "grad_norm": 0.020622730255126953, "learning_rate": 7.527282115443813e-08, "loss": 0.042, "step": 1002325 }, { "epoch": 9.86, "grad_norm": 14.905257225036621, "learning_rate": 7.514869870018967e-08, "loss": 0.0132, "step": 1002350 }, { "epoch": 9.86, "grad_norm": 0.8111417889595032, "learning_rate": 7.50245762459412e-08, "loss": 0.0242, "step": 1002375 }, { "epoch": 9.86, "grad_norm": 0.2667849063873291, "learning_rate": 7.490045379169274e-08, "loss": 0.0132, "step": 1002400 }, { "epoch": 9.86, "grad_norm": 0.022797774523496628, "learning_rate": 7.477633133744427e-08, "loss": 0.0449, "step": 1002425 }, { "epoch": 9.86, "grad_norm": 1.0178526639938354, "learning_rate": 7.465220888319581e-08, "loss": 0.0181, "step": 1002450 }, { "epoch": 9.86, "grad_norm": 0.6468080878257751, "learning_rate": 7.452808642894734e-08, "loss": 0.0219, "step": 1002475 }, { "epoch": 9.86, "grad_norm": 15.76169490814209, "learning_rate": 7.440396397469889e-08, "loss": 0.022, "step": 1002500 }, { "epoch": 9.86, "grad_norm": 8.169060707092285, "learning_rate": 7.427984152045043e-08, "loss": 0.0274, "step": 1002525 }, { "epoch": 9.86, "grad_norm": 0.4726950228214264, "learning_rate": 7.415571906620196e-08, "loss": 0.0067, "step": 1002550 }, { "epoch": 9.86, "grad_norm": 0.06192118301987648, "learning_rate": 7.40315966119535e-08, "loss": 0.0504, "step": 1002575 }, { "epoch": 9.86, "grad_norm": 6.80643892288208, "learning_rate": 7.390747415770503e-08, "loss": 0.006, "step": 1002600 }, { "epoch": 9.86, "grad_norm": 0.019970569759607315, "learning_rate": 7.378335170345657e-08, "loss": 0.027, "step": 1002625 }, { "epoch": 9.86, "grad_norm": 0.19078749418258667, "learning_rate": 7.36592292492081e-08, "loss": 0.008, "step": 1002650 }, { "epoch": 9.86, "grad_norm": 0.0339219756424427, "learning_rate": 7.353510679495964e-08, "loss": 0.0317, "step": 1002675 }, { "epoch": 9.86, "grad_norm": 3.478757619857788, "learning_rate": 7.341098434071118e-08, "loss": 0.0109, "step": 1002700 }, { "epoch": 9.86, "grad_norm": 0.005849581211805344, "learning_rate": 7.328686188646271e-08, "loss": 0.0196, "step": 1002725 }, { "epoch": 9.86, "grad_norm": 3.0686166286468506, "learning_rate": 7.316273943221426e-08, "loss": 0.0258, "step": 1002750 }, { "epoch": 9.86, "grad_norm": 0.03727482631802559, "learning_rate": 7.303861697796578e-08, "loss": 0.0484, "step": 1002775 }, { "epoch": 9.86, "grad_norm": 3.3068859577178955, "learning_rate": 7.291449452371733e-08, "loss": 0.0146, "step": 1002800 }, { "epoch": 9.86, "grad_norm": 0.035357825458049774, "learning_rate": 7.279037206946886e-08, "loss": 0.0428, "step": 1002825 }, { "epoch": 9.86, "grad_norm": 5.772192001342773, "learning_rate": 7.26662496152204e-08, "loss": 0.004, "step": 1002850 }, { "epoch": 9.86, "grad_norm": 0.20034287869930267, "learning_rate": 7.254212716097194e-08, "loss": 0.0363, "step": 1002875 }, { "epoch": 9.86, "grad_norm": 5.30105447769165, "learning_rate": 7.241800470672347e-08, "loss": 0.0043, "step": 1002900 }, { "epoch": 9.86, "grad_norm": 0.2931923270225525, "learning_rate": 7.229388225247501e-08, "loss": 0.0711, "step": 1002925 }, { "epoch": 9.86, "grad_norm": 0.03217712417244911, "learning_rate": 7.216975979822654e-08, "loss": 0.0096, "step": 1002950 }, { "epoch": 9.86, "grad_norm": 0.052864920347929, "learning_rate": 7.204563734397807e-08, "loss": 0.0262, "step": 1002975 }, { "epoch": 9.86, "grad_norm": 16.44753074645996, "learning_rate": 7.192151488972961e-08, "loss": 0.0148, "step": 1003000 }, { "epoch": 9.86, "grad_norm": 0.12614385783672333, "learning_rate": 7.179739243548115e-08, "loss": 0.0345, "step": 1003025 }, { "epoch": 9.86, "grad_norm": 1.3364237546920776, "learning_rate": 7.16732699812327e-08, "loss": 0.0105, "step": 1003050 }, { "epoch": 9.86, "grad_norm": 1.162775993347168, "learning_rate": 7.154914752698423e-08, "loss": 0.0309, "step": 1003075 }, { "epoch": 9.86, "grad_norm": 0.541702151298523, "learning_rate": 7.142502507273577e-08, "loss": 0.0149, "step": 1003100 }, { "epoch": 9.86, "grad_norm": 3.8633978366851807, "learning_rate": 7.13009026184873e-08, "loss": 0.0407, "step": 1003125 }, { "epoch": 9.86, "grad_norm": 9.869451522827148, "learning_rate": 7.117678016423883e-08, "loss": 0.0135, "step": 1003150 }, { "epoch": 9.86, "grad_norm": 0.02228553406894207, "learning_rate": 7.105265770999037e-08, "loss": 0.0279, "step": 1003175 }, { "epoch": 9.86, "grad_norm": 0.3839472234249115, "learning_rate": 7.092853525574191e-08, "loss": 0.0112, "step": 1003200 }, { "epoch": 9.86, "grad_norm": 0.1342376470565796, "learning_rate": 7.080441280149345e-08, "loss": 0.0278, "step": 1003225 }, { "epoch": 9.86, "grad_norm": 9.346843719482422, "learning_rate": 7.068029034724498e-08, "loss": 0.0125, "step": 1003250 }, { "epoch": 9.86, "grad_norm": 2.143306255340576, "learning_rate": 7.055616789299653e-08, "loss": 0.0205, "step": 1003275 }, { "epoch": 9.86, "grad_norm": 4.656022071838379, "learning_rate": 7.043204543874805e-08, "loss": 0.0122, "step": 1003300 }, { "epoch": 9.86, "grad_norm": 1.6003587245941162, "learning_rate": 7.03079229844996e-08, "loss": 0.0286, "step": 1003325 }, { "epoch": 9.87, "grad_norm": 1.643036961555481, "learning_rate": 7.018380053025113e-08, "loss": 0.0131, "step": 1003350 }, { "epoch": 9.87, "grad_norm": 0.9171988368034363, "learning_rate": 7.005967807600267e-08, "loss": 0.0315, "step": 1003375 }, { "epoch": 9.87, "grad_norm": 0.8756186962127686, "learning_rate": 6.993555562175421e-08, "loss": 0.0262, "step": 1003400 }, { "epoch": 9.87, "grad_norm": 0.013051740825176239, "learning_rate": 6.981143316750574e-08, "loss": 0.032, "step": 1003425 }, { "epoch": 9.87, "grad_norm": 1.1919277906417847, "learning_rate": 6.968731071325728e-08, "loss": 0.0184, "step": 1003450 }, { "epoch": 9.87, "grad_norm": 0.0894041582942009, "learning_rate": 6.956318825900881e-08, "loss": 0.0475, "step": 1003475 }, { "epoch": 9.87, "grad_norm": 7.912672996520996, "learning_rate": 6.943906580476035e-08, "loss": 0.016, "step": 1003500 }, { "epoch": 9.87, "grad_norm": 0.0482606440782547, "learning_rate": 6.931494335051188e-08, "loss": 0.0293, "step": 1003525 }, { "epoch": 9.87, "grad_norm": 1.4286686182022095, "learning_rate": 6.919082089626342e-08, "loss": 0.0144, "step": 1003550 }, { "epoch": 9.87, "grad_norm": 0.047398898750543594, "learning_rate": 6.906669844201495e-08, "loss": 0.0307, "step": 1003575 }, { "epoch": 9.87, "grad_norm": 0.07273352891206741, "learning_rate": 6.89425759877665e-08, "loss": 0.0179, "step": 1003600 }, { "epoch": 9.87, "grad_norm": 0.08914012461900711, "learning_rate": 6.881845353351804e-08, "loss": 0.0295, "step": 1003625 }, { "epoch": 9.87, "grad_norm": 4.971688747406006, "learning_rate": 6.869433107926957e-08, "loss": 0.0159, "step": 1003650 }, { "epoch": 9.87, "grad_norm": 0.06299931555986404, "learning_rate": 6.857020862502111e-08, "loss": 0.0313, "step": 1003675 }, { "epoch": 9.87, "grad_norm": 20.35603141784668, "learning_rate": 6.844608617077264e-08, "loss": 0.0203, "step": 1003700 }, { "epoch": 9.87, "grad_norm": 6.36271333694458, "learning_rate": 6.832196371652418e-08, "loss": 0.0464, "step": 1003725 }, { "epoch": 9.87, "grad_norm": 0.032358214259147644, "learning_rate": 6.819784126227571e-08, "loss": 0.0178, "step": 1003750 }, { "epoch": 9.87, "grad_norm": 0.9187770485877991, "learning_rate": 6.807371880802725e-08, "loss": 0.0306, "step": 1003775 }, { "epoch": 9.87, "grad_norm": 3.741173028945923, "learning_rate": 6.79495963537788e-08, "loss": 0.0089, "step": 1003800 }, { "epoch": 9.87, "grad_norm": 0.8672508597373962, "learning_rate": 6.782547389953032e-08, "loss": 0.0423, "step": 1003825 }, { "epoch": 9.87, "grad_norm": 6.405612945556641, "learning_rate": 6.770135144528187e-08, "loss": 0.0075, "step": 1003850 }, { "epoch": 9.87, "grad_norm": 1.3490747213363647, "learning_rate": 6.75772289910334e-08, "loss": 0.0294, "step": 1003875 }, { "epoch": 9.87, "grad_norm": 5.772753715515137, "learning_rate": 6.745310653678494e-08, "loss": 0.0115, "step": 1003900 }, { "epoch": 9.87, "grad_norm": 0.0644412636756897, "learning_rate": 6.732898408253647e-08, "loss": 0.0306, "step": 1003925 }, { "epoch": 9.87, "grad_norm": 4.705480098724365, "learning_rate": 6.720486162828801e-08, "loss": 0.0093, "step": 1003950 }, { "epoch": 9.87, "grad_norm": 0.24626608192920685, "learning_rate": 6.708570407220949e-08, "loss": 0.0389, "step": 1003975 }, { "epoch": 9.87, "grad_norm": 0.7907329201698303, "learning_rate": 6.696158161796102e-08, "loss": 0.0131, "step": 1004000 }, { "epoch": 9.87, "grad_norm": 0.006043042056262493, "learning_rate": 6.683745916371256e-08, "loss": 0.0213, "step": 1004025 }, { "epoch": 9.87, "grad_norm": 11.92526912689209, "learning_rate": 6.67133367094641e-08, "loss": 0.0206, "step": 1004050 }, { "epoch": 9.87, "grad_norm": 0.04104077070951462, "learning_rate": 6.658921425521563e-08, "loss": 0.0393, "step": 1004075 }, { "epoch": 9.87, "grad_norm": 14.440686225891113, "learning_rate": 6.646509180096717e-08, "loss": 0.0213, "step": 1004100 }, { "epoch": 9.87, "grad_norm": 15.169114112854004, "learning_rate": 6.63409693467187e-08, "loss": 0.046, "step": 1004125 }, { "epoch": 9.87, "grad_norm": 0.18046969175338745, "learning_rate": 6.621684689247024e-08, "loss": 0.0232, "step": 1004150 }, { "epoch": 9.87, "grad_norm": 0.03618866205215454, "learning_rate": 6.609272443822177e-08, "loss": 0.0414, "step": 1004175 }, { "epoch": 9.87, "grad_norm": 9.046224594116211, "learning_rate": 6.596860198397331e-08, "loss": 0.0194, "step": 1004200 }, { "epoch": 9.87, "grad_norm": 0.5857313871383667, "learning_rate": 6.584447952972484e-08, "loss": 0.039, "step": 1004225 }, { "epoch": 9.87, "grad_norm": 11.111472129821777, "learning_rate": 6.572035707547639e-08, "loss": 0.0191, "step": 1004250 }, { "epoch": 9.87, "grad_norm": 1.331172227859497, "learning_rate": 6.559623462122793e-08, "loss": 0.0505, "step": 1004275 }, { "epoch": 9.87, "grad_norm": 1.5270313024520874, "learning_rate": 6.547211216697946e-08, "loss": 0.0139, "step": 1004300 }, { "epoch": 9.87, "grad_norm": 3.1847782135009766, "learning_rate": 6.5347989712731e-08, "loss": 0.0365, "step": 1004325 }, { "epoch": 9.87, "grad_norm": 0.45876604318618774, "learning_rate": 6.522386725848253e-08, "loss": 0.0168, "step": 1004350 }, { "epoch": 9.88, "grad_norm": 1.9821065664291382, "learning_rate": 6.509974480423407e-08, "loss": 0.0303, "step": 1004375 }, { "epoch": 9.88, "grad_norm": 0.4853232502937317, "learning_rate": 6.49756223499856e-08, "loss": 0.0257, "step": 1004400 }, { "epoch": 9.88, "grad_norm": 0.5186339616775513, "learning_rate": 6.485149989573714e-08, "loss": 0.0512, "step": 1004425 }, { "epoch": 9.88, "grad_norm": 0.6460503935813904, "learning_rate": 6.472737744148869e-08, "loss": 0.0132, "step": 1004450 }, { "epoch": 9.88, "grad_norm": 0.0836685448884964, "learning_rate": 6.460325498724021e-08, "loss": 0.0281, "step": 1004475 }, { "epoch": 9.88, "grad_norm": 7.954201698303223, "learning_rate": 6.447913253299176e-08, "loss": 0.0176, "step": 1004500 }, { "epoch": 9.88, "grad_norm": 10.2832670211792, "learning_rate": 6.435501007874329e-08, "loss": 0.0379, "step": 1004525 }, { "epoch": 9.88, "grad_norm": 0.9271697998046875, "learning_rate": 6.423088762449483e-08, "loss": 0.016, "step": 1004550 }, { "epoch": 9.88, "grad_norm": 5.26526403427124, "learning_rate": 6.410676517024636e-08, "loss": 0.0348, "step": 1004575 }, { "epoch": 9.88, "grad_norm": 12.662057876586914, "learning_rate": 6.39826427159979e-08, "loss": 0.0201, "step": 1004600 }, { "epoch": 9.88, "grad_norm": 0.1043114960193634, "learning_rate": 6.385852026174944e-08, "loss": 0.028, "step": 1004625 }, { "epoch": 9.88, "grad_norm": 8.514598846435547, "learning_rate": 6.373439780750097e-08, "loss": 0.0195, "step": 1004650 }, { "epoch": 9.88, "grad_norm": 0.007843896746635437, "learning_rate": 6.361027535325251e-08, "loss": 0.0423, "step": 1004675 }, { "epoch": 9.88, "grad_norm": 2.8346569538116455, "learning_rate": 6.348615289900404e-08, "loss": 0.0158, "step": 1004700 }, { "epoch": 9.88, "grad_norm": 0.10771257430315018, "learning_rate": 6.336203044475558e-08, "loss": 0.0373, "step": 1004725 }, { "epoch": 9.88, "grad_norm": 1.5923149585723877, "learning_rate": 6.323790799050711e-08, "loss": 0.0198, "step": 1004750 }, { "epoch": 9.88, "grad_norm": 0.03038017451763153, "learning_rate": 6.311378553625866e-08, "loss": 0.0491, "step": 1004775 }, { "epoch": 9.88, "grad_norm": 6.0298027992248535, "learning_rate": 6.29896630820102e-08, "loss": 0.0163, "step": 1004800 }, { "epoch": 9.88, "grad_norm": 5.408221244812012, "learning_rate": 6.286554062776173e-08, "loss": 0.0309, "step": 1004825 }, { "epoch": 9.88, "grad_norm": 5.0931715965271, "learning_rate": 6.274141817351327e-08, "loss": 0.0212, "step": 1004850 }, { "epoch": 9.88, "grad_norm": 1.104891061782837, "learning_rate": 6.26172957192648e-08, "loss": 0.04, "step": 1004875 }, { "epoch": 9.88, "grad_norm": 1.497687578201294, "learning_rate": 6.249317326501634e-08, "loss": 0.0086, "step": 1004900 }, { "epoch": 9.88, "grad_norm": 0.770332932472229, "learning_rate": 6.236905081076787e-08, "loss": 0.0424, "step": 1004925 }, { "epoch": 9.88, "grad_norm": 2.4369044303894043, "learning_rate": 6.224492835651941e-08, "loss": 0.013, "step": 1004950 }, { "epoch": 9.88, "grad_norm": 0.03691118583083153, "learning_rate": 6.212080590227096e-08, "loss": 0.0389, "step": 1004975 }, { "epoch": 9.88, "grad_norm": 6.782358169555664, "learning_rate": 6.199668344802248e-08, "loss": 0.0191, "step": 1005000 }, { "epoch": 9.88, "grad_norm": 0.00883459858596325, "learning_rate": 6.187256099377403e-08, "loss": 0.0401, "step": 1005025 }, { "epoch": 9.88, "grad_norm": 10.972853660583496, "learning_rate": 6.174843853952556e-08, "loss": 0.0138, "step": 1005050 }, { "epoch": 9.88, "grad_norm": 0.02959812432527542, "learning_rate": 6.16243160852771e-08, "loss": 0.0481, "step": 1005075 }, { "epoch": 9.88, "grad_norm": 9.130831718444824, "learning_rate": 6.150019363102863e-08, "loss": 0.0192, "step": 1005100 }, { "epoch": 9.88, "grad_norm": 15.107514381408691, "learning_rate": 6.137607117678017e-08, "loss": 0.0249, "step": 1005125 }, { "epoch": 9.88, "grad_norm": 10.462825775146484, "learning_rate": 6.125194872253171e-08, "loss": 0.0173, "step": 1005150 }, { "epoch": 9.88, "grad_norm": 0.1484570950269699, "learning_rate": 6.112782626828324e-08, "loss": 0.0402, "step": 1005175 }, { "epoch": 9.88, "grad_norm": 7.092962265014648, "learning_rate": 6.100370381403478e-08, "loss": 0.026, "step": 1005200 }, { "epoch": 9.88, "grad_norm": 0.006696423981338739, "learning_rate": 6.087958135978631e-08, "loss": 0.0335, "step": 1005225 }, { "epoch": 9.88, "grad_norm": 1.0288230180740356, "learning_rate": 6.075545890553785e-08, "loss": 0.0265, "step": 1005250 }, { "epoch": 9.88, "grad_norm": 0.0072068944573402405, "learning_rate": 6.063133645128938e-08, "loss": 0.0475, "step": 1005275 }, { "epoch": 9.88, "grad_norm": 0.042883049696683884, "learning_rate": 6.050721399704093e-08, "loss": 0.0114, "step": 1005300 }, { "epoch": 9.88, "grad_norm": 0.4326420724391937, "learning_rate": 6.038309154279247e-08, "loss": 0.0338, "step": 1005325 }, { "epoch": 9.88, "grad_norm": 2.3644697666168213, "learning_rate": 6.0258969088544e-08, "loss": 0.0094, "step": 1005350 }, { "epoch": 9.89, "grad_norm": 1.9260295629501343, "learning_rate": 6.013484663429554e-08, "loss": 0.0275, "step": 1005375 }, { "epoch": 9.89, "grad_norm": 24.795412063598633, "learning_rate": 6.001072418004707e-08, "loss": 0.0188, "step": 1005400 }, { "epoch": 9.89, "grad_norm": 10.1270751953125, "learning_rate": 5.988660172579861e-08, "loss": 0.0344, "step": 1005425 }, { "epoch": 9.89, "grad_norm": 0.606172502040863, "learning_rate": 5.976247927155014e-08, "loss": 0.0082, "step": 1005450 }, { "epoch": 9.89, "grad_norm": 0.23122668266296387, "learning_rate": 5.963835681730168e-08, "loss": 0.0413, "step": 1005475 }, { "epoch": 9.89, "grad_norm": 2.530813217163086, "learning_rate": 5.9514234363053225e-08, "loss": 0.0229, "step": 1005500 }, { "epoch": 9.89, "grad_norm": 0.43033289909362793, "learning_rate": 5.9390111908804754e-08, "loss": 0.0286, "step": 1005525 }, { "epoch": 9.89, "grad_norm": 0.6431351900100708, "learning_rate": 5.926598945455629e-08, "loss": 0.0134, "step": 1005550 }, { "epoch": 9.89, "grad_norm": 1.7688093185424805, "learning_rate": 5.914186700030783e-08, "loss": 0.0437, "step": 1005575 }, { "epoch": 9.89, "grad_norm": 2.175938844680786, "learning_rate": 5.901774454605936e-08, "loss": 0.0078, "step": 1005600 }, { "epoch": 9.89, "grad_norm": 2.1056950092315674, "learning_rate": 5.88936220918109e-08, "loss": 0.0324, "step": 1005625 }, { "epoch": 9.89, "grad_norm": 11.225205421447754, "learning_rate": 5.876949963756244e-08, "loss": 0.0183, "step": 1005650 }, { "epoch": 9.89, "grad_norm": 0.026853354647755623, "learning_rate": 5.864537718331398e-08, "loss": 0.0286, "step": 1005675 }, { "epoch": 9.89, "grad_norm": 9.307965278625488, "learning_rate": 5.852125472906551e-08, "loss": 0.0199, "step": 1005700 }, { "epoch": 9.89, "grad_norm": 0.019644256681203842, "learning_rate": 5.8397132274817046e-08, "loss": 0.0262, "step": 1005725 }, { "epoch": 9.89, "grad_norm": 5.690699577331543, "learning_rate": 5.827300982056859e-08, "loss": 0.0205, "step": 1005750 }, { "epoch": 9.89, "grad_norm": 1.8496534824371338, "learning_rate": 5.814888736632012e-08, "loss": 0.0412, "step": 1005775 }, { "epoch": 9.89, "grad_norm": 5.029815673828125, "learning_rate": 5.802476491207166e-08, "loss": 0.0142, "step": 1005800 }, { "epoch": 9.89, "grad_norm": 2.0111708641052246, "learning_rate": 5.7900642457823196e-08, "loss": 0.0562, "step": 1005825 }, { "epoch": 9.89, "grad_norm": 1.859792947769165, "learning_rate": 5.777652000357474e-08, "loss": 0.0082, "step": 1005850 }, { "epoch": 9.89, "grad_norm": 0.003684831317514181, "learning_rate": 5.765239754932627e-08, "loss": 0.0349, "step": 1005875 }, { "epoch": 9.89, "grad_norm": 2.9826459884643555, "learning_rate": 5.75282750950778e-08, "loss": 0.0142, "step": 1005900 }, { "epoch": 9.89, "grad_norm": 0.005048808641731739, "learning_rate": 5.7404152640829345e-08, "loss": 0.0256, "step": 1005925 }, { "epoch": 9.89, "grad_norm": 0.4504150152206421, "learning_rate": 5.7280030186580874e-08, "loss": 0.0113, "step": 1005950 }, { "epoch": 9.89, "grad_norm": 0.013043714687228203, "learning_rate": 5.7155907732332417e-08, "loss": 0.0249, "step": 1005975 }, { "epoch": 9.89, "grad_norm": 6.814070701599121, "learning_rate": 5.703178527808395e-08, "loss": 0.0138, "step": 1006000 }, { "epoch": 9.89, "grad_norm": 1.5010508298873901, "learning_rate": 5.6907662823835495e-08, "loss": 0.0396, "step": 1006025 }, { "epoch": 9.89, "grad_norm": 4.65989351272583, "learning_rate": 5.6783540369587024e-08, "loss": 0.0119, "step": 1006050 }, { "epoch": 9.89, "grad_norm": 0.05430697649717331, "learning_rate": 5.665941791533856e-08, "loss": 0.0165, "step": 1006075 }, { "epoch": 9.89, "grad_norm": 0.9002828001976013, "learning_rate": 5.65352954610901e-08, "loss": 0.0136, "step": 1006100 }, { "epoch": 9.89, "grad_norm": 0.012403491884469986, "learning_rate": 5.641117300684163e-08, "loss": 0.0304, "step": 1006125 }, { "epoch": 9.89, "grad_norm": 8.41302490234375, "learning_rate": 5.628705055259317e-08, "loss": 0.0084, "step": 1006150 }, { "epoch": 9.89, "grad_norm": 0.03206849843263626, "learning_rate": 5.616292809834471e-08, "loss": 0.043, "step": 1006175 }, { "epoch": 9.89, "grad_norm": 5.554184436798096, "learning_rate": 5.603880564409625e-08, "loss": 0.0081, "step": 1006200 }, { "epoch": 9.89, "grad_norm": 1.0697612762451172, "learning_rate": 5.591468318984778e-08, "loss": 0.0254, "step": 1006225 }, { "epoch": 9.89, "grad_norm": 3.239494562149048, "learning_rate": 5.5790560735599316e-08, "loss": 0.0079, "step": 1006250 }, { "epoch": 9.89, "grad_norm": 0.9099822044372559, "learning_rate": 5.566643828135086e-08, "loss": 0.0508, "step": 1006275 }, { "epoch": 9.89, "grad_norm": 0.21262647211551666, "learning_rate": 5.554231582710239e-08, "loss": 0.0118, "step": 1006300 }, { "epoch": 9.89, "grad_norm": 0.03338605538010597, "learning_rate": 5.541819337285393e-08, "loss": 0.0321, "step": 1006325 }, { "epoch": 9.89, "grad_norm": 0.03175881877541542, "learning_rate": 5.5294070918605466e-08, "loss": 0.0161, "step": 1006350 }, { "epoch": 9.89, "grad_norm": 0.01136533822864294, "learning_rate": 5.5169948464356995e-08, "loss": 0.0225, "step": 1006375 }, { "epoch": 9.9, "grad_norm": 9.810725212097168, "learning_rate": 5.504582601010854e-08, "loss": 0.0251, "step": 1006400 }, { "epoch": 9.9, "grad_norm": 0.15047451853752136, "learning_rate": 5.492170355586007e-08, "loss": 0.0527, "step": 1006425 }, { "epoch": 9.9, "grad_norm": 0.405446320772171, "learning_rate": 5.4797581101611615e-08, "loss": 0.0101, "step": 1006450 }, { "epoch": 9.9, "grad_norm": 7.506474018096924, "learning_rate": 5.4673458647363144e-08, "loss": 0.0514, "step": 1006475 }, { "epoch": 9.9, "grad_norm": 9.674633026123047, "learning_rate": 5.4549336193114686e-08, "loss": 0.0122, "step": 1006500 }, { "epoch": 9.9, "grad_norm": 1.569421648979187, "learning_rate": 5.443017863703616e-08, "loss": 0.024, "step": 1006525 }, { "epoch": 9.9, "grad_norm": 2.2434802055358887, "learning_rate": 5.430605618278769e-08, "loss": 0.0189, "step": 1006550 }, { "epoch": 9.9, "grad_norm": 0.08180359750986099, "learning_rate": 5.4181933728539235e-08, "loss": 0.0337, "step": 1006575 }, { "epoch": 9.9, "grad_norm": 0.12858501076698303, "learning_rate": 5.4057811274290764e-08, "loss": 0.0201, "step": 1006600 }, { "epoch": 9.9, "grad_norm": 0.0647752434015274, "learning_rate": 5.393368882004231e-08, "loss": 0.018, "step": 1006625 }, { "epoch": 9.9, "grad_norm": 6.4422736167907715, "learning_rate": 5.380956636579384e-08, "loss": 0.0236, "step": 1006650 }, { "epoch": 9.9, "grad_norm": 0.014233430847525597, "learning_rate": 5.3685443911545385e-08, "loss": 0.0379, "step": 1006675 }, { "epoch": 9.9, "grad_norm": 1.2806943655014038, "learning_rate": 5.3561321457296914e-08, "loss": 0.0096, "step": 1006700 }, { "epoch": 9.9, "grad_norm": 0.27761387825012207, "learning_rate": 5.343719900304845e-08, "loss": 0.0375, "step": 1006725 }, { "epoch": 9.9, "grad_norm": 10.330525398254395, "learning_rate": 5.331307654879999e-08, "loss": 0.0165, "step": 1006750 }, { "epoch": 9.9, "grad_norm": 0.11992677301168442, "learning_rate": 5.318895409455152e-08, "loss": 0.0314, "step": 1006775 }, { "epoch": 9.9, "grad_norm": 0.3976992964744568, "learning_rate": 5.3064831640303063e-08, "loss": 0.0133, "step": 1006800 }, { "epoch": 9.9, "grad_norm": 0.17596368491649628, "learning_rate": 5.29407091860546e-08, "loss": 0.0163, "step": 1006825 }, { "epoch": 9.9, "grad_norm": 3.0609233379364014, "learning_rate": 5.281658673180613e-08, "loss": 0.0115, "step": 1006850 }, { "epoch": 9.9, "grad_norm": 1.4202566146850586, "learning_rate": 5.269246427755767e-08, "loss": 0.0424, "step": 1006875 }, { "epoch": 9.9, "grad_norm": 5.539343357086182, "learning_rate": 5.2568341823309206e-08, "loss": 0.0099, "step": 1006900 }, { "epoch": 9.9, "grad_norm": 0.07714793086051941, "learning_rate": 5.244421936906075e-08, "loss": 0.0402, "step": 1006925 }, { "epoch": 9.9, "grad_norm": 0.9567859768867493, "learning_rate": 5.232009691481228e-08, "loss": 0.0174, "step": 1006950 }, { "epoch": 9.9, "grad_norm": 1.6077927350997925, "learning_rate": 5.219597446056382e-08, "loss": 0.0233, "step": 1006975 }, { "epoch": 9.9, "grad_norm": 7.985696792602539, "learning_rate": 5.2071852006315356e-08, "loss": 0.0139, "step": 1007000 }, { "epoch": 9.9, "grad_norm": 0.015441009774804115, "learning_rate": 5.1947729552066885e-08, "loss": 0.0602, "step": 1007025 }, { "epoch": 9.9, "grad_norm": 0.15139231085777283, "learning_rate": 5.182360709781843e-08, "loss": 0.0169, "step": 1007050 }, { "epoch": 9.9, "grad_norm": 0.043569017201662064, "learning_rate": 5.169948464356996e-08, "loss": 0.0397, "step": 1007075 }, { "epoch": 9.9, "grad_norm": 6.44288444519043, "learning_rate": 5.1575362189321505e-08, "loss": 0.0199, "step": 1007100 }, { "epoch": 9.9, "grad_norm": 0.025946330279111862, "learning_rate": 5.1451239735073034e-08, "loss": 0.0296, "step": 1007125 }, { "epoch": 9.9, "grad_norm": 7.871509552001953, "learning_rate": 5.1327117280824577e-08, "loss": 0.0257, "step": 1007150 }, { "epoch": 9.9, "grad_norm": 0.007232289761304855, "learning_rate": 5.120299482657611e-08, "loss": 0.0449, "step": 1007175 }, { "epoch": 9.9, "grad_norm": 3.1107287406921387, "learning_rate": 5.107887237232764e-08, "loss": 0.015, "step": 1007200 }, { "epoch": 9.9, "grad_norm": 0.2577848732471466, "learning_rate": 5.0954749918079184e-08, "loss": 0.0299, "step": 1007225 }, { "epoch": 9.9, "grad_norm": 2.702254056930542, "learning_rate": 5.083062746383072e-08, "loss": 0.0095, "step": 1007250 }, { "epoch": 9.9, "grad_norm": 3.0413291454315186, "learning_rate": 5.070650500958226e-08, "loss": 0.0275, "step": 1007275 }, { "epoch": 9.9, "grad_norm": 20.08562469482422, "learning_rate": 5.058238255533379e-08, "loss": 0.0158, "step": 1007300 }, { "epoch": 9.9, "grad_norm": 0.10123690962791443, "learning_rate": 5.045826010108533e-08, "loss": 0.0441, "step": 1007325 }, { "epoch": 9.9, "grad_norm": 10.364115715026855, "learning_rate": 5.033413764683687e-08, "loss": 0.0137, "step": 1007350 }, { "epoch": 9.9, "grad_norm": 0.10341502726078033, "learning_rate": 5.02100151925884e-08, "loss": 0.0357, "step": 1007375 }, { "epoch": 9.9, "grad_norm": 7.128016948699951, "learning_rate": 5.008589273833994e-08, "loss": 0.0192, "step": 1007400 }, { "epoch": 9.91, "grad_norm": 0.012147532775998116, "learning_rate": 4.9961770284091476e-08, "loss": 0.0378, "step": 1007425 }, { "epoch": 9.91, "grad_norm": 0.2922407388687134, "learning_rate": 4.983764782984301e-08, "loss": 0.0127, "step": 1007450 }, { "epoch": 9.91, "grad_norm": 1.0289705991744995, "learning_rate": 4.971352537559455e-08, "loss": 0.0272, "step": 1007475 }, { "epoch": 9.91, "grad_norm": 3.5122790336608887, "learning_rate": 4.958940292134609e-08, "loss": 0.0161, "step": 1007500 }, { "epoch": 9.91, "grad_norm": 0.03469089791178703, "learning_rate": 4.9465280467097626e-08, "loss": 0.0192, "step": 1007525 }, { "epoch": 9.91, "grad_norm": 0.10195744037628174, "learning_rate": 4.9341158012849155e-08, "loss": 0.0135, "step": 1007550 }, { "epoch": 9.91, "grad_norm": 1.537645697593689, "learning_rate": 4.92170355586007e-08, "loss": 0.0327, "step": 1007575 }, { "epoch": 9.91, "grad_norm": 0.9152272343635559, "learning_rate": 4.909291310435223e-08, "loss": 0.0222, "step": 1007600 }, { "epoch": 9.91, "grad_norm": 0.9767880439758301, "learning_rate": 4.896879065010377e-08, "loss": 0.036, "step": 1007625 }, { "epoch": 9.91, "grad_norm": 3.5797252655029297, "learning_rate": 4.8844668195855304e-08, "loss": 0.0074, "step": 1007650 }, { "epoch": 9.91, "grad_norm": 0.8786035180091858, "learning_rate": 4.8720545741606846e-08, "loss": 0.0177, "step": 1007675 }, { "epoch": 9.91, "grad_norm": 8.81932258605957, "learning_rate": 4.859642328735838e-08, "loss": 0.0146, "step": 1007700 }, { "epoch": 9.91, "grad_norm": 0.9906705021858215, "learning_rate": 4.847230083310991e-08, "loss": 0.0372, "step": 1007725 }, { "epoch": 9.91, "grad_norm": 16.1709041595459, "learning_rate": 4.8348178378861454e-08, "loss": 0.0247, "step": 1007750 }, { "epoch": 9.91, "grad_norm": 1.0610554218292236, "learning_rate": 4.8224055924612996e-08, "loss": 0.0303, "step": 1007775 }, { "epoch": 9.91, "grad_norm": 0.022911353036761284, "learning_rate": 4.8099933470364525e-08, "loss": 0.0091, "step": 1007800 }, { "epoch": 9.91, "grad_norm": 0.03158850967884064, "learning_rate": 4.797581101611606e-08, "loss": 0.0176, "step": 1007825 }, { "epoch": 9.91, "grad_norm": 4.87709379196167, "learning_rate": 4.78516885618676e-08, "loss": 0.0069, "step": 1007850 }, { "epoch": 9.91, "grad_norm": 2.0271921157836914, "learning_rate": 4.772756610761914e-08, "loss": 0.0259, "step": 1007875 }, { "epoch": 9.91, "grad_norm": 0.15766777098178864, "learning_rate": 4.760344365337067e-08, "loss": 0.0233, "step": 1007900 }, { "epoch": 9.91, "grad_norm": 0.0339931920170784, "learning_rate": 4.747932119912221e-08, "loss": 0.0463, "step": 1007925 }, { "epoch": 9.91, "grad_norm": 11.550920486450195, "learning_rate": 4.735519874487375e-08, "loss": 0.0106, "step": 1007950 }, { "epoch": 9.91, "grad_norm": 8.860559463500977, "learning_rate": 4.723107629062528e-08, "loss": 0.0611, "step": 1007975 }, { "epoch": 9.91, "grad_norm": 1.2565547227859497, "learning_rate": 4.710695383637682e-08, "loss": 0.0111, "step": 1008000 }, { "epoch": 9.91, "grad_norm": 0.366330087184906, "learning_rate": 4.698283138212836e-08, "loss": 0.025, "step": 1008025 }, { "epoch": 9.91, "grad_norm": 12.519240379333496, "learning_rate": 4.6858708927879895e-08, "loss": 0.0243, "step": 1008050 }, { "epoch": 9.91, "grad_norm": 0.5429565906524658, "learning_rate": 4.6734586473631424e-08, "loss": 0.0347, "step": 1008075 }, { "epoch": 9.91, "grad_norm": 1.544432282447815, "learning_rate": 4.661046401938297e-08, "loss": 0.0144, "step": 1008100 }, { "epoch": 9.91, "grad_norm": 0.021646393463015556, "learning_rate": 4.648634156513451e-08, "loss": 0.0441, "step": 1008125 }, { "epoch": 9.91, "grad_norm": 15.873066902160645, "learning_rate": 4.636221911088604e-08, "loss": 0.0132, "step": 1008150 }, { "epoch": 9.91, "grad_norm": 0.0801047533750534, "learning_rate": 4.6238096656637574e-08, "loss": 0.0347, "step": 1008175 }, { "epoch": 9.91, "grad_norm": 12.722325325012207, "learning_rate": 4.6113974202389116e-08, "loss": 0.014, "step": 1008200 }, { "epoch": 9.91, "grad_norm": 0.037818532437086105, "learning_rate": 4.5989851748140645e-08, "loss": 0.0207, "step": 1008225 }, { "epoch": 9.91, "grad_norm": 0.07718385756015778, "learning_rate": 4.586572929389219e-08, "loss": 0.0197, "step": 1008250 }, { "epoch": 9.91, "grad_norm": 4.240558624267578, "learning_rate": 4.5741606839643723e-08, "loss": 0.0353, "step": 1008275 }, { "epoch": 9.91, "grad_norm": 0.14241521060466766, "learning_rate": 4.5617484385395266e-08, "loss": 0.0161, "step": 1008300 }, { "epoch": 9.91, "grad_norm": 0.7928873300552368, "learning_rate": 4.5493361931146795e-08, "loss": 0.0307, "step": 1008325 }, { "epoch": 9.91, "grad_norm": 3.0553205013275146, "learning_rate": 4.536923947689833e-08, "loss": 0.021, "step": 1008350 }, { "epoch": 9.91, "grad_norm": 0.025819318369030952, "learning_rate": 4.524511702264987e-08, "loss": 0.0259, "step": 1008375 }, { "epoch": 9.91, "grad_norm": 2.05728816986084, "learning_rate": 4.51209945684014e-08, "loss": 0.0136, "step": 1008400 }, { "epoch": 9.92, "grad_norm": 2.2600977420806885, "learning_rate": 4.4996872114152944e-08, "loss": 0.0373, "step": 1008425 }, { "epoch": 9.92, "grad_norm": 2.348522663116455, "learning_rate": 4.487274965990448e-08, "loss": 0.0057, "step": 1008450 }, { "epoch": 9.92, "grad_norm": 0.201612189412117, "learning_rate": 4.474862720565602e-08, "loss": 0.033, "step": 1008475 }, { "epoch": 9.92, "grad_norm": 12.995442390441895, "learning_rate": 4.462450475140755e-08, "loss": 0.01, "step": 1008500 }, { "epoch": 9.92, "grad_norm": 0.009217886254191399, "learning_rate": 4.450038229715909e-08, "loss": 0.0302, "step": 1008525 }, { "epoch": 9.92, "grad_norm": 1.1528716087341309, "learning_rate": 4.437625984291063e-08, "loss": 0.0131, "step": 1008550 }, { "epoch": 9.92, "grad_norm": 2.1589694023132324, "learning_rate": 4.425213738866216e-08, "loss": 0.0235, "step": 1008575 }, { "epoch": 9.92, "grad_norm": 0.5102052688598633, "learning_rate": 4.41280149344137e-08, "loss": 0.0147, "step": 1008600 }, { "epoch": 9.92, "grad_norm": 0.027398407459259033, "learning_rate": 4.4003892480165237e-08, "loss": 0.0432, "step": 1008625 }, { "epoch": 9.92, "grad_norm": 0.587009847164154, "learning_rate": 4.387977002591678e-08, "loss": 0.0114, "step": 1008650 }, { "epoch": 9.92, "grad_norm": 0.007069509942084551, "learning_rate": 4.375564757166831e-08, "loss": 0.0387, "step": 1008675 }, { "epoch": 9.92, "grad_norm": 0.47431355714797974, "learning_rate": 4.3631525117419844e-08, "loss": 0.0122, "step": 1008700 }, { "epoch": 9.92, "grad_norm": 0.020760737359523773, "learning_rate": 4.3507402663171386e-08, "loss": 0.0253, "step": 1008725 }, { "epoch": 9.92, "grad_norm": 0.8406263589859009, "learning_rate": 4.3383280208922915e-08, "loss": 0.0084, "step": 1008750 }, { "epoch": 9.92, "grad_norm": 0.0700303390622139, "learning_rate": 4.325915775467446e-08, "loss": 0.035, "step": 1008775 }, { "epoch": 9.92, "grad_norm": 14.807487487792969, "learning_rate": 4.313503530042599e-08, "loss": 0.0087, "step": 1008800 }, { "epoch": 9.92, "grad_norm": 0.1553027182817459, "learning_rate": 4.3010912846177536e-08, "loss": 0.051, "step": 1008825 }, { "epoch": 9.92, "grad_norm": 0.8018779754638672, "learning_rate": 4.2886790391929065e-08, "loss": 0.0158, "step": 1008850 }, { "epoch": 9.92, "grad_norm": 0.034382548183202744, "learning_rate": 4.27626679376806e-08, "loss": 0.0188, "step": 1008875 }, { "epoch": 9.92, "grad_norm": 6.712969779968262, "learning_rate": 4.263854548343214e-08, "loss": 0.0165, "step": 1008900 }, { "epoch": 9.92, "grad_norm": 1.2405041456222534, "learning_rate": 4.251442302918367e-08, "loss": 0.0296, "step": 1008925 }, { "epoch": 9.92, "grad_norm": 1.4537739753723145, "learning_rate": 4.2390300574935214e-08, "loss": 0.0055, "step": 1008950 }, { "epoch": 9.92, "grad_norm": 1.403043508529663, "learning_rate": 4.226617812068675e-08, "loss": 0.0249, "step": 1008975 }, { "epoch": 9.92, "grad_norm": 6.80855131149292, "learning_rate": 4.214205566643828e-08, "loss": 0.0185, "step": 1009000 }, { "epoch": 9.92, "grad_norm": 2.456801652908325, "learning_rate": 4.201793321218982e-08, "loss": 0.0522, "step": 1009025 }, { "epoch": 9.92, "grad_norm": 0.8791725039482117, "learning_rate": 4.189381075794136e-08, "loss": 0.0317, "step": 1009050 }, { "epoch": 9.92, "grad_norm": 0.9743327498435974, "learning_rate": 4.17696883036929e-08, "loss": 0.0504, "step": 1009075 }, { "epoch": 9.92, "grad_norm": 6.532220840454102, "learning_rate": 4.164556584944443e-08, "loss": 0.0189, "step": 1009100 }, { "epoch": 9.92, "grad_norm": 0.07194996625185013, "learning_rate": 4.152144339519597e-08, "loss": 0.0197, "step": 1009125 }, { "epoch": 9.92, "grad_norm": 6.732234477996826, "learning_rate": 4.1397320940947506e-08, "loss": 0.0071, "step": 1009150 }, { "epoch": 9.92, "grad_norm": 1.9885387420654297, "learning_rate": 4.1273198486699035e-08, "loss": 0.0433, "step": 1009175 }, { "epoch": 9.92, "grad_norm": 0.41503655910491943, "learning_rate": 4.114907603245058e-08, "loss": 0.0136, "step": 1009200 }, { "epoch": 9.92, "grad_norm": 0.22982008755207062, "learning_rate": 4.1024953578202114e-08, "loss": 0.0448, "step": 1009225 }, { "epoch": 9.92, "grad_norm": 9.131084442138672, "learning_rate": 4.0900831123953656e-08, "loss": 0.0082, "step": 1009250 }, { "epoch": 9.92, "grad_norm": 0.6995533108711243, "learning_rate": 4.0776708669705185e-08, "loss": 0.0348, "step": 1009275 }, { "epoch": 9.92, "grad_norm": 10.599361419677734, "learning_rate": 4.065258621545673e-08, "loss": 0.0185, "step": 1009300 }, { "epoch": 9.92, "grad_norm": 0.02884158492088318, "learning_rate": 4.052846376120826e-08, "loss": 0.0452, "step": 1009325 }, { "epoch": 9.92, "grad_norm": 10.787198066711426, "learning_rate": 4.040434130695979e-08, "loss": 0.0121, "step": 1009350 }, { "epoch": 9.92, "grad_norm": 0.009433315135538578, "learning_rate": 4.0280218852711334e-08, "loss": 0.0246, "step": 1009375 }, { "epoch": 9.92, "grad_norm": 3.2241220474243164, "learning_rate": 4.015609639846287e-08, "loss": 0.0182, "step": 1009400 }, { "epoch": 9.92, "grad_norm": 8.609528541564941, "learning_rate": 4.003197394421441e-08, "loss": 0.0267, "step": 1009425 }, { "epoch": 9.93, "grad_norm": 9.51405143737793, "learning_rate": 3.990785148996594e-08, "loss": 0.0278, "step": 1009450 }, { "epoch": 9.93, "grad_norm": 0.021099064499139786, "learning_rate": 3.9783729035717484e-08, "loss": 0.0322, "step": 1009475 }, { "epoch": 9.93, "grad_norm": 1.2994636297225952, "learning_rate": 3.965960658146902e-08, "loss": 0.0103, "step": 1009500 }, { "epoch": 9.93, "grad_norm": 2.2356042861938477, "learning_rate": 3.953548412722055e-08, "loss": 0.0393, "step": 1009525 }, { "epoch": 9.93, "grad_norm": 0.13010108470916748, "learning_rate": 3.941136167297209e-08, "loss": 0.0154, "step": 1009550 }, { "epoch": 9.93, "grad_norm": 1.8137335777282715, "learning_rate": 3.929220411689356e-08, "loss": 0.0415, "step": 1009575 }, { "epoch": 9.93, "grad_norm": 7.973453998565674, "learning_rate": 3.9168081662645104e-08, "loss": 0.0243, "step": 1009600 }, { "epoch": 9.93, "grad_norm": 0.17199841141700745, "learning_rate": 3.904395920839664e-08, "loss": 0.016, "step": 1009625 }, { "epoch": 9.93, "grad_norm": 7.474116325378418, "learning_rate": 3.8919836754148176e-08, "loss": 0.0133, "step": 1009650 }, { "epoch": 9.93, "grad_norm": 0.08908922970294952, "learning_rate": 3.879571429989971e-08, "loss": 0.0235, "step": 1009675 }, { "epoch": 9.93, "grad_norm": 12.580863952636719, "learning_rate": 3.867159184565125e-08, "loss": 0.0184, "step": 1009700 }, { "epoch": 9.93, "grad_norm": 0.8357506394386292, "learning_rate": 3.854746939140278e-08, "loss": 0.0497, "step": 1009725 }, { "epoch": 9.93, "grad_norm": 12.813324928283691, "learning_rate": 3.8423346937154325e-08, "loss": 0.0161, "step": 1009750 }, { "epoch": 9.93, "grad_norm": 6.549183368682861, "learning_rate": 3.829922448290586e-08, "loss": 0.0294, "step": 1009775 }, { "epoch": 9.93, "grad_norm": 7.936954498291016, "learning_rate": 3.8175102028657397e-08, "loss": 0.015, "step": 1009800 }, { "epoch": 9.93, "grad_norm": 4.357621669769287, "learning_rate": 3.805097957440893e-08, "loss": 0.0333, "step": 1009825 }, { "epoch": 9.93, "grad_norm": 11.95762825012207, "learning_rate": 3.792685712016047e-08, "loss": 0.0194, "step": 1009850 }, { "epoch": 9.93, "grad_norm": 1.344720482826233, "learning_rate": 3.7802734665912004e-08, "loss": 0.0237, "step": 1009875 }, { "epoch": 9.93, "grad_norm": 3.925823211669922, "learning_rate": 3.767861221166354e-08, "loss": 0.0209, "step": 1009900 }, { "epoch": 9.93, "grad_norm": 1.2618732452392578, "learning_rate": 3.7554489757415075e-08, "loss": 0.0345, "step": 1009925 }, { "epoch": 9.93, "grad_norm": 0.06550184637308121, "learning_rate": 3.743036730316662e-08, "loss": 0.0121, "step": 1009950 }, { "epoch": 9.93, "grad_norm": 0.3560493290424347, "learning_rate": 3.730624484891815e-08, "loss": 0.0354, "step": 1009975 }, { "epoch": 9.93, "grad_norm": 19.700721740722656, "learning_rate": 3.718212239466969e-08, "loss": 0.0143, "step": 1010000 }, { "epoch": 9.93, "grad_norm": 0.7987688779830933, "learning_rate": 3.7057999940421225e-08, "loss": 0.0407, "step": 1010025 }, { "epoch": 9.93, "grad_norm": 10.763555526733398, "learning_rate": 3.693387748617276e-08, "loss": 0.0132, "step": 1010050 }, { "epoch": 9.93, "grad_norm": 2.763077735900879, "learning_rate": 3.6809755031924296e-08, "loss": 0.0315, "step": 1010075 }, { "epoch": 9.93, "grad_norm": 21.81578826904297, "learning_rate": 3.668563257767583e-08, "loss": 0.014, "step": 1010100 }, { "epoch": 9.93, "grad_norm": 0.005460164975374937, "learning_rate": 3.6561510123427374e-08, "loss": 0.0296, "step": 1010125 }, { "epoch": 9.93, "grad_norm": 2.524442434310913, "learning_rate": 3.643738766917891e-08, "loss": 0.007, "step": 1010150 }, { "epoch": 9.93, "grad_norm": 0.1662219613790512, "learning_rate": 3.6313265214930445e-08, "loss": 0.035, "step": 1010175 }, { "epoch": 9.93, "grad_norm": 0.17684756219387054, "learning_rate": 3.618914276068198e-08, "loss": 0.0234, "step": 1010200 }, { "epoch": 9.93, "grad_norm": 1.299309492111206, "learning_rate": 3.606502030643352e-08, "loss": 0.051, "step": 1010225 }, { "epoch": 9.93, "grad_norm": 7.883469104766846, "learning_rate": 3.594089785218505e-08, "loss": 0.0103, "step": 1010250 }, { "epoch": 9.93, "grad_norm": 3.8648951053619385, "learning_rate": 3.581677539793659e-08, "loss": 0.033, "step": 1010275 }, { "epoch": 9.93, "grad_norm": 4.838206768035889, "learning_rate": 3.569265294368813e-08, "loss": 0.0114, "step": 1010300 }, { "epoch": 9.93, "grad_norm": 1.0840094089508057, "learning_rate": 3.5568530489439666e-08, "loss": 0.0449, "step": 1010325 }, { "epoch": 9.93, "grad_norm": 0.22592663764953613, "learning_rate": 3.54444080351912e-08, "loss": 0.025, "step": 1010350 }, { "epoch": 9.93, "grad_norm": 1.3172129392623901, "learning_rate": 3.532028558094274e-08, "loss": 0.0296, "step": 1010375 }, { "epoch": 9.93, "grad_norm": 0.09964283555746078, "learning_rate": 3.5196163126694273e-08, "loss": 0.0101, "step": 1010400 }, { "epoch": 9.93, "grad_norm": 0.01599416695535183, "learning_rate": 3.507204067244581e-08, "loss": 0.0491, "step": 1010425 }, { "epoch": 9.93, "grad_norm": 1.5354505777359009, "learning_rate": 3.4947918218197345e-08, "loss": 0.0146, "step": 1010450 }, { "epoch": 9.94, "grad_norm": 0.013895794749259949, "learning_rate": 3.482379576394889e-08, "loss": 0.0231, "step": 1010475 }, { "epoch": 9.94, "grad_norm": 8.922297477722168, "learning_rate": 3.469967330970042e-08, "loss": 0.0288, "step": 1010500 }, { "epoch": 9.94, "grad_norm": 0.1334773302078247, "learning_rate": 3.457555085545196e-08, "loss": 0.0399, "step": 1010525 }, { "epoch": 9.94, "grad_norm": 0.14278779923915863, "learning_rate": 3.4451428401203494e-08, "loss": 0.0191, "step": 1010550 }, { "epoch": 9.94, "grad_norm": 0.28458330035209656, "learning_rate": 3.432730594695503e-08, "loss": 0.0466, "step": 1010575 }, { "epoch": 9.94, "grad_norm": 6.822378158569336, "learning_rate": 3.4203183492706566e-08, "loss": 0.0152, "step": 1010600 }, { "epoch": 9.94, "grad_norm": 0.01726752705872059, "learning_rate": 3.40790610384581e-08, "loss": 0.0415, "step": 1010625 }, { "epoch": 9.94, "grad_norm": 8.524483680725098, "learning_rate": 3.3954938584209644e-08, "loss": 0.025, "step": 1010650 }, { "epoch": 9.94, "grad_norm": 0.23695242404937744, "learning_rate": 3.383081612996118e-08, "loss": 0.0294, "step": 1010675 }, { "epoch": 9.94, "grad_norm": 15.073938369750977, "learning_rate": 3.370669367571271e-08, "loss": 0.0162, "step": 1010700 }, { "epoch": 9.94, "grad_norm": 4.856886386871338, "learning_rate": 3.358257122146425e-08, "loss": 0.0312, "step": 1010725 }, { "epoch": 9.94, "grad_norm": 9.287748336791992, "learning_rate": 3.3458448767215787e-08, "loss": 0.0282, "step": 1010750 }, { "epoch": 9.94, "grad_norm": 0.20039674639701843, "learning_rate": 3.333432631296732e-08, "loss": 0.033, "step": 1010775 }, { "epoch": 9.94, "grad_norm": 0.060831133276224136, "learning_rate": 3.321020385871886e-08, "loss": 0.0159, "step": 1010800 }, { "epoch": 9.94, "grad_norm": 3.0855157375335693, "learning_rate": 3.30860814044704e-08, "loss": 0.0342, "step": 1010825 }, { "epoch": 9.94, "grad_norm": 0.7969847917556763, "learning_rate": 3.2961958950221936e-08, "loss": 0.0114, "step": 1010850 }, { "epoch": 9.94, "grad_norm": 0.05722508206963539, "learning_rate": 3.283783649597347e-08, "loss": 0.029, "step": 1010875 }, { "epoch": 9.94, "grad_norm": 0.5301374197006226, "learning_rate": 3.271371404172501e-08, "loss": 0.0156, "step": 1010900 }, { "epoch": 9.94, "grad_norm": 0.01708872988820076, "learning_rate": 3.258959158747654e-08, "loss": 0.0308, "step": 1010925 }, { "epoch": 9.94, "grad_norm": 14.083335876464844, "learning_rate": 3.246546913322808e-08, "loss": 0.0144, "step": 1010950 }, { "epoch": 9.94, "grad_norm": 0.21057309210300446, "learning_rate": 3.2341346678979615e-08, "loss": 0.0382, "step": 1010975 }, { "epoch": 9.94, "grad_norm": 7.832048416137695, "learning_rate": 3.221722422473115e-08, "loss": 0.0442, "step": 1011000 }, { "epoch": 9.94, "grad_norm": 0.8193992972373962, "learning_rate": 3.209310177048269e-08, "loss": 0.0373, "step": 1011025 }, { "epoch": 9.94, "grad_norm": 4.080381393432617, "learning_rate": 3.196897931623423e-08, "loss": 0.013, "step": 1011050 }, { "epoch": 9.94, "grad_norm": 1.411481499671936, "learning_rate": 3.1844856861985764e-08, "loss": 0.0304, "step": 1011075 }, { "epoch": 9.94, "grad_norm": 3.230842351913452, "learning_rate": 3.17207344077373e-08, "loss": 0.0108, "step": 1011100 }, { "epoch": 9.94, "grad_norm": 1.5420888662338257, "learning_rate": 3.1596611953488836e-08, "loss": 0.0213, "step": 1011125 }, { "epoch": 9.94, "grad_norm": 0.5518614053726196, "learning_rate": 3.147248949924037e-08, "loss": 0.0097, "step": 1011150 }, { "epoch": 9.94, "grad_norm": 0.020168377086520195, "learning_rate": 3.134836704499191e-08, "loss": 0.0424, "step": 1011175 }, { "epoch": 9.94, "grad_norm": 0.2824244797229767, "learning_rate": 3.122424459074345e-08, "loss": 0.005, "step": 1011200 }, { "epoch": 9.94, "grad_norm": 0.03072766400873661, "learning_rate": 3.1100122136494985e-08, "loss": 0.0323, "step": 1011225 }, { "epoch": 9.94, "grad_norm": 0.9800422191619873, "learning_rate": 3.097599968224652e-08, "loss": 0.009, "step": 1011250 }, { "epoch": 9.94, "grad_norm": 0.027907760813832283, "learning_rate": 3.0851877227998056e-08, "loss": 0.0353, "step": 1011275 }, { "epoch": 9.94, "grad_norm": 1.8922613859176636, "learning_rate": 3.072775477374959e-08, "loss": 0.005, "step": 1011300 }, { "epoch": 9.94, "grad_norm": 0.005299228243529797, "learning_rate": 3.060363231950113e-08, "loss": 0.0286, "step": 1011325 }, { "epoch": 9.94, "grad_norm": 0.7970935106277466, "learning_rate": 3.0479509865252664e-08, "loss": 0.0066, "step": 1011350 }, { "epoch": 9.94, "grad_norm": 0.04903523996472359, "learning_rate": 3.0355387411004206e-08, "loss": 0.0292, "step": 1011375 }, { "epoch": 9.94, "grad_norm": 0.17369486391544342, "learning_rate": 3.023126495675574e-08, "loss": 0.0187, "step": 1011400 }, { "epoch": 9.94, "grad_norm": 9.360517501831055, "learning_rate": 3.010714250250728e-08, "loss": 0.0407, "step": 1011425 }, { "epoch": 9.94, "grad_norm": 17.272642135620117, "learning_rate": 2.998302004825881e-08, "loss": 0.0153, "step": 1011450 }, { "epoch": 9.94, "grad_norm": 0.39236336946487427, "learning_rate": 2.985889759401035e-08, "loss": 0.0408, "step": 1011475 }, { "epoch": 9.95, "grad_norm": 1.3184230327606201, "learning_rate": 2.9734775139761888e-08, "loss": 0.0166, "step": 1011500 }, { "epoch": 9.95, "grad_norm": 3.308197021484375, "learning_rate": 2.961065268551342e-08, "loss": 0.0461, "step": 1011525 }, { "epoch": 9.95, "grad_norm": 14.263398170471191, "learning_rate": 2.948653023126496e-08, "loss": 0.0155, "step": 1011550 }, { "epoch": 9.95, "grad_norm": 0.018335556611418724, "learning_rate": 2.9362407777016495e-08, "loss": 0.0491, "step": 1011575 }, { "epoch": 9.95, "grad_norm": 11.148152351379395, "learning_rate": 2.9238285322768034e-08, "loss": 0.0089, "step": 1011600 }, { "epoch": 9.95, "grad_norm": 0.033105384558439255, "learning_rate": 2.911416286851957e-08, "loss": 0.0281, "step": 1011625 }, { "epoch": 9.95, "grad_norm": 0.31683212518692017, "learning_rate": 2.8990040414271105e-08, "loss": 0.0071, "step": 1011650 }, { "epoch": 9.95, "grad_norm": 2.247284173965454, "learning_rate": 2.8865917960022644e-08, "loss": 0.0406, "step": 1011675 }, { "epoch": 9.95, "grad_norm": 5.682784080505371, "learning_rate": 2.8741795505774177e-08, "loss": 0.0206, "step": 1011700 }, { "epoch": 9.95, "grad_norm": 0.07593902945518494, "learning_rate": 2.8617673051525716e-08, "loss": 0.0274, "step": 1011725 }, { "epoch": 9.95, "grad_norm": 0.18474097549915314, "learning_rate": 2.849355059727725e-08, "loss": 0.0156, "step": 1011750 }, { "epoch": 9.95, "grad_norm": 1.0556577444076538, "learning_rate": 2.8369428143028787e-08, "loss": 0.0193, "step": 1011775 }, { "epoch": 9.95, "grad_norm": 16.42310905456543, "learning_rate": 2.8245305688780326e-08, "loss": 0.013, "step": 1011800 }, { "epoch": 9.95, "grad_norm": 0.5867797136306763, "learning_rate": 2.8121183234531862e-08, "loss": 0.0486, "step": 1011825 }, { "epoch": 9.95, "grad_norm": 6.542603015899658, "learning_rate": 2.79970607802834e-08, "loss": 0.0168, "step": 1011850 }, { "epoch": 9.95, "grad_norm": 10.984333038330078, "learning_rate": 2.7872938326034933e-08, "loss": 0.0286, "step": 1011875 }, { "epoch": 9.95, "grad_norm": 5.315666198730469, "learning_rate": 2.7748815871786472e-08, "loss": 0.0159, "step": 1011900 }, { "epoch": 9.95, "grad_norm": 0.05414106324315071, "learning_rate": 2.7624693417538008e-08, "loss": 0.0367, "step": 1011925 }, { "epoch": 9.95, "grad_norm": 5.750499248504639, "learning_rate": 2.7500570963289544e-08, "loss": 0.0252, "step": 1011950 }, { "epoch": 9.95, "grad_norm": 0.07535578310489655, "learning_rate": 2.7376448509041083e-08, "loss": 0.0337, "step": 1011975 }, { "epoch": 9.95, "grad_norm": 3.3311939239501953, "learning_rate": 2.725232605479262e-08, "loss": 0.0128, "step": 1012000 }, { "epoch": 9.95, "grad_norm": 1.3642948865890503, "learning_rate": 2.7128203600544158e-08, "loss": 0.0287, "step": 1012025 }, { "epoch": 9.95, "grad_norm": 0.08417576551437378, "learning_rate": 2.700408114629569e-08, "loss": 0.0099, "step": 1012050 }, { "epoch": 9.95, "grad_norm": 0.3991016149520874, "learning_rate": 2.6879958692047226e-08, "loss": 0.0215, "step": 1012075 }, { "epoch": 9.95, "grad_norm": 1.063970923423767, "learning_rate": 2.6755836237798765e-08, "loss": 0.0066, "step": 1012100 }, { "epoch": 9.95, "grad_norm": 1.9035604000091553, "learning_rate": 2.66317137835503e-08, "loss": 0.0229, "step": 1012125 }, { "epoch": 9.95, "grad_norm": 4.759331703186035, "learning_rate": 2.650759132930184e-08, "loss": 0.0124, "step": 1012150 }, { "epoch": 9.95, "grad_norm": 1.6074515581130981, "learning_rate": 2.6383468875053375e-08, "loss": 0.0328, "step": 1012175 }, { "epoch": 9.95, "grad_norm": 5.241008758544922, "learning_rate": 2.6259346420804914e-08, "loss": 0.0188, "step": 1012200 }, { "epoch": 9.95, "grad_norm": 0.2389390915632248, "learning_rate": 2.6135223966556447e-08, "loss": 0.0423, "step": 1012225 }, { "epoch": 9.95, "grad_norm": 8.078263282775879, "learning_rate": 2.6011101512307982e-08, "loss": 0.0089, "step": 1012250 }, { "epoch": 9.95, "grad_norm": 0.02341505140066147, "learning_rate": 2.588697905805952e-08, "loss": 0.0454, "step": 1012275 }, { "epoch": 9.95, "grad_norm": 1.5768625736236572, "learning_rate": 2.5762856603811057e-08, "loss": 0.0135, "step": 1012300 }, { "epoch": 9.95, "grad_norm": 0.017920276150107384, "learning_rate": 2.5638734149562596e-08, "loss": 0.0281, "step": 1012325 }, { "epoch": 9.95, "grad_norm": 2.320070743560791, "learning_rate": 2.5514611695314132e-08, "loss": 0.0136, "step": 1012350 }, { "epoch": 9.95, "grad_norm": 1.2410858869552612, "learning_rate": 2.539048924106567e-08, "loss": 0.0464, "step": 1012375 }, { "epoch": 9.95, "grad_norm": 6.59240198135376, "learning_rate": 2.5266366786817203e-08, "loss": 0.0116, "step": 1012400 }, { "epoch": 9.95, "grad_norm": 0.14725562930107117, "learning_rate": 2.514224433256874e-08, "loss": 0.0335, "step": 1012425 }, { "epoch": 9.95, "grad_norm": 0.22030840814113617, "learning_rate": 2.5018121878320278e-08, "loss": 0.0059, "step": 1012450 }, { "epoch": 9.95, "grad_norm": 0.0831255093216896, "learning_rate": 2.4893999424071814e-08, "loss": 0.0264, "step": 1012475 }, { "epoch": 9.96, "grad_norm": 1.9774730205535889, "learning_rate": 2.4769876969823353e-08, "loss": 0.0064, "step": 1012500 }, { "epoch": 9.96, "grad_norm": 0.089514821767807, "learning_rate": 2.464575451557489e-08, "loss": 0.0449, "step": 1012525 }, { "epoch": 9.96, "grad_norm": 0.38918396830558777, "learning_rate": 2.452163206132642e-08, "loss": 0.0117, "step": 1012550 }, { "epoch": 9.96, "grad_norm": 0.006926534231752157, "learning_rate": 2.439750960707796e-08, "loss": 0.0375, "step": 1012575 }, { "epoch": 9.96, "grad_norm": 10.21249008178711, "learning_rate": 2.4273387152829496e-08, "loss": 0.0102, "step": 1012600 }, { "epoch": 9.96, "grad_norm": 0.0762196034193039, "learning_rate": 2.4149264698581035e-08, "loss": 0.0298, "step": 1012625 }, { "epoch": 9.96, "grad_norm": 4.753821849822998, "learning_rate": 2.402514224433257e-08, "loss": 0.0167, "step": 1012650 }, { "epoch": 9.96, "grad_norm": 1.0535435676574707, "learning_rate": 2.3905984688254048e-08, "loss": 0.0325, "step": 1012675 }, { "epoch": 9.96, "grad_norm": 2.1239638328552246, "learning_rate": 2.3781862234005583e-08, "loss": 0.0071, "step": 1012700 }, { "epoch": 9.96, "grad_norm": 0.01377436239272356, "learning_rate": 2.3657739779757116e-08, "loss": 0.0306, "step": 1012725 }, { "epoch": 9.96, "grad_norm": 6.875720500946045, "learning_rate": 2.3533617325508655e-08, "loss": 0.0062, "step": 1012750 }, { "epoch": 9.96, "grad_norm": 0.020122742280364037, "learning_rate": 2.340949487126019e-08, "loss": 0.0289, "step": 1012775 }, { "epoch": 9.96, "grad_norm": 3.7860095500946045, "learning_rate": 2.328537241701173e-08, "loss": 0.0139, "step": 1012800 }, { "epoch": 9.96, "grad_norm": 0.014166871085762978, "learning_rate": 2.3161249962763265e-08, "loss": 0.0483, "step": 1012825 }, { "epoch": 9.96, "grad_norm": 12.581381797790527, "learning_rate": 2.3037127508514804e-08, "loss": 0.0103, "step": 1012850 }, { "epoch": 9.96, "grad_norm": 4.2620954513549805, "learning_rate": 2.291300505426634e-08, "loss": 0.035, "step": 1012875 }, { "epoch": 9.96, "grad_norm": 9.881019592285156, "learning_rate": 2.2788882600017872e-08, "loss": 0.0125, "step": 1012900 }, { "epoch": 9.96, "grad_norm": 0.02125641703605652, "learning_rate": 2.266476014576941e-08, "loss": 0.0362, "step": 1012925 }, { "epoch": 9.96, "grad_norm": 3.7919607162475586, "learning_rate": 2.2540637691520947e-08, "loss": 0.0239, "step": 1012950 }, { "epoch": 9.96, "grad_norm": 0.030314182862639427, "learning_rate": 2.2416515237272486e-08, "loss": 0.0186, "step": 1012975 }, { "epoch": 9.96, "grad_norm": 0.1360749900341034, "learning_rate": 2.2292392783024022e-08, "loss": 0.0111, "step": 1013000 }, { "epoch": 9.96, "grad_norm": 6.016964912414551, "learning_rate": 2.2168270328775558e-08, "loss": 0.0418, "step": 1013025 }, { "epoch": 9.96, "grad_norm": 1.3220500946044922, "learning_rate": 2.2044147874527097e-08, "loss": 0.0169, "step": 1013050 }, { "epoch": 9.96, "grad_norm": 0.6129027009010315, "learning_rate": 2.192002542027863e-08, "loss": 0.0306, "step": 1013075 }, { "epoch": 9.96, "grad_norm": 1.2897193431854248, "learning_rate": 2.179590296603017e-08, "loss": 0.0134, "step": 1013100 }, { "epoch": 9.96, "grad_norm": 0.10432111471891403, "learning_rate": 2.1671780511781704e-08, "loss": 0.0381, "step": 1013125 }, { "epoch": 9.96, "grad_norm": 9.459071159362793, "learning_rate": 2.1547658057533243e-08, "loss": 0.0181, "step": 1013150 }, { "epoch": 9.96, "grad_norm": 0.02234136499464512, "learning_rate": 2.142353560328478e-08, "loss": 0.0499, "step": 1013175 }, { "epoch": 9.96, "grad_norm": 0.09472429007291794, "learning_rate": 2.1299413149036314e-08, "loss": 0.0232, "step": 1013200 }, { "epoch": 9.96, "grad_norm": 0.07892359793186188, "learning_rate": 2.1175290694787853e-08, "loss": 0.0337, "step": 1013225 }, { "epoch": 9.96, "grad_norm": 5.3412017822265625, "learning_rate": 2.1051168240539386e-08, "loss": 0.0169, "step": 1013250 }, { "epoch": 9.96, "grad_norm": 1.880859375, "learning_rate": 2.0927045786290928e-08, "loss": 0.0426, "step": 1013275 }, { "epoch": 9.96, "grad_norm": 4.325608730316162, "learning_rate": 2.080292333204246e-08, "loss": 0.0103, "step": 1013300 }, { "epoch": 9.96, "grad_norm": 9.766234397888184, "learning_rate": 2.0678800877793996e-08, "loss": 0.0452, "step": 1013325 }, { "epoch": 9.96, "grad_norm": 3.89902925491333, "learning_rate": 2.0554678423545535e-08, "loss": 0.0102, "step": 1013350 }, { "epoch": 9.96, "grad_norm": 0.1578650325536728, "learning_rate": 2.043055596929707e-08, "loss": 0.0377, "step": 1013375 }, { "epoch": 9.96, "grad_norm": 5.091617584228516, "learning_rate": 2.030643351504861e-08, "loss": 0.0212, "step": 1013400 }, { "epoch": 9.96, "grad_norm": 0.008241204544901848, "learning_rate": 2.0182311060800146e-08, "loss": 0.0281, "step": 1013425 }, { "epoch": 9.96, "grad_norm": 0.09921993315219879, "learning_rate": 2.0058188606551685e-08, "loss": 0.0264, "step": 1013450 }, { "epoch": 9.96, "grad_norm": 0.025664540007710457, "learning_rate": 1.9934066152303217e-08, "loss": 0.0209, "step": 1013475 }, { "epoch": 9.96, "grad_norm": 10.274489402770996, "learning_rate": 1.9809943698054753e-08, "loss": 0.0135, "step": 1013500 }, { "epoch": 9.97, "grad_norm": 0.06747392565011978, "learning_rate": 1.9685821243806292e-08, "loss": 0.0537, "step": 1013525 }, { "epoch": 9.97, "grad_norm": 24.249469757080078, "learning_rate": 1.9561698789557827e-08, "loss": 0.0233, "step": 1013550 }, { "epoch": 9.97, "grad_norm": 1.9718540906906128, "learning_rate": 1.9437576335309363e-08, "loss": 0.032, "step": 1013575 }, { "epoch": 9.97, "grad_norm": 0.3089069724082947, "learning_rate": 1.9313453881060902e-08, "loss": 0.0272, "step": 1013600 }, { "epoch": 9.97, "grad_norm": 0.0043493458069860935, "learning_rate": 1.9189331426812438e-08, "loss": 0.0186, "step": 1013625 }, { "epoch": 9.97, "grad_norm": 1.668221116065979, "learning_rate": 1.9065208972563974e-08, "loss": 0.019, "step": 1013650 }, { "epoch": 9.97, "grad_norm": 0.19121168553829193, "learning_rate": 1.8941086518315513e-08, "loss": 0.0331, "step": 1013675 }, { "epoch": 9.97, "grad_norm": 22.24143409729004, "learning_rate": 1.881696406406705e-08, "loss": 0.0134, "step": 1013700 }, { "epoch": 9.97, "grad_norm": 1.2472949028015137, "learning_rate": 1.8692841609818584e-08, "loss": 0.0218, "step": 1013725 }, { "epoch": 9.97, "grad_norm": 1.063475251197815, "learning_rate": 1.856871915557012e-08, "loss": 0.0195, "step": 1013750 }, { "epoch": 9.97, "grad_norm": 2.629774332046509, "learning_rate": 1.844459670132166e-08, "loss": 0.038, "step": 1013775 }, { "epoch": 9.97, "grad_norm": 6.4397053718566895, "learning_rate": 1.8320474247073195e-08, "loss": 0.0209, "step": 1013800 }, { "epoch": 9.97, "grad_norm": 0.0638171136379242, "learning_rate": 1.819635179282473e-08, "loss": 0.0337, "step": 1013825 }, { "epoch": 9.97, "grad_norm": 3.70448899269104, "learning_rate": 1.807222933857627e-08, "loss": 0.015, "step": 1013850 }, { "epoch": 9.97, "grad_norm": 1.128444790840149, "learning_rate": 1.79481068843278e-08, "loss": 0.0324, "step": 1013875 }, { "epoch": 9.97, "grad_norm": 6.807050704956055, "learning_rate": 1.782398443007934e-08, "loss": 0.016, "step": 1013900 }, { "epoch": 9.97, "grad_norm": 5.618440628051758, "learning_rate": 1.7699861975830876e-08, "loss": 0.0233, "step": 1013925 }, { "epoch": 9.97, "grad_norm": 0.1434171050786972, "learning_rate": 1.7575739521582415e-08, "loss": 0.0156, "step": 1013950 }, { "epoch": 9.97, "grad_norm": 0.15237830579280853, "learning_rate": 1.745161706733395e-08, "loss": 0.036, "step": 1013975 }, { "epoch": 9.97, "grad_norm": 0.22243991494178772, "learning_rate": 1.7327494613085487e-08, "loss": 0.0269, "step": 1014000 }, { "epoch": 9.97, "grad_norm": 1.6594842672348022, "learning_rate": 1.7203372158837023e-08, "loss": 0.0484, "step": 1014025 }, { "epoch": 9.97, "grad_norm": 1.555195927619934, "learning_rate": 1.7079249704588558e-08, "loss": 0.0118, "step": 1014050 }, { "epoch": 9.97, "grad_norm": 8.681231498718262, "learning_rate": 1.6955127250340097e-08, "loss": 0.0265, "step": 1014075 }, { "epoch": 9.97, "grad_norm": 3.0523769855499268, "learning_rate": 1.6831004796091633e-08, "loss": 0.0204, "step": 1014100 }, { "epoch": 9.97, "grad_norm": 0.5817016363143921, "learning_rate": 1.6706882341843172e-08, "loss": 0.0514, "step": 1014125 }, { "epoch": 9.97, "grad_norm": 0.9988526701927185, "learning_rate": 1.6582759887594708e-08, "loss": 0.0099, "step": 1014150 }, { "epoch": 9.97, "grad_norm": 0.000846047536469996, "learning_rate": 1.6458637433346243e-08, "loss": 0.0292, "step": 1014175 }, { "epoch": 9.97, "grad_norm": 1.9428197145462036, "learning_rate": 1.633451497909778e-08, "loss": 0.0119, "step": 1014200 }, { "epoch": 9.97, "grad_norm": 0.025899816304445267, "learning_rate": 1.6210392524849315e-08, "loss": 0.0431, "step": 1014225 }, { "epoch": 9.97, "grad_norm": 1.3465479612350464, "learning_rate": 1.6086270070600854e-08, "loss": 0.0123, "step": 1014250 }, { "epoch": 9.97, "grad_norm": 0.013915814459323883, "learning_rate": 1.596214761635239e-08, "loss": 0.0344, "step": 1014275 }, { "epoch": 9.97, "grad_norm": 8.476890563964844, "learning_rate": 1.583802516210393e-08, "loss": 0.0198, "step": 1014300 }, { "epoch": 9.97, "grad_norm": 1.7379695177078247, "learning_rate": 1.571390270785546e-08, "loss": 0.0622, "step": 1014325 }, { "epoch": 9.97, "grad_norm": 2.3575382232666016, "learning_rate": 1.5589780253607e-08, "loss": 0.0114, "step": 1014350 }, { "epoch": 9.97, "grad_norm": 0.08265486359596252, "learning_rate": 1.5465657799358536e-08, "loss": 0.0365, "step": 1014375 }, { "epoch": 9.97, "grad_norm": 0.2431761622428894, "learning_rate": 1.5341535345110075e-08, "loss": 0.0125, "step": 1014400 }, { "epoch": 9.97, "grad_norm": 0.9034013748168945, "learning_rate": 1.521741289086161e-08, "loss": 0.054, "step": 1014425 }, { "epoch": 9.97, "grad_norm": 0.43537256121635437, "learning_rate": 1.5093290436613146e-08, "loss": 0.0277, "step": 1014450 }, { "epoch": 9.97, "grad_norm": 0.5378648042678833, "learning_rate": 1.4969167982364682e-08, "loss": 0.0321, "step": 1014475 }, { "epoch": 9.97, "grad_norm": 0.06468234956264496, "learning_rate": 1.484504552811622e-08, "loss": 0.0172, "step": 1014500 }, { "epoch": 9.97, "grad_norm": 5.176732063293457, "learning_rate": 1.4720923073867757e-08, "loss": 0.0411, "step": 1014525 }, { "epoch": 9.98, "grad_norm": 16.527860641479492, "learning_rate": 1.4596800619619292e-08, "loss": 0.0153, "step": 1014550 }, { "epoch": 9.98, "grad_norm": 2.1426103115081787, "learning_rate": 1.447267816537083e-08, "loss": 0.0399, "step": 1014575 }, { "epoch": 9.98, "grad_norm": 0.7846993803977966, "learning_rate": 1.4348555711122367e-08, "loss": 0.0135, "step": 1014600 }, { "epoch": 9.98, "grad_norm": 0.028093475848436356, "learning_rate": 1.4224433256873901e-08, "loss": 0.0356, "step": 1014625 }, { "epoch": 9.98, "grad_norm": 0.23978127539157867, "learning_rate": 1.4100310802625439e-08, "loss": 0.0244, "step": 1014650 }, { "epoch": 9.98, "grad_norm": 0.7994678020477295, "learning_rate": 1.3976188348376976e-08, "loss": 0.0304, "step": 1014675 }, { "epoch": 9.98, "grad_norm": 17.814579010009766, "learning_rate": 1.3852065894128513e-08, "loss": 0.0201, "step": 1014700 }, { "epoch": 9.98, "grad_norm": 0.3382464647293091, "learning_rate": 1.3727943439880049e-08, "loss": 0.0299, "step": 1014725 }, { "epoch": 9.98, "grad_norm": 7.696365833282471, "learning_rate": 1.3603820985631586e-08, "loss": 0.0198, "step": 1014750 }, { "epoch": 9.98, "grad_norm": 0.008933356031775475, "learning_rate": 1.3479698531383122e-08, "loss": 0.0433, "step": 1014775 }, { "epoch": 9.98, "grad_norm": 2.5187718868255615, "learning_rate": 1.3355576077134658e-08, "loss": 0.0053, "step": 1014800 }, { "epoch": 9.98, "grad_norm": 1.2345274686813354, "learning_rate": 1.3231453622886195e-08, "loss": 0.0679, "step": 1014825 }, { "epoch": 9.98, "grad_norm": 9.493000984191895, "learning_rate": 1.3107331168637732e-08, "loss": 0.0148, "step": 1014850 }, { "epoch": 9.98, "grad_norm": 0.06155240163207054, "learning_rate": 1.298320871438927e-08, "loss": 0.0458, "step": 1014875 }, { "epoch": 9.98, "grad_norm": 13.335552215576172, "learning_rate": 1.2859086260140806e-08, "loss": 0.022, "step": 1014900 }, { "epoch": 9.98, "grad_norm": 1.3620855808258057, "learning_rate": 1.2734963805892343e-08, "loss": 0.0288, "step": 1014925 }, { "epoch": 9.98, "grad_norm": 11.78995132446289, "learning_rate": 1.2610841351643879e-08, "loss": 0.0215, "step": 1014950 }, { "epoch": 9.98, "grad_norm": 0.34102022647857666, "learning_rate": 1.2486718897395414e-08, "loss": 0.0353, "step": 1014975 }, { "epoch": 9.98, "grad_norm": 17.58376693725586, "learning_rate": 1.2362596443146952e-08, "loss": 0.018, "step": 1015000 }, { "epoch": 9.98, "grad_norm": 0.060854218900203705, "learning_rate": 1.2238473988898489e-08, "loss": 0.0294, "step": 1015025 }, { "epoch": 9.98, "grad_norm": 0.28585106134414673, "learning_rate": 1.2114351534650026e-08, "loss": 0.0146, "step": 1015050 }, { "epoch": 9.98, "grad_norm": 0.10312867909669876, "learning_rate": 1.1990229080401562e-08, "loss": 0.0289, "step": 1015075 }, { "epoch": 9.98, "grad_norm": 0.16971144080162048, "learning_rate": 1.1866106626153098e-08, "loss": 0.0122, "step": 1015100 }, { "epoch": 9.98, "grad_norm": 4.361489295959473, "learning_rate": 1.1741984171904635e-08, "loss": 0.0314, "step": 1015125 }, { "epoch": 9.98, "grad_norm": 11.531038284301758, "learning_rate": 1.1617861717656171e-08, "loss": 0.0191, "step": 1015150 }, { "epoch": 9.98, "grad_norm": 0.021885672584176064, "learning_rate": 1.1493739263407708e-08, "loss": 0.0343, "step": 1015175 }, { "epoch": 9.98, "grad_norm": 10.302566528320312, "learning_rate": 1.1369616809159246e-08, "loss": 0.017, "step": 1015200 }, { "epoch": 9.98, "grad_norm": 0.007114348467439413, "learning_rate": 1.1245494354910783e-08, "loss": 0.0402, "step": 1015225 }, { "epoch": 9.98, "grad_norm": 23.67144012451172, "learning_rate": 1.1121371900662317e-08, "loss": 0.0173, "step": 1015250 }, { "epoch": 9.98, "grad_norm": 0.13662154972553253, "learning_rate": 1.0997249446413854e-08, "loss": 0.0288, "step": 1015275 }, { "epoch": 9.98, "grad_norm": 5.696292400360107, "learning_rate": 1.0873126992165392e-08, "loss": 0.0028, "step": 1015300 }, { "epoch": 9.98, "grad_norm": 0.08864932507276535, "learning_rate": 1.074900453791693e-08, "loss": 0.0316, "step": 1015325 }, { "epoch": 9.98, "grad_norm": 6.432154655456543, "learning_rate": 1.0624882083668465e-08, "loss": 0.0236, "step": 1015350 }, { "epoch": 9.98, "grad_norm": 0.030222145840525627, "learning_rate": 1.0500759629420002e-08, "loss": 0.0242, "step": 1015375 }, { "epoch": 9.98, "grad_norm": 0.8559691905975342, "learning_rate": 1.0376637175171536e-08, "loss": 0.0162, "step": 1015400 }, { "epoch": 9.98, "grad_norm": 0.06232157722115517, "learning_rate": 1.0252514720923074e-08, "loss": 0.0165, "step": 1015425 }, { "epoch": 9.98, "grad_norm": 7.097736835479736, "learning_rate": 1.0128392266674611e-08, "loss": 0.0135, "step": 1015450 }, { "epoch": 9.98, "grad_norm": 0.27246543765068054, "learning_rate": 1.0004269812426148e-08, "loss": 0.0339, "step": 1015475 }, { "epoch": 9.98, "grad_norm": 0.9386889338493347, "learning_rate": 9.880147358177686e-09, "loss": 0.023, "step": 1015500 }, { "epoch": 9.98, "grad_norm": 0.7698168754577637, "learning_rate": 9.756024903929222e-09, "loss": 0.0268, "step": 1015525 }, { "epoch": 9.99, "grad_norm": 13.381426811218262, "learning_rate": 9.631902449680757e-09, "loss": 0.0261, "step": 1015550 }, { "epoch": 9.99, "grad_norm": 0.4541022479534149, "learning_rate": 9.507779995432295e-09, "loss": 0.0461, "step": 1015575 }, { "epoch": 9.99, "grad_norm": 5.34681510925293, "learning_rate": 9.38365754118383e-09, "loss": 0.0106, "step": 1015600 }, { "epoch": 9.99, "grad_norm": 0.02419126033782959, "learning_rate": 9.259535086935368e-09, "loss": 0.0238, "step": 1015625 }, { "epoch": 9.99, "grad_norm": 11.12003231048584, "learning_rate": 9.135412632686905e-09, "loss": 0.021, "step": 1015650 }, { "epoch": 9.99, "grad_norm": 0.0725429430603981, "learning_rate": 9.01129017843844e-09, "loss": 0.0322, "step": 1015675 }, { "epoch": 9.99, "grad_norm": 0.01768922060728073, "learning_rate": 8.887167724189978e-09, "loss": 0.0146, "step": 1015700 }, { "epoch": 9.99, "grad_norm": 0.04914349317550659, "learning_rate": 8.763045269941514e-09, "loss": 0.0355, "step": 1015725 }, { "epoch": 9.99, "grad_norm": 8.95336627960205, "learning_rate": 8.638922815693051e-09, "loss": 0.0077, "step": 1015750 }, { "epoch": 9.99, "grad_norm": 1.6040042638778687, "learning_rate": 8.514800361444587e-09, "loss": 0.0232, "step": 1015775 }, { "epoch": 9.99, "grad_norm": 10.122708320617676, "learning_rate": 8.390677907196124e-09, "loss": 0.0242, "step": 1015800 }, { "epoch": 9.99, "grad_norm": 0.2470727562904358, "learning_rate": 8.26655545294766e-09, "loss": 0.0254, "step": 1015825 }, { "epoch": 9.99, "grad_norm": 8.606300354003906, "learning_rate": 8.142432998699197e-09, "loss": 0.0173, "step": 1015850 }, { "epoch": 9.99, "grad_norm": 0.4189406633377075, "learning_rate": 8.018310544450735e-09, "loss": 0.0569, "step": 1015875 }, { "epoch": 9.99, "grad_norm": 0.043436199426651, "learning_rate": 7.89418809020227e-09, "loss": 0.018, "step": 1015900 }, { "epoch": 9.99, "grad_norm": 3.238889694213867, "learning_rate": 7.770065635953808e-09, "loss": 0.029, "step": 1015925 }, { "epoch": 9.99, "grad_norm": 7.523128509521484, "learning_rate": 7.645943181705344e-09, "loss": 0.0245, "step": 1015950 }, { "epoch": 9.99, "grad_norm": 0.10772339254617691, "learning_rate": 7.521820727456881e-09, "loss": 0.0329, "step": 1015975 }, { "epoch": 9.99, "grad_norm": 2.1140313148498535, "learning_rate": 7.3976982732084174e-09, "loss": 0.0058, "step": 1016000 }, { "epoch": 9.99, "grad_norm": 5.792158126831055, "learning_rate": 7.273575818959954e-09, "loss": 0.027, "step": 1016025 }, { "epoch": 9.99, "grad_norm": 4.687312126159668, "learning_rate": 7.14945336471149e-09, "loss": 0.0135, "step": 1016050 }, { "epoch": 9.99, "grad_norm": 0.08601278811693192, "learning_rate": 7.025330910463027e-09, "loss": 0.0311, "step": 1016075 }, { "epoch": 9.99, "grad_norm": 2.8451955318450928, "learning_rate": 6.901208456214564e-09, "loss": 0.0194, "step": 1016100 }, { "epoch": 9.99, "grad_norm": 0.24994432926177979, "learning_rate": 6.7770860019661e-09, "loss": 0.0616, "step": 1016125 }, { "epoch": 9.99, "grad_norm": 2.602494716644287, "learning_rate": 6.652963547717637e-09, "loss": 0.0209, "step": 1016150 }, { "epoch": 9.99, "grad_norm": 0.6386338472366333, "learning_rate": 6.528841093469174e-09, "loss": 0.0245, "step": 1016175 }, { "epoch": 9.99, "grad_norm": 0.346810907125473, "learning_rate": 6.40471863922071e-09, "loss": 0.0138, "step": 1016200 }, { "epoch": 9.99, "grad_norm": 0.37743932008743286, "learning_rate": 6.280596184972246e-09, "loss": 0.0317, "step": 1016225 }, { "epoch": 9.99, "grad_norm": 1.5936779975891113, "learning_rate": 6.156473730723784e-09, "loss": 0.0236, "step": 1016250 }, { "epoch": 9.99, "grad_norm": 1.0794179439544678, "learning_rate": 6.032351276475319e-09, "loss": 0.0313, "step": 1016275 }, { "epoch": 9.99, "grad_norm": 8.158884048461914, "learning_rate": 5.908228822226857e-09, "loss": 0.0353, "step": 1016300 }, { "epoch": 9.99, "grad_norm": 0.035352688282728195, "learning_rate": 5.784106367978393e-09, "loss": 0.032, "step": 1016325 }, { "epoch": 9.99, "grad_norm": 0.16618573665618896, "learning_rate": 5.659983913729929e-09, "loss": 0.0076, "step": 1016350 }, { "epoch": 9.99, "grad_norm": 6.404623031616211, "learning_rate": 5.535861459481466e-09, "loss": 0.0276, "step": 1016375 }, { "epoch": 9.99, "grad_norm": 6.81091833114624, "learning_rate": 5.411739005233004e-09, "loss": 0.0102, "step": 1016400 }, { "epoch": 9.99, "grad_norm": 4.6648406982421875, "learning_rate": 5.2876165509845394e-09, "loss": 0.0472, "step": 1016425 }, { "epoch": 9.99, "grad_norm": 3.6420814990997314, "learning_rate": 5.163494096736076e-09, "loss": 0.0044, "step": 1016450 }, { "epoch": 9.99, "grad_norm": 1.7393125295639038, "learning_rate": 5.039371642487613e-09, "loss": 0.0268, "step": 1016475 }, { "epoch": 9.99, "grad_norm": 0.3243708908557892, "learning_rate": 4.91524918823915e-09, "loss": 0.0149, "step": 1016500 }, { "epoch": 9.99, "grad_norm": 0.003285112790763378, "learning_rate": 4.7911267339906864e-09, "loss": 0.0289, "step": 1016525 }, { "epoch": 9.99, "grad_norm": 0.10853415727615356, "learning_rate": 4.667004279742223e-09, "loss": 0.0081, "step": 1016550 }, { "epoch": 10.0, "grad_norm": 0.041759490966796875, "learning_rate": 4.5428818254937595e-09, "loss": 0.0373, "step": 1016575 }, { "epoch": 10.0, "grad_norm": 4.152030944824219, "learning_rate": 4.418759371245296e-09, "loss": 0.0098, "step": 1016600 }, { "epoch": 10.0, "grad_norm": 1.2276510000228882, "learning_rate": 4.294636916996833e-09, "loss": 0.0282, "step": 1016625 }, { "epoch": 10.0, "grad_norm": 12.3807954788208, "learning_rate": 4.170514462748369e-09, "loss": 0.0056, "step": 1016650 }, { "epoch": 10.0, "grad_norm": 0.03769959136843681, "learning_rate": 4.051356906669845e-09, "loss": 0.029, "step": 1016675 }, { "epoch": 10.0, "grad_norm": 12.030399322509766, "learning_rate": 3.927234452421381e-09, "loss": 0.0169, "step": 1016700 }, { "epoch": 10.0, "grad_norm": 3.088649034500122, "learning_rate": 3.803111998172918e-09, "loss": 0.0242, "step": 1016725 }, { "epoch": 10.0, "grad_norm": 2.93216872215271, "learning_rate": 3.6789895439244546e-09, "loss": 0.0134, "step": 1016750 }, { "epoch": 10.0, "grad_norm": 0.07252712547779083, "learning_rate": 3.5548670896759907e-09, "loss": 0.0396, "step": 1016775 }, { "epoch": 10.0, "grad_norm": 0.8234681487083435, "learning_rate": 3.4307446354275276e-09, "loss": 0.0162, "step": 1016800 }, { "epoch": 10.0, "grad_norm": 0.3597090542316437, "learning_rate": 3.306622181179064e-09, "loss": 0.0285, "step": 1016825 }, { "epoch": 10.0, "grad_norm": 0.8843782544136047, "learning_rate": 3.1824997269306007e-09, "loss": 0.0269, "step": 1016850 }, { "epoch": 10.0, "grad_norm": 0.03328671306371689, "learning_rate": 3.0583772726821377e-09, "loss": 0.0418, "step": 1016875 }, { "epoch": 10.0, "grad_norm": 11.5062255859375, "learning_rate": 2.934254818433674e-09, "loss": 0.0118, "step": 1016900 }, { "epoch": 10.0, "grad_norm": 5.107182502746582, "learning_rate": 2.8101323641852107e-09, "loss": 0.0302, "step": 1016925 }, { "epoch": 10.0, "grad_norm": 8.728755950927734, "learning_rate": 2.6860099099367477e-09, "loss": 0.0396, "step": 1016950 }, { "epoch": 10.0, "grad_norm": 0.038444869220256805, "learning_rate": 2.561887455688284e-09, "loss": 0.0351, "step": 1016975 }, { "epoch": 10.0, "grad_norm": 0.3054758310317993, "learning_rate": 2.4377650014398208e-09, "loss": 0.0153, "step": 1017000 }, { "epoch": 10.0, "grad_norm": 1.7086488008499146, "learning_rate": 2.3136425471913573e-09, "loss": 0.0386, "step": 1017025 }, { "epoch": 10.0, "grad_norm": 1.0626953840255737, "learning_rate": 2.189520092942894e-09, "loss": 0.0055, "step": 1017050 }, { "epoch": 10.0, "step": 1017070, "total_flos": 8.304182448183709e+21, "train_loss": 0.1179144298298952, "train_runtime": 4583335.8647, "train_samples_per_second": 1.775, "train_steps_per_second": 0.222 } ], "logging_steps": 25, "max_steps": 1017070, "num_input_tokens_seen": 0, "num_train_epochs": 10, "save_steps": 5000, "total_flos": 8.304182448183709e+21, "train_batch_size": 8, "trial_name": null, "trial_params": null }